In [None]:
# Clone required repositories
!git clone https://github.com/awsm-research/DeepLineDP.git
!cd ./DeepLineDP/ && mkdir -p ./datasets/original
!cd ./DeepLineDP/datasets/original && git clone https://github.com/awsm-research/line-level-defect-prediction.git
!cp -r ./DeepLineDP/datasets/original/line-level-defect-prediction/Dataset/File-level/ ./DeepLineDP/datasets/original
!cp -r ./DeepLineDP/datasets/original/line-level-defect-prediction/Dataset/Line-level/ ./DeepLineDP/datasets/original

# Setup R environment
# Note: installing R packages takes about 20 minutes 
!pip uninstall rpy2 -y
!pip install rpy2==3.0.0
%load_ext rpy2.ipython

In [None]:
%%R
# Install the R packages listed below.
# - CRAN is contacted over HTTPS instead of plain HTTP.
# - Packages that are already installed are skipped, so re-running the cell is cheap.
cran_repo <- "https://cran.us.r-project.org"
required_pkgs <- c('tidyverse', 'gridExtra', 'ModelMetrics', 'caret',
                   'reshape2', 'pROC', 'effsize', 'ScottKnottESD')
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = cran_repo)
}

In [None]:
# Setup conda environment
# Download the Miniconda (Python 3.9) installer, mark it executable and run it
# non-interactively with /usr/local as the install prefix
# (-b: batch mode, -f: do not fail if the prefix already exists, -p: prefix).
!wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.1.0-1-Linux-x86_64.sh
!chmod +x Miniconda3-py39_23.1.0-1-Linux-x86_64.sh
!bash ./Miniconda3-py39_23.1.0-1-Linux-x86_64.sh -b -f -p /usr/local

import sys
# Add Miniconda's Python 3.9 site-packages directory to this kernel's import path.
# NOTE(review): presumably only safe when the kernel itself runs Python 3.9 — confirm.
sys.path.append('/usr/local/lib/python3.9/site-packages/')

# Create the conda environment described by DeepLineDP/requirements.yml,
# initialise conda for bash, then open a login shell.
# NOTE(review): `bash --login` starts an interactive shell, so this cell will
# presumably not finish until that shell is exited — confirm in the target frontend.
!cd ./DeepLineDP/ && conda env create -f requirements.yml
!conda init bash
!bash --login

# Manual steps to run inside the new shell (not executed by this notebook):
# TODO: write a script that runs all of the commands below in the conda environment
# conda deactivate
# conda activate DeepLineDP_env
# pip install torch
# pip install imblearn
# chmod a+x ./train_models_and_generate_predictions_for_file-level_baselines.sh
# ./train_models_and_generate_predictions_for_file-level_baselines.sh

In [None]:
# Preprocess data
# Note: preprocessing data takes about 10 minutes
!pip install pandas==1.3.3
!pip install torch
!cd ./DeepLineDP/script/ && python preprocess_data.py && python export_data_for_line_level_baseline.py

In [None]:
# Train Word2Vec model
# Note: training Word2Vec model takes about 5 minutes
!pip install gensim==3.8.3
!pip install more-itertools==8.10.0
!cd ./DeepLineDP/script/ && python train_word2vec.py activemq
!cd ./DeepLineDP/script/ && python train_word2vec.py camel
!cd ./DeepLineDP/script/ && python train_word2vec.py derby
!cd ./DeepLineDP/script/ && python train_word2vec.py groovy
!cd ./DeepLineDP/script/ && python train_word2vec.py hbase
!cd ./DeepLineDP/script/ && python train_word2vec.py hive
!cd ./DeepLineDP/script/ && python train_word2vec.py jruby
!cd ./DeepLineDP/script/ && python train_word2vec.py lucene
!cd ./DeepLineDP/script/ && python train_word2vec.py wicket

In [None]:
# Train DeepLineDP model
# Note: training DeepLineDP model takes about 50 minutes
!pip install scikit-learn==1.0
!cd ./DeepLineDP/script/ && python train_model.py -dataset activemq
!cd ./DeepLineDP/script/ && python train_model.py -dataset camel
!cd ./DeepLineDP/script/ && python train_model.py -dataset derby
!cd ./DeepLineDP/script/ && python train_model.py -dataset groovy
!cd ./DeepLineDP/script/ && python train_model.py -dataset hbase
!cd ./DeepLineDP/script/ && python train_model.py -dataset hive
!cd ./DeepLineDP/script/ && python train_model.py -dataset jruby
!cd ./DeepLineDP/script/ && python train_model.py -dataset lucene
!cd ./DeepLineDP/script/ && python train_model.py -dataset wicket

In [None]:
# Generate predictions
# Note: generating predictions takes about 10 minutes
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset activemq
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset camel
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset derby
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset groovy
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset hbase
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset hive
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset jruby
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset lucene
!cd ./DeepLineDP/script/ && python generate_prediction.py -dataset wicket

In [None]:
# Generate cross projects predictions
# Note: generating cross projects predictions takes about 65 minutes
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset activemq
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset camel
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset derby
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset groovy
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset hbase
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset hive
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset jruby
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset lucene
!cd ./DeepLineDP/script/ && python generate_prediction_cross_projects.py -dataset wicket

In [None]:
# Train file-level baselines' models
# Note: training file-level baselines' models takes about ... minutes

# Bi-LSTM
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data activemq -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data camel -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data derby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data groovy -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data hbase -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data hive -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data jruby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data lucene -train
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data wicket -train

# CNN
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data activemq -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data camel -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data derby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data groovy -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data hbase -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data hive -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data jruby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data lucene -train
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data wicket -train

# DBN
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data activemq -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data camel -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data derby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data groovy -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data hbase -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data hive -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data jruby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data lucene -train
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data wicket -train

# BoW
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data activemq -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data camel -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data derby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data groovy -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data hbase -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data hive -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data jruby -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data lucene -train
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data wicket -train

In [None]:
# Generate predictions for file-level baselines' models
# Note: generating predictions for file-level baselines' models takes about ... minutes

# Bi-LSTM
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data activemq -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data camel -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data derby -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data groovy -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data hbase -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data hive -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data jruby -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data lucene -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python Bi-LSTM-baseline.py -data wicket -predict -target_epochs 6

# CNN
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data activemq -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data camel -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data derby -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data groovy -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data hbase -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data hive -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data jruby -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data lucene -predict -target_epochs 6
!cd ./DeepLineDP/script/file-level-baseline/ && python CNN-baseline.py -data wicket -predict -target_epochs 6

# DBN
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data activemq -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data camel -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data derby -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data groovy -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data hbase -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data hive -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data jruby -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data lucene -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python DBN-baseline.py -data wicket -predict

# BoW
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data activemq -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data camel -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data derby -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data groovy -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data hbase -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data hive -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data jruby -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data lucene -predict
!cd ./DeepLineDP/script/file-level-baseline/ && python BoW-baseline.py -data wicket -predict

In [None]:
# Optional: export everything to Google Drive for download (uncomment the lines below to run)
# from google.colab import drive
# drive.mount('/content/drive')
# !zip -r /content/drive/MyDrive/PBRwIO/DeepLineDP.zip ./DeepLineDP/

Mounted at /content/drive


In [None]:
# Optional: import a previously exported archive from Google Drive (uncomment the lines below to run)
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/PBRwIO/DeepLineDP_with_predictions_including_cross_projects_without_baselines_with_line-level_baselines.zip .
# !unzip ./DeepLineDP_with_predictions_including_cross_projects_without_baselines_with_line-level_baselines.zip