# Clone required repositories

In [None]:
!git clone https://github.com/pwr-pbr23/M8.git
!cd /content/M8/Reproduction/DeepLineDP/ && mkdir -p ./datasets/original
!cd /content/M8/Reproduction/DeepLineDP/datasets/original && git clone https://github.com/awsm-research/line-level-defect-prediction.git
!cp -r /content/M8/Reproduction/DeepLineDP/datasets/original/line-level-defect-prediction/Dataset/File-level/ /content/M8/Reproduction/DeepLineDP/datasets/original
!cp -r /content/M8/Reproduction/DeepLineDP/datasets/original/line-level-defect-prediction/Dataset/Line-level/ /content/M8/Reproduction/DeepLineDP/datasets/original

# Setup R environment part I

In [None]:
!pip uninstall rpy2 -y
!pip install rpy2==3.5.1
%load_ext rpy2.ipython

# Setup R environment part II

Note: installing R packages takes about 20 minutes

In [None]:
%%R
# R packages installed by this notebook.
required_packages <- c('tidyverse', 'gridExtra', 'ModelMetrics', 'caret',
                       'reshape2', 'pROC', 'effsize', 'ScottKnottESD')

# Only install what is not in the library yet, so re-running this cell is cheap.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))

if (length(missing_packages) > 0) {
  # Download over HTTPS from the CRAN cloud mirror instead of a plain-HTTP mirror.
  install.packages(missing_packages, repos = "https://cloud.r-project.org")
}

# Setup conda environment part I

In [None]:
# Install condacolab into the Colab runtime (-q keeps pip's output short).
!pip install -q condacolab
import condacolab
# NOTE(review): condacolab's documentation says install() restarts the kernel once it has
# finished. If that holds here, the `conda env create` line below is only reached when this
# cell is executed a second time, and extensions loaded earlier (e.g. rpy2.ipython) have to
# be loaded again afterwards -- confirm.
condacolab.install()

# Create the conda environment described by requirements.yml in the DeepLineDP folder.
# Later cells activate DeepLineDP_env -- presumably the name declared in that file; verify.
!cd /content/M8/Reproduction/DeepLineDP/ && conda env create -f requirements.yml

# Setup conda environment part II

In [None]:
%%bash
# Install the extra Python packages into the DeepLineDP_env conda environment.
# Abort if the environment cannot be activated, so pip never installs into a different
# environment by accident.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

# From here on any failing command ends the cell with a non-zero status; without this a
# failed torch install would be hidden by a successful imblearn install.
set -e

pip install torch
pip install imblearn

# Preprocess data

Note: preprocessing data takes about 10 minutes

In [None]:
%%bash
# Run the two preprocessing scripts inside the DeepLineDP_env conda environment.
# Abort if the environment cannot be activated, so the scripts are never run by a
# different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/ || exit 1

# The export script only runs when preprocessing succeeded.
python preprocess_data.py && python export_data_for_line_level_baseline.py

# Train Word2Vec model
 
Note: training Word2Vec model takes about 5 minutes

In [None]:
%%bash
# Run train_word2vec.py once per dataset inside the DeepLineDP_env conda environment.
# Abort if the environment cannot be activated, so the script is never run by a
# different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/ || exit 1

# Every dataset is attempted even when an earlier one fails. Failures are collected so the
# cell ends with a non-zero status instead of reflecting only the last dataset's result.
failed=()
for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
    python train_word2vec.py "$dataset" || failed+=("$dataset")
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "train_word2vec.py failed for: ${failed[*]}" >&2
    exit 1
fi

# Train DeepLineDP model

Note: training DeepLineDP model takes about 50 minutes

In [None]:
%%bash
# Run train_model.py once per dataset inside the DeepLineDP_env conda environment.
# Abort if the environment cannot be activated, so the script is never run by a
# different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/ || exit 1

# Every dataset is attempted even when an earlier one fails. Failures are collected so the
# cell ends with a non-zero status instead of reflecting only the last dataset's result.
failed=()
for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
    python train_model.py -dataset "$dataset" || failed+=("$dataset")
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "train_model.py failed for: ${failed[*]}" >&2
    exit 1
fi

# Generate predictions

Note: generating predictions takes about 10 minutes

In [None]:
%%bash
# Run generate_prediction.py once per dataset inside the DeepLineDP_env conda environment.
# Abort if the environment cannot be activated, so the script is never run by a
# different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/ || exit 1

# Every dataset is attempted even when an earlier one fails. Failures are collected so the
# cell ends with a non-zero status instead of reflecting only the last dataset's result.
failed=()
for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
    python generate_prediction.py -dataset "$dataset" || failed+=("$dataset")
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "generate_prediction.py failed for: ${failed[*]}" >&2
    exit 1
fi

# Generate cross projects predictions

Note: generating cross projects predictions takes about 65 minutes

In [None]:
%%bash
# Run generate_prediction_cross_projects.py once per dataset inside the DeepLineDP_env
# conda environment. Abort if the environment cannot be activated, so the script is never
# run by a different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/ || exit 1

# Every dataset is attempted even when an earlier one fails. Failures are collected so the
# cell ends with a non-zero status instead of reflecting only the last dataset's result.
failed=()
for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
    python generate_prediction_cross_projects.py -dataset "$dataset" || failed+=("$dataset")
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "generate_prediction_cross_projects.py failed for: ${failed[*]}" >&2
    exit 1
fi

# Train file-level baselines' models

In [None]:
%%bash
# Train the four file-level baselines (Bi-LSTM, CNN, DBN, BoW) on every dataset inside the
# DeepLineDP_env conda environment. Abort if the environment cannot be activated, so the
# scripts are never run by a different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/file-level-baseline/ || exit 1

# Runs are never skipped because of an earlier failure. Failures are collected so the cell
# ends with a non-zero status instead of reflecting only the very last run.
failed=()

# run_training SCRIPT DATASET -- train one baseline on one dataset and record a failure.
run_training() {
    python "$1" -data "$2" -train || failed+=("$1 ($2)")
}

# Bi-LSTM: derby, groovy, hbase, hive and jruby are trained with the batch-size-16 variant
# of the script, the remaining datasets with the regular one.
for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
    case "$dataset" in
        derby|groovy|hbase|hive|jruby) bilstm_script=Bi-LSTM-baseline_batch_size_16.py ;;
        *)                             bilstm_script=Bi-LSTM-baseline.py ;;
    esac
    run_training "$bilstm_script" "$dataset"
done

# CNN, DBN and BoW, in that order, each over all datasets.
for script in CNN-baseline.py DBN-baseline.py BoW-baseline.py; do
    for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
        run_training "$script" "$dataset"
    done
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "Baseline training failed for: ${failed[*]}" >&2
    exit 1
fi

# Generate predictions for file-level baselines' models

In [None]:
%%bash
# Generate predictions with the four file-level baselines (Bi-LSTM, CNN, DBN, BoW) for every
# dataset inside the DeepLineDP_env conda environment. Abort if the environment cannot be
# activated, so the scripts are never run by a different Python installation.
source activate DeepLineDP_env || { echo "Could not activate conda environment DeepLineDP_env" >&2; exit 1; }

cd /content/M8/Reproduction/DeepLineDP/script/file-level-baseline/ || exit 1

# Runs are never skipped because of an earlier failure. Failures are collected so the cell
# ends with a non-zero status instead of reflecting only the very last run.
failed=()

# run_prediction SCRIPT DATASET [EXTRA_ARGS...] -- predict with one baseline on one dataset;
# any extra arguments are appended after -predict.
run_prediction() {
    local script="$1" dataset="$2"
    shift 2
    python "$script" -data "$dataset" -predict "$@" || failed+=("$script ($dataset)")
}

# Bi-LSTM and CNN are called with -target_epochs 6.
# NOTE(review): the training cell uses Bi-LSTM-baseline_batch_size_16.py for derby, groovy,
# hbase, hive and jruby, while prediction uses Bi-LSTM-baseline.py for every dataset --
# presumably both scripts share the same saved-model location; confirm.
for script in Bi-LSTM-baseline.py CNN-baseline.py; do
    for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
        run_prediction "$script" "$dataset" -target_epochs 6
    done
done

# DBN and BoW take no epoch argument.
for script in DBN-baseline.py BoW-baseline.py; do
    for dataset in activemq camel derby groovy hbase hive jruby lucene wicket; do
        run_prediction "$script" "$dataset"
    done
done

if [ "${#failed[@]}" -gt 0 ]; then
    echo "Baseline prediction failed for: ${failed[*]}" >&2
    exit 1
fi

# Generate results for RQ1-RQ4 with R script

In [None]:
%%R
# Placeholder cell: it contains no R code yet. Presumably the contents of
# get_evaluation_result.R are meant to be pasted here before running it -- confirm.
# NOTE(review): %%R is only available while the rpy2.ipython extension is loaded in the
# current kernel; if the kernel was restarted after the R setup cell, reload it first.
# get_evaluation_result.R content

# Export all to Google Drive
Remember to change the Google Drive file path to suit your drive's directory hierarchy, and the file name to better describe which research reproduction's results you are zipping and exporting.

In [None]:
# Optional export step, disabled by default: uncomment the lines below to mount Google Drive
# and zip the results into it.
# NOTE(review): ./DeepLineDP/ is relative to the current working directory, whereas the other
# cells work in /content/M8/Reproduction/DeepLineDP/ -- confirm the path before uncommenting.
# from google.colab import drive
# drive.mount('/content/drive')

# !zip -r /content/drive/MyDrive/PBRwIO/File-level_baselines_DBN_models_and_predictions.zip ./DeepLineDP/

# Import all from Google Drive
Remember to change the Google Drive file path to suit your drive's directory hierarchy, and the file name to match the research reproduction's results you are importing and unzipping.

In [None]:
# Mount Google Drive so the archive with earlier results can be read from it.
from google.colab import drive
drive.mount('/content/drive')

# Copy the results archive into the current working directory and extract it there.
!cp /content/drive/MyDrive/PBRwIO/All_reproduction_results.zip .
!unzip ./All_reproduction_results.zip