In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from utils import bert_text_processor as btp
from models import bert_cve_classifier as bcvec
from sklearn.utils import class_weight
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Single seed reused for NumPy, TensorFlow and the train/test split (random_state=SEED).
SEED = 42
np.random.seed(SEED)
# TF1-style graph-level seed; this notebook uses the TF1 API (tf.Session) throughout.
tf.set_random_seed(SEED)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Read the preprocessed issue/PR table. na_filter=False turns off pandas' missing-value
# detection, so empty cells are not converted to NaN.
dataset = pd.read_csv(
    '../../data/GH_complete_labeled_issues_prs - preprocessed.csv',
    encoding='utf-8',
    na_filter=False,
)

# Keep only the rows whose label is not 0, then pull out the raw texts and labels.
dataset = dataset.loc[dataset['label'] != 0]
texts = dataset['description'].tolist()
labels = dataset['label'].tolist()

print('Before:', Counter(labels))
# Collapse to a binary target: original label 1 -> 0, every other remaining label -> 1.
# NOTE(review): presumably label 2 is the positive (vulnerability) class - confirm.
labels = [int(item != 1) for item in labels]
print('After:', Counter(labels))

Before: Counter({1: 22572, 2: 671})
After: Counter({0: 22572, 1: 671})


In [6]:
# Hold out 25% of the samples as the test set; random_state=SEED makes the split repeatable.
# NOTE(review): the split is not stratified although the classes are heavily imbalanced.
# Do not change it without checking how the evaluated checkpoints were trained - a
# different split could move training rows into the test set.
train_text, test_text, train_labels, test_labels = train_test_split(
    texts,
    labels,
    test_size=0.25,
    random_state=SEED,
)

# Show the sizes of the two partitions.
(len(train_text), len(test_text))

(17432, 5811)

In [7]:
# Base BERT model location and sequence length; both are passed to the text processor
# and to the classifier so that the two stay consistent.
BERT_PATH = "models/model_assets/gokube-phase2/base_bert_tfhub_models/bert_uncased_L12_H768_A12"
MAX_SEQ_LENGTH = 512

# TensorFlow 1.x session that is handed to the BERT text processor.
sess = tf.Session()

In [8]:
# Convert the held-out test texts and labels into BERT model inputs.
btp_test = btp.BertTextProcessor(
    tf_session=sess,
    bert_model_path=BERT_PATH,
    max_seq_length=MAX_SEQ_LENGTH,
)

# Pipeline: tokenizer -> input examples -> features.
# NOTE(review): presumably these steps are order-dependent and the last one populates
# btp_test.input_ids / input_masks / segment_ids, which the predict cells read - confirm
# against utils.bert_text_processor.
btp_test.create_bert_tokenizer()
btp_test.convert_text_to_input_examples(test_text, test_labels)
btp_test.convert_examples_to_features()

Loading Base BERT Model
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0811 05:51:18.691654 140712378369856 tf_logging.py:115] Saver not created because there are no variables in the graph to restore
Converting text to examples: 5811it [00:00, 579994.30it/s]
Converting examples to features:   0%|          | 0/5811 [00:00<?, ?it/s]

Loading BERT WordPiece Tokenizer
Creating Input Examples from data
Creating BERT Input Features from Input Examples


Converting examples to features: 100%|██████████| 5811/5811 [01:28<00:00, 65.66it/s] 


In [9]:
# Create the classifier wrapper with the same BERT assets and sequence length used by the
# text processor, then build its architecture before any checkpoint weights are loaded.
bc = bcvec.BERTClassifier(
    bert_model_path=BERT_PATH,
    max_seq_length=MAX_SEQ_LENGTH,
)
bc.build_model_architecture()

Build BERT Classifier CVE Model Architecture
Loading Base BERT Model
Trainable layers: 199
Non Trainable layers: 5
Constructing Base BERT architecture
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0811 05:56:37.218409 140712378369856 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


In [10]:
# Checkpoint whose file name is tagged ep:01 (val_loss 0.229, val_acc 0.918).
# The path is hard-coded relative to the notebook's working directory.
MODEL1 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:01-trn_loss:0.379-trn_acc:0.904-val_loss:0.229-val_acc:0.918.h5'
bc.load_model_weights(model_weights_path=MODEL1)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [11]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96      5646
           1       0.26      0.86      0.39       165

    accuracy                           0.92      5811
   macro avg       0.63      0.89      0.68      5811
weighted avg       0.97      0.92      0.94      5811

[[5232  414]
 [  23  142]]


In [14]:
# Checkpoint whose file name is tagged ep:02 (val_loss 0.153, val_acc 0.941).
# The path is hard-coded relative to the notebook's working directory.
MODEL2 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:02-trn_loss:0.176-trn_acc:0.959-val_loss:0.153-val_acc:0.941.h5'
bc.load_model_weights(model_weights_path=MODEL2)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [15]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96      5646
           1       0.27      0.88      0.41       165

    accuracy                           0.93      5811
   macro avg       0.63      0.91      0.69      5811
weighted avg       0.98      0.93      0.95      5811

[[5243  403]
 [  19  146]]


In [16]:
# Checkpoint whose file name is tagged ep:03 (val_loss 0.284, val_acc 0.976).
# The path is hard-coded relative to the notebook's working directory.
MODEL3 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:03-trn_loss:0.096-trn_acc:0.981-val_loss:0.284-val_acc:0.976.h5'
bc.load_model_weights(model_weights_path=MODEL3)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [17]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5646
           1       0.59      0.77      0.67       165

    accuracy                           0.98      5811
   macro avg       0.79      0.88      0.83      5811
weighted avg       0.98      0.98      0.98      5811

[[5558   88]
 [  38  127]]


In [18]:
# Checkpoint whose file name is tagged ep:04 (val_loss 0.265, val_acc 0.976).
# The path is hard-coded relative to the notebook's working directory.
MODEL4 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:04-trn_loss:0.053-trn_acc:0.987-val_loss:0.265-val_acc:0.976.h5'
bc.load_model_weights(model_weights_path=MODEL4)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [19]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5646
           1       0.56      0.78      0.65       165

    accuracy                           0.98      5811
   macro avg       0.78      0.88      0.82      5811
weighted avg       0.98      0.98      0.98      5811

[[5544  102]
 [  36  129]]


In [20]:
# Checkpoint whose file name is tagged ep:05 (val_loss 0.233, val_acc 0.956).
# The path is hard-coded relative to the notebook's working directory.
MODEL5 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:05-trn_loss:0.055-trn_acc:0.991-val_loss:0.233-val_acc:0.956.h5'
bc.load_model_weights(model_weights_path=MODEL5)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [21]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.96      0.98      5646
           1       0.38      0.82      0.52       165

    accuracy                           0.96      5811
   macro avg       0.69      0.89      0.75      5811
weighted avg       0.98      0.96      0.96      5811

[[5423  223]
 [  29  136]]


In [22]:
# Checkpoint whose file name is tagged ep:06 (val_loss 0.239, val_acc 0.931).
# The path is hard-coded relative to the notebook's working directory.
MODEL6 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:06-trn_loss:0.044-trn_acc:0.994-val_loss:0.239-val_acc:0.931.h5'
bc.load_model_weights(model_weights_path=MODEL6)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [23]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96      5646
           1       0.28      0.90      0.43       165

    accuracy                           0.93      5811
   macro avg       0.64      0.92      0.70      5811
weighted avg       0.98      0.93      0.95      5811

[[5271  375]
 [  17  148]]


In [24]:
# Checkpoint whose file name is tagged ep:07 (val_loss 0.303, val_acc 0.985).
# The path is hard-coded relative to the notebook's working directory.
MODEL7 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:07-trn_loss:0.062-trn_acc:0.990-val_loss:0.303-val_acc:0.985.h5'
bc.load_model_weights(model_weights_path=MODEL7)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [25]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5646
           1       0.71      0.70      0.70       165

    accuracy                           0.98      5811
   macro avg       0.85      0.84      0.85      5811
weighted avg       0.98      0.98      0.98      5811

[[5598   48]
 [  50  115]]


In [26]:
# Checkpoint whose file name is tagged ep:08 (val_loss 0.377, val_acc 0.986).
# The path is hard-coded relative to the notebook's working directory.
MODEL8 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:08-trn_loss:0.021-trn_acc:0.998-val_loss:0.377-val_acc:0.986.h5'
bc.load_model_weights(model_weights_path=MODEL8)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [27]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      5646
           1       0.81      0.65      0.72       165

    accuracy                           0.99      5811
   macro avg       0.90      0.82      0.86      5811
weighted avg       0.98      0.99      0.98      5811

[[5620   26]
 [  57  108]]


In [28]:
# Checkpoint whose file name is tagged ep:09 (val_loss 0.330, val_acc 0.971).
# The path is hard-coded relative to the notebook's working directory.
MODEL9 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:09-trn_loss:0.024-trn_acc:0.998-val_loss:0.330-val_acc:0.971.h5'
bc.load_model_weights(model_weights_path=MODEL9)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [29]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5646
           1       0.59      0.77      0.67       165

    accuracy                           0.98      5811
   macro avg       0.79      0.88      0.83      5811
weighted avg       0.98      0.98      0.98      5811

[[5559   87]
 [  38  127]]


In [30]:
# Checkpoint whose file name is tagged ep:10 (val_loss 0.239, val_acc 0.972).
# The path is hard-coded relative to the notebook's working directory.
MODEL10 = '../../../dsarkar/saved_models/bert_vuln_models/bert_cve75iter2_weights-ep:10-trn_loss:0.039-trn_acc:0.994-val_loss:0.239-val_acc:0.972.h5'
bc.load_model_weights(model_weights_path=MODEL10)

# Score the test features (token ids, masks, segment ids) with the loaded weights.
test_predictions = bc.model_estimator.predict(
    x=[btp_test.input_ids, btp_test.input_masks, btp_test.segment_ids],
    batch_size=256,
    verbose=1,
)

Loading BERT Classifier CVE Model Weights


In [31]:
# Binarize the flattened model scores: strictly above 0.5 -> class 1, otherwise class 0.
# .tolist() keeps test_preds a plain list of Python ints.
test_preds = (test_predictions.ravel() > 0.5).astype(int).tolist()

# Per-class precision/recall/F1, then the confusion matrix
# (scikit-learn convention: rows = true class, columns = predicted class).
print('Performance Report:')
print(classification_report(y_true=test_labels, y_pred=test_preds))
print(confusion_matrix(y_true=test_labels, y_pred=test_preds))

Performance Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5646
           1       0.51      0.81      0.63       165

    accuracy                           0.97      5811
   macro avg       0.75      0.89      0.81      5811
weighted avg       0.98      0.97      0.98      5811

[[5519  127]
 [  31  134]]
