Train on core terms and contextual terms

In [1]:
import pickle
import numpy as np
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import joblib
from keras.callbacks import Callback
from sklearn.preprocessing import LabelEncoder

class F1ScoreCallback(Callback):
    def __init__(self, X_val, y_val):
        super(F1ScoreCallback, self).__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.best_f1 = 0.0
        self.best_model = None
        self.f1_scores = []

    def on_epoch_end(self, epoch, logs=None):
        y_val_pred = np.argmax(self.model.predict(self.X_val), axis=1)
        f1 = f1_score(self.y_val, y_val_pred, average='weighted')
        self.f1_scores.append(f1)
        

        if f1 > self.best_f1:
            self.best_f1 = f1
            self.best_model = self.model
            print(f"Epoch {epoch + 1} - F1 Score: {f1:.4f}")
            print("Saved best model")
            print(self.f1_scores)

with open('train_without_test4.pickle', 'rb') as f1:
    balanced = pickle.load(f1)

with open('test_core_contextual_terms_with_embeddings.pickle', 'rb') as f2:
    unbalanced = pickle.load(f2)

train = np.array([item['cve_core_contextual_ada_embedding'] for item in balanced if item['cwe'] != 'None'])
test = np.array([item['cwe'] for item in balanced if item['cwe'] != 'None'])
np.random.seed(42)
X_train, X_val, y_train, y_val = train_test_split(train,test,test_size=0.1,random_state=42)

X_test = np.array([item['cve_core_contextual_ada_embedding'] for item in unbalanced if item['cwe'] != 'None'])
y_test = np.array([item['cwe'] for item in unbalanced if item['cwe'] != 'None'])

label_encoder_train = LabelEncoder()
y_train_encoded = label_encoder_train.fit_transform(y_train)
label_encoder_test = LabelEncoder()
y_test_encoded = label_encoder_test.fit_transform(y_test)


input_dim = X_train.shape[1]
output_dim = len(np.unique(y_train))

model = Sequential()
model.add(Dense(128, input_dim=input_dim, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(output_dim, activation='softmax'))


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

f1_callback = F1ScoreCallback(X_val, label_encoder_train.transform(y_val))

history = model.fit(X_train, y_train_encoded, epochs=40, batch_size=32, validation_data=(X_val, label_encoder_train.transform(y_val)), verbose=1, callbacks=[f1_callback])

best_model = f1_callback.best_model


# Save the best model
joblib.dump(best_model, 'best_model.joblib')

# Make predictions on the test set
y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

y_pred_original = label_encoder_train.inverse_transform(y_pred)

print("Classification Report:\n", classification_report(y_test, y_pred_original, digits=4))

joblib.dump(label_encoder_train, 'label_encoder_train.joblib')

Epoch 1/40
Epoch 1 - F1 Score: 0.5801
Saved best model
[0.5800555209446794]
Epoch 2/40
Epoch 2 - F1 Score: 0.6167
Saved best model
[0.5800555209446794, 0.6167006290291936]
Epoch 3/40
Epoch 3 - F1 Score: 0.6413
Saved best model
[0.5800555209446794, 0.6167006290291936, 0.6413260315279218]
Epoch 4/40
Epoch 4 - F1 Score: 0.6510
Saved best model
[0.5800555209446794, 0.6167006290291936, 0.6413260315279218, 0.6510021617735743]
Epoch 5/40
Epoch 5 - F1 Score: 0.6610
Saved best model
[0.5800555209446794, 0.6167006290291936, 0.6413260315279218, 0.6510021617735743, 0.6610493144404357]
Epoch 6/40
Epoch 7/40
Epoch 7 - F1 Score: 0.6702
Saved best model
[0.5800555209446794, 0.6167006290291936, 0.6413260315279218, 0.6510021617735743, 0.6610493144404357, 0.6602948156271925, 0.6702245255695949]
Epoch 8/40
Epoch 9/40
Epoch 9 - F1 Score: 0.6745
Saved best model
[0.5800555209446794, 0.6167006290291936, 0.6413260315279218, 0.6510021617735743, 0.6610493144404357, 0.6602948156271925, 0.6702245255695949, 0.6690

['label_encoder_train.joblib']

Run inference

In [3]:
import pickle
import numpy as np
from sklearn.metrics import classification_report
import joblib
from sklearn.preprocessing import LabelEncoder
# Save the best model
with open('test_core_contextual_terms_with_embeddings.pickle', 'rb') as f2:
    unbalanced = pickle.load(f2)

X_test = np.array([item['cve_core_contextual_ada_embedding'] for item in unbalanced if item['cwe'] != 'None'])
y_test = np.array([item['cwe'] for item in unbalanced if item['cwe'] != 'None'])

best_model=joblib.load('best_model.joblib')
label_encoder_train=joblib.load('label_encoder_train.joblib')

# Make predictions on the test set
y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

y_pred_original = label_encoder_train.inverse_transform(y_pred)

print("Classification Report:\n", classification_report(y_test, y_pred_original, digits=4))

Classification Report:
               precision    recall  f1-score   support

         119     0.8284    0.4421    0.5765      1070
         120     0.4010    0.8163    0.5378       196
         125     0.7394    0.8534    0.7923       532
         134     0.4595    0.8947    0.6071        19
         189     0.5412    0.7731    0.6367       119
         190     0.7000    0.7350    0.7171       200
          20     0.4670    0.2012    0.2813       810
         200     0.5707    0.5678    0.5692       590
         203     0.6000    0.6667    0.6316        27
          22     0.8872    0.8803    0.8837       518
         254     0.0877    0.1471    0.1099        34
         255     0.3368    0.4776    0.3951        67
         264     0.5111    0.5050    0.5080       503
         269     0.3194    0.4340    0.3680       106
         276     0.3077    0.2500    0.2759        64
         284     0.2990    0.2358    0.2636       123
         287     0.5000    0.6281    0.5568       285
   