Train on core terms and contextual terms

In [3]:
import pickle
import numpy as np
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import joblib
from keras.callbacks import Callback
from sklearn.preprocessing import LabelEncoder

class F1ScoreCallback(Callback):
    def __init__(self, X_val, y_val):
        super(F1ScoreCallback, self).__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.best_f1 = 0.0
        self.best_model = None
        self.f1_scores = []

    def on_epoch_end(self, epoch, logs=None):
        y_val_pred = np.argmax(self.model.predict(self.X_val), axis=1)
        f1 = f1_score(self.y_val, y_val_pred, average='weighted')
        self.f1_scores.append(f1)
        

        if f1 > self.best_f1:
            self.best_f1 = f1
            self.best_model = self.model
            print(f"Epoch {epoch + 1} - F1 Score: {f1:.4f}")
            print("Saved best model")
            print(self.f1_scores)

with open('train_core_cont.pickle', 'rb') as f1:
    balanced = pickle.load(f1)

with open('test_core_cont.pickle', 'rb') as f2:
    unbalanced = pickle.load(f2)

train = np.array([item['cve_core_contextual_ada_embedding'] for item in balanced if item['cwe'] != 'None'])
test = np.array([item['cwe'] for item in balanced if item['cwe'] != 'None'])
np.random.seed(42)
X_train, X_val, y_train, y_val = train_test_split(train,test,test_size=0.1,random_state=42)

X_test = np.array([item['cve_core_contextual_ada_embedding'] for item in unbalanced if item['cwe'] != 'None'])
y_test = np.array([item['cwe'] for item in unbalanced if item['cwe'] != 'None'])

label_encoder_train = LabelEncoder()
y_train_encoded = label_encoder_train.fit_transform(y_train)
label_encoder_test = LabelEncoder()
y_test_encoded = label_encoder_test.fit_transform(y_test)


input_dim = X_train.shape[1]
output_dim = len(np.unique(y_train))

model = Sequential()
model.add(Dense(128, input_dim=input_dim, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(output_dim, activation='softmax'))


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

f1_callback = F1ScoreCallback(X_val, label_encoder_train.transform(y_val))

history = model.fit(X_train, y_train_encoded, epochs=40, batch_size=32, validation_data=(X_val, label_encoder_train.transform(y_val)), verbose=1, callbacks=[f1_callback])

best_model = f1_callback.best_model


# Save the best model
joblib.dump(best_model, 'best_model.joblib')

# Make predictions on the test set
y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

y_pred_original = label_encoder_train.inverse_transform(y_pred)

print("Classification Report:\n", classification_report(y_test, y_pred_original, digits=4))

joblib.dump(label_encoder_train, 'label_encoder_train.joblib')

Epoch 1/40
Epoch 1 - F1 Score: 0.6236
Saved best model
[0.6235621435468757]
Epoch 2/40
Epoch 2 - F1 Score: 0.6720
Saved best model
[0.6235621435468757, 0.6720225985530234]
Epoch 3/40
Epoch 3 - F1 Score: 0.6905
Saved best model
[0.6235621435468757, 0.6720225985530234, 0.6905123735486282]
Epoch 4/40
Epoch 4 - F1 Score: 0.7054
Saved best model
[0.6235621435468757, 0.6720225985530234, 0.6905123735486282, 0.7053533397442603]
Epoch 5/40
Epoch 5 - F1 Score: 0.7062
Saved best model
[0.6235621435468757, 0.6720225985530234, 0.6905123735486282, 0.7053533397442603, 0.7062118730991521]
Epoch 6/40
Epoch 7/40
Epoch 7 - F1 Score: 0.7138
Saved best model
[0.6235621435468757, 0.6720225985530234, 0.6905123735486282, 0.7053533397442603, 0.7062118730991521, 0.7048380257406531, 0.7137871537907192]
Epoch 8/40
Epoch 9/40
Epoch 9 - F1 Score: 0.7184
Saved best model
[0.6235621435468757, 0.6720225985530234, 0.6905123735486282, 0.7053533397442603, 0.7062118730991521, 0.7048380257406531, 0.7137871537907192, 0.7135

['label_encoder_train.joblib']

Run inference core and contextual

In [5]:
import pickle
import numpy as np
from sklearn.metrics import classification_report
import joblib

# Load the saved model
best_model = joblib.load('best_model.joblib')

# Load the label encoder
label_encoder_train = joblib.load('label_encoder_train.joblib')

# Load the test data
with open('test_core_cont.pickle', 'rb') as f2:
    unbalanced = pickle.load(f2)

X_test = np.array([item['cve_core_contextual_ada_embedding'] for item in unbalanced if item['cwe'] != 'None'])
y_test = np.array([item['cwe'] for item in unbalanced if item['cwe'] != 'None'])

# Make predictions on the test set
y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

# Convert the predicted labels back to their original form
y_pred_original = label_encoder_train.inverse_transform(y_pred)

# Generate and print the classification report
print("Classification Report:\n", classification_report(y_test, y_pred_original, digits=4))


Classification Report:
               precision    recall  f1-score   support

         119     0.8386    0.4953    0.6228      1070
         120     0.4061    0.8163    0.5424       196
         125     0.7640    0.8214    0.7917       532
         134     0.6923    0.9474    0.8000        19
         190     0.6832    0.8950    0.7749       200
          20     0.5804    0.3210    0.4134       810
         200     0.6912    0.4932    0.5757       590
         203     0.2899    0.7407    0.4167        27
          22     0.8827    0.8861    0.8844       518
         269     0.4268    0.3302    0.3723       106
         276     0.3333    0.2812    0.3051        64
         287     0.5263    0.6316    0.5742       285
         295     0.5631    0.7160    0.6304        81
         306     0.3761    0.4362    0.4039        94
         312     0.3077    0.2857    0.2963        42
         319     0.5686    0.5686    0.5686        51
         326     0.3913    0.2903    0.3333        31
   