## Evaluation of the fine-tuned model on the RCM dataset

In [1]:
import pandas as pd
import re

In [2]:

# Load the RCM evaluation dataset from its CSV file
df = pd.read_csv(filepath_or_buffer='our-cti-rcm-with-model-outputs.csv')


In [3]:


# Extract the first CWE identifier (format CWE-XXX) from a piece of text
def extract_cwe(text):
    """Return the first ``CWE-<digits>`` identifier found in ``text``.

    Args:
        text: Cell value to search. Non-string values (for example the NaN
            that pandas produces for an empty CSV field) are treated as
            containing no identifier instead of raising ``TypeError``.

    Returns:
        The matched identifier (e.g. ``'CWE-79'``), or ``None`` when there is
        no match.
    """
    if not isinstance(text, str):
        return None
    # Search once and reuse the match object
    match = re.search(r'CWE-\d+', text)
    return match.group(0) if match else None

# Extract the predicted CWE from 'model_outputs' so it can be compared with 'output'
df['extracted_model_output'] = df['model_outputs'].apply(extract_cwe)

# Count the rows whose extracted prediction equals the ground truth
correct_predictions = (df['output'] == df['extracted_model_output']).sum()

# Accuracy as a percentage of all rows
accuracy = (correct_predictions / len(df)) * 100

# Show the accuracy and the first few ground-truth / prediction pairs
print(f"Précision du modèle : {accuracy:.2f}%")
print(df[['output', 'extracted_model_output']].head())


Précision du modèle : 59.44%
     output extracted_model_output
0   CWE-787                CWE-787
1    CWE-23                 CWE-22
2    CWE-89                 CWE-89
3    CWE-79                 CWE-89
4  CWE-1321                 CWE-89


## Evaluation of the fine-tuned model on the MCQ dataset

In [4]:
import pandas as pd

def extract(model_output:str)-> str:
    """Return the answer letter contained in a raw model output.

    The answer is the last ASCII letter that appears before the
    ``<|end_of_text|>`` marker, upper-cased. When the marker is absent the
    whole string is searched.

    Args:
        model_output: Raw text produced by the model. Non-string values (for
            example the NaN pandas produces for an empty CSV field) yield "".

    Returns:
        A single upper-case letter, or "" when no letter is found.
    """
    if not isinstance(model_output, str):
        return ""

    end_of_text = model_output.find("<|end_of_text|>")
    # str.find returns -1 when the marker is missing; slicing with [:-1] would
    # silently drop the last character, which is frequently the answer itself.
    answer = model_output if end_of_text == -1 else model_output[:end_of_text]

    # Walk backwards so the letter closest to the marker wins
    for x in reversed(answer):
        if x.isascii() and x.isalpha():
            return x.upper()
    return ""

def evaluate_model(path:str, verbose:bool=True)-> float:
    """Compute the multiple-choice accuracy of the model outputs stored in a CSV.

    For every row, the answer is extracted from the ``model_outputs`` column
    with ``extract`` and compared to the ``output`` column.

    Args:
        path: Path of the CSV file; it must contain the columns ``output``
            and ``model_outputs``.
        verbose: When True (the default, matching the historical behaviour),
            print each extracted answer next to the expected one.

    Returns:
        The percentage of rows whose extracted answer equals ``output``, or
        NaN when the file contains no rows.
    """
    df = pd.read_csv(path)
    # An empty file has no defined accuracy; avoid dividing by zero
    if len(df) == 0:
        return float("nan")

    correct = 0
    # Iterate over the two columns directly instead of rebuilding a row
    # Series with iloc twice per iteration
    for correct_output, raw_output in zip(df['output'], df['model_outputs']):
        model_output = extract(raw_output)
        if verbose:
            print(model_output, correct_output)
        if correct_output == model_output:
            correct += 1
    return correct / len(df) * 100


# CSV holding the MCQ ground truth and model outputs; change it to your own file
path = "our-cti-mcq-with-model-outputs.csv"
result = evaluate_model(path)

print(result)

C C
D B
A B
A C
C B
C C
C C
D A
C C
C A
C C
C C
B B
C A
A A
D C
B B
D C
D D
B B
B A
A D
C B
B A
A A
C D
C C
C D
A C
D B
C A
D D
B C
B A
B C
B A
D A
A D
C A
C C
B C
C C
C B
C A
C C
A A
A A
C C
B B
C A
C A
C C
C C
C A
C B
A B
C A
C D
C C
C A
B B
C A
B C
C D
B B
A B
D C
B C
B B
C D
B A
A C
A B
A D
B C
C C
C B
A A
B A
C D
C A
C B
A A
D D
A A
A C
C D
B B
A B
C D
C B
C D
A C
D B
B A
A A
D D
C C
B D
C C
B C
B C
C A
C A
C B
B B
B D
C A
D D
C C
A C
D B
C D
D D
A B
C C
C A
A B
C B
C C
A A
B A
B B
A C
C D
C A
A C
C D
C A
C C
A A
B D
B B
A A
C B
C A
B D
B C
B A
C A
A A
D D
C C
A A
D D
A A
D C
D D
A D
A C
A D
B A
C B
C C
C C
A B
B C
B A
B A
A C
A A
D D
A A
D D
D C
A D
C C
C A
C D
C C
C C
C D
B B
B A
C C
C C
B A
C A
B D
C A
B C
A A
A C
B A
C A
C A
A D
C D
A A
C D
A A
A A
A B
B A
B D
C C
B D
C C
D D
A A
C C
A A
D C
B A
A A
C A
A D
C D
A B
B C
C C
A C
B A
C A
B B
C A
B B
C D
D D
D C
C B
C A
A A
B A
B A
A A
D D
C A
B A
A A
C A
A C
B D
C B
D C
A B
D A
A A
A A
C A
C A
A C
A B
B C
D A
C C
C A
C C
A A
A A


## Evaluation of the fine-tuned model on the VSP dataset

In [5]:
import pandas as pd
import re
from cvss import CVSS3

# Load the VSP evaluation dataset from its CSV file
df = pd.read_csv(filepath_or_buffer='our-cti-vspwith-model-outputs.csv')

# Extract the first CVSS v3 vector string (format CVSS:3.X/...) from a piece of text
def extract_cvss(text):
    """Return the first CVSS v3 vector string found in ``text``.

    Args:
        text: Cell value to search. Non-string values (for example the NaN
            that pandas produces for an empty CSV field) are treated as
            containing no vector instead of raising ``TypeError``.

    Returns:
        The matched vector, starting at ``CVSS:3.<digit>`` and followed by
        one or more ``/<METRIC>:<VALUE>`` groups, or ``None`` when there is
        no match.
    """
    if not isinstance(text, str):
        return None
    match = re.search(r'CVSS:3\.\d/[A-Z]+:[A-Z](?:/[A-Z]+:[A-Z])*', text)
    return match.group(0) if match else None

# Compute the CVSS score that corresponds to a vector string
def get_cvss_score(cvss_vector):
    """Return the CVSS base score for ``cvss_vector``.

    The vector is handed to ``CVSS3`` and the first element of ``scores()``
    (the base score) is returned. Any exception raised along the way is
    reported on stdout and turned into a ``None`` result.
    """
    try:
        base_score = CVSS3(cvss_vector).scores()[0]
    except Exception as e:
        print(f"Erreur lors de l'analyse du vecteur CVSS: {e}")
        return None
    return base_score

# Compute the mean absolute difference (MAD) between predicted and reference CVSS scores
def compute_mad(df, pred_col, gt_col):
    """Average the absolute CVSS score gap between two columns of ``df``.

    For each row, a CVSS vector is extracted from ``pred_col`` and ``gt_col``
    with ``extract_cvss`` and scored with ``get_cvss_score``. Rows where a
    vector cannot be extracted or scored are reported on stdout and left out
    of the average.

    Args:
        df: DataFrame to evaluate. Messages report ``idx + 1``, so the index
            is presumably the default integer one - TODO confirm.
        pred_col: Name of the column holding the model predictions.
        gt_col: Name of the column holding the ground truth.

    Returns:
        The mean absolute difference over the rows that could be scored, or
        ``None`` when no row could be scored.
    """
    abs_error_sum = 0
    scored_rows = 0

    for idx, row in df.iterrows():
        pred_vector = extract_cvss(row[pred_col])
        gt_vector = extract_cvss(row[gt_col])

        # Both sides need an extractable vector to be comparable
        if not (pred_vector and gt_vector):
            print(f"Pas de vecteur CVSS extrait à la ligne {idx + 1}")
            continue

        try:
            pred_score = get_cvss_score(pred_vector)
            gt_score = get_cvss_score(gt_vector)

            if pred_score is None or gt_score is None:
                print(f"Vecteurs CVSS invalides à la ligne {idx + 1}")
                continue

            # Accumulate row by row, in order, to keep the float sum unchanged
            abs_error_sum += abs(pred_score - gt_score)
            scored_rows += 1
        except Exception as e:
            print(f"Erreur à la ligne {idx + 1}: {e}")
            continue

    if scored_rows == 0:
        print("Aucune prédiction valide pour calculer l'erreur.")
        return None

    mad = abs_error_sum / scored_rows
    print(f"Erreur absolue totale : {abs_error_sum}, Nombre total : {scored_rows}")
    return mad

# Score the predictions in 'model_outputs' against the reference vectors in 'output'
mad = compute_mad(df, 'model_outputs', 'output')

# compute_mad returns None when no row could be scored; report only a real value
if mad is not None:
    print(f"Erreur absolue moyenne (MAD) : {mad:.2f}")


Erreur absolue totale : 3228.9000000000087, Nombre total : 1205
Erreur absolue moyenne (MAD) : 2.68


## Summary of this validation (exported to a CSV file)

In [6]:
import pandas as pd
import re
from cvss import CVSS3

def summarize_evaluations_and_save():
    """Evaluate the RCM, MCQ and VSP result files and export the metrics to CSV.

    Each input CSV is read from the current working directory and must hold
    the columns ``output`` (ground truth) and ``model_outputs`` (raw model
    text). The following metrics are computed:

    * RCM / Accuracy: the first ``CWE-<digits>`` identifier in the model
      output equals ``output``.
    * MCQ / Accuracy: the last ASCII letter before ``<|end_of_text|>`` (or in
      the whole text when the marker is absent) equals ``output``.
    * VSP / Accuracy: the CVSS vector extracted from the model output equals
      the one extracted from ``output``. Rows without an extractable
      ground-truth vector are never counted as correct.
    * VSP / MAD: mean absolute difference between the CVSS base scores,
      over the rows where both vectors can be extracted and scored.

    Accuracies are percentages of all rows and are NaN for an empty dataset.
    The rows are written to 'SUMMARY_validateFinetuningWithArticleData.csv'.

    Returns:
        None.
    """
    # Metric rows collected as dicts and turned into a DataFrame once at the end
    results = []

    def as_text(value):
        """Return ``value`` when it is a string, otherwise an empty string."""
        # Empty CSV fields are read back as NaN (a float), which re.search and
        # str methods cannot handle
        return value if isinstance(value, str) else ""

    def percentage(count, total):
        """Return ``count / total`` as a percentage, or NaN when ``total`` is 0."""
        return (count / total) * 100 if total else float("nan")

    # === RCM dataset ===
    def extract_cwe(text):
        """Return the first ``CWE-<digits>`` identifier in ``text``, or None."""
        match = re.search(r'CWE-\d+', as_text(text))
        return match.group(0) if match else None

    df_rcm = pd.read_csv('our-cti-rcm-with-model-outputs.csv')
    df_rcm['extracted_model_output'] = df_rcm['model_outputs'].apply(extract_cwe)
    correct_rcm = (df_rcm['output'] == df_rcm['extracted_model_output']).sum()
    accuracy_rcm = percentage(correct_rcm, len(df_rcm))
    results.append({"Dataset": "RCM", "Metric": "Accuracy", "Value": accuracy_rcm})

    # === MCQ dataset ===
    def extract_mcq_output(model_output):
        """Return the last ASCII letter before ``<|end_of_text|>``, upper-cased."""
        answer = as_text(model_output)
        end_of_text = answer.find("<|end_of_text|>")
        # find() returns -1 when the marker is missing; slicing with [:-1]
        # would drop the last character, which is often the answer letter
        if end_of_text != -1:
            answer = answer[:end_of_text]
        for char in reversed(answer):
            if char.isascii() and char.isalpha():
                return char.upper()
        return ""

    df_mcq = pd.read_csv('our-cti-mcq-with-model-outputs.csv')
    correct_mcq = sum(
        1 for expected, raw_output in zip(df_mcq['output'], df_mcq['model_outputs'])
        if expected == extract_mcq_output(raw_output)
    )
    accuracy_mcq = percentage(correct_mcq, len(df_mcq))
    results.append({"Dataset": "MCQ", "Metric": "Accuracy", "Value": accuracy_mcq})

    # === VSP dataset ===
    def extract_cvss(text):
        """Return the first CVSS v3 vector string in ``text``, or None."""
        match = re.search(r'CVSS:3\.\d/[A-Z]+:[A-Z](?:/[A-Z]+:[A-Z])*', as_text(text))
        return match.group(0) if match else None

    def get_cvss_score(cvss_vector):
        """Return the CVSS base score of ``cvss_vector``, or None on failure."""
        try:
            c = CVSS3(cvss_vector)
            return c.scores()[0]  # First element is the CVSS base score
        except Exception as e:
            print(f"Erreur lors de l'analyse du vecteur CVSS: {e}")
            return None

    df_vsp = pd.read_csv('our-cti-vspwith-model-outputs.csv')
    # Extract every (ground truth, prediction) vector pair once and reuse it
    # for both the accuracy and the MAD
    vsp_vectors = [
        (extract_cvss(expected), extract_cvss(raw_output))
        for expected, raw_output in zip(df_vsp['output'], df_vsp['model_outputs'])
    ]

    # A missing ground-truth vector must not count as a match: None == None
    # would otherwise inflate the accuracy
    correct_vsp = sum(
        1 for gt_vector, pred_vector in vsp_vectors
        if gt_vector is not None and gt_vector == pred_vector
    )
    accuracy_vsp = percentage(correct_vsp, len(df_vsp))
    results.append({"Dataset": "VSP", "Metric": "Accuracy", "Value": accuracy_vsp})

    # MAD for VSP, over the rows where both vectors are extractable and scorable
    total_error = 0
    valid_predictions = 0
    for gt_vector, pred_vector in vsp_vectors:
        if pred_vector and gt_vector:
            pred_score = get_cvss_score(pred_vector)
            gt_score = get_cvss_score(gt_vector)
            if pred_score is not None and gt_score is not None:
                total_error += abs(pred_score - gt_score)
                valid_predictions += 1

    if valid_predictions > 0:
        mad_vsp = total_error / valid_predictions
        results.append({"Dataset": "VSP", "Metric": "MAD", "Value": mad_vsp})
    else:
        print("Aucune prédiction valide pour calculer la MAD dans le dataset VSP.")

    # === Save the results to a CSV file ===
    results_df = pd.DataFrame(results)
    results_df.to_csv('SUMMARY_validateFinetuningWithArticleData.csv', index=False)
    print("Résultats exportés dans 'SUMMARY_validateFinetuningWithArticleData.csv'.")

# Run all the evaluations and save the results
summarize_evaluations_and_save()


Résultats exportés dans 'SUMMARY_validateFinetuningWithArticleData.csv'.
