In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from semantic_stress_matcher import SemanticStressMatcher
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, precision_recall_curve, PrecisionRecallDisplay, roc_auc_score, root_mean_squared_error, mean_absolute_error, make_scorer, mean_squared_error, r2_score
from preprocessor import TextPreprocessor
import time
import numpy as np
np.random.seed(42)

In [44]:
def evaluate_predictions(true_labels, predicted_labels, true_threshold_min, true_threshold_max, predicted_thresh_min, predicted_thresh_max, label_name, display_graph = True):
    # Is lable within the lower and upper bound (larger or equal to min and smaller than max)
    true_classification_labels = (true_labels >= true_threshold_min) & (true_labels < true_threshold_max)
    predicted_classification_labels = (predicted_labels >= predicted_thresh_min) & (predicted_labels < predicted_thresh_max)
    cm = confusion_matrix(true_classification_labels, predicted_classification_labels)
    accuracy = accuracy_score(true_classification_labels, predicted_classification_labels)
    error_rate = 1 - accuracy
    precision = precision_score(true_classification_labels, predicted_classification_labels)
    recall = recall_score(true_classification_labels, predicted_classification_labels)
    f1 = f1_score(true_classification_labels, predicted_classification_labels)
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp)
    print(label_name)
    print(f"Accuracy: {accuracy}\nError_rate: {error_rate}\nPrecision: {precision}\nRecall: {recall}\nSpecificity: {specificity}\nF1 Score {f1}")
    mse = mean_squared_error(true_labels, predicted_labels)
    mae = mean_absolute_error(true_labels, predicted_labels)
    print(f"MSE: {mse}\nMAE: {mae}")
    if display_graph:
        plt.figure(figsize=(8, 6))
        # Confusion Matrix
        conf_matrix_display = ConfusionMatrixDisplay(confusion_matrix=cm)
        conf_matrix_display.plot(cmap='Greys', colorbar=False)
        plt.title(f"{label_name} Classification\nConfusion Matrix")
        plt.savefig(f"modelResults/plots/{label_name}_confusion_matrix.png")
        plt.close()
        # Performance Metrics Bar Chart

        plt.figure(figsize=(8, 6))
        metrics = ['Accuracy', 'Error Rate', 'Precision', 'Recall', 'Specificity', 'F1 Score']
        values = [accuracy, error_rate, precision, recall, specificity, f1]
        
        bars = plt.bar(metrics, values, color='#929591')
        plt.title(f"{label_name} Classification\nPerformance Metrics")
        plt.ylabel("Score")
        plt.ylim(0, 1.05)
        plt.grid(axis='y', linestyle='--', alpha=0.6)

        # Annotate bars
        for i, bar in enumerate(bars):
            height = bar.get_height()
            plt.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3), textcoords="offset points",
                        ha='center', va='bottom')
        plt.savefig(f"modelResults/plots/{label_name}_performance_metrics.png")
        plt.close() 
        
        # Scatter Plot
        plt.figure(figsize=(8, 6))
        plt.scatter(true_labels, predicted_labels, alpha=0.6, color="#929591")
        plt.plot([-1, 1], [-1, 1], '--', color='gray')  # Diagonal line
        plt.xlabel('True Normalised Severity')
        plt.ylabel(f"{label_name} Compound Score")
        plt.title(f"{label_name} Regression\nCompound vs Actual Severity")
        plt.grid(True, linestyle='--')
        plt.savefig(f"modelResults/plots/{label_name}_scatter_regression.png")
        plt.close()
    return cm,accuracy,error_rate,precision,recall,specificity,f1,mse,mae

In [None]:
df = pd.read_excel("../../dataset/SAD_v1_cleaned.xlsx")

display(df)
# Display the mean of the normalised serverity to ensrue stratified sample split
print(f"Overall avg_severity mean: {df['avg_severity_normalised'].mean():.2f}")
# to 0 and 1
print(f"Overall seed to non seed ratio: {df['is_seed'].value_counts(normalize=True)}")
print("--NoN strat split--")
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
print(f"Train avg_severity mean: {train_df['avg_severity_normalised'].mean():.2f}")
print(f"Test avg_severity mean: {test_df['avg_severity_normalised'].mean():.2f}")

print("--strat split--")

# Create Databin with a 3 quantile split (Low, average, high)
# https://pandas.pydata.org/docs/reference/api/pandas.qcut.html
df['severity_bin'] = pd.qcut(df['avg_severity_normalised'], q=3, labels=False)

train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['severity_bin'], random_state=42)

# Ensure a even distribution
print(f"Train avg_severity mean: {train_df['avg_severity_normalised'].mean():.2f}")
print(f"Test avg_severity mean: {test_df['avg_severity_normalised'].mean():.2f}")

# Ensure seeding is not skewed
print(f"Train seed to non seed ratio: {train_df['is_seed'].value_counts(normalize=True)}")
print(f"Test seed to non seed ratio: {test_df['is_seed'].value_counts(normalize=True)}")

# get the thresholds based on the binning
# https://note.nkmk.me/en/python-pandas-agg-aggregate/  gets the max and min from each bin (rounded to 3 dp)
quantile_thresholds = train_df.groupby('severity_bin')['avg_severity_normalised'].aggregate(['min', 'max']).round(3)
# used for the model evaluations
print("-Thresholds-")
print(quantile_thresholds)

# Remove once split
train_df = train_df.drop(columns=['severity_bin'])
test_df = test_df.drop(columns=['severity_bin'])

display(train_df)

Unnamed: 0,sentence,is_stressor,is_stressor_conf,original_label,top_label,second_label,avg_severity,median_severity,SD_severity,Votes,...,Everyday Decision Making,Emotional Turmoil,School,Family Issues,Social Relationships,Work,"Health, Fatigue, or Physical Pain",Source,is_seed,avg_severity_normalised
0,2 of my foster kittens died,1,1.0,Other,Emotional Turmoil,Other,8.2,8,1.60,6,...,0.0,0.9,0.0,0.0,0.0,0.0,0.0,popbots_live,1,-0.673469
1,A better question would be what is not stressi...,1,1.0,Work,Everyday Decision Making,Other,5.4,5,2.65,7,...,0.6,0.1,0.0,0.0,0.0,0.0,0.0,mTurk_synthetic_covid,0,-0.102041
2,a big project at work.,0,0.6,Work,Other,Work,2.2,0,2.71,5,...,0.0,0.0,0.0,0.0,0.0,0.4,0.0,mTurk_synthetic,0,0.551020
3,a certain person at work is being really angry...,1,1.0,Emotional Turmoil,Work,Social Relationships,3.4,3,0.49,8,...,0.0,0.0,0.0,0.0,0.3,0.7,0.0,mTurk_synthetic,0,0.306122
4,a coworker completely ignored and replaced my ...,1,1.0,Work,Work,Emotional Turmoil,4.2,4,0.75,6,...,0.0,0.2,0.0,0.0,0.1,0.7,0.0,mTurk_synthetic,0,0.142857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5782,"I will not be attending school tomorrow, im no...",1,1.0,Family Issues,School,Emotional Turmoil,3.6,4,1.50,6,...,0.0,0.1,0.9,0.0,0.0,0.0,0.0,Inquire,0,0.265306
5783,"It's not fair to me, her, or my husband and ch...",1,1.0,Family Issues,Family Issues,Social Relationships,4.8,4,1.47,9,...,0.0,0.1,0.0,0.6,0.2,0.0,0.0,Inquire,0,0.020408
5784,God knows my mom and dad don't want me.,1,1.0,Family Issues,Family Issues,Emotional Turmoil,6.0,6,1.26,8,...,0.0,0.2,0.0,0.7,0.1,0.0,0.0,Inquire,0,-0.224490
5785,Got into another fight with my brother.,1,1.0,Family Issues,Family Issues,Social Relationships,3.6,4,1.02,7,...,0.0,0.1,0.0,0.8,0.1,0.0,0.0,Inquire,0,0.265306


Overall avg_severity mean: 0.18
Overall seed to non seed ratio: is_seed
0    0.901849
1    0.098151
Name: proportion, dtype: float64
--NoN strat split--
Train avg_severity mean: 0.18
Test avg_severity mean: 0.17
--strat split--
Train avg_severity mean: 0.17
Test avg_severity mean: 0.18
Train seed to non seed ratio: is_seed
0    0.900626
1    0.099374
Name: proportion, dtype: float64
Test seed to non seed ratio: is_seed
0    0.906736
1    0.093264
Name: proportion, dtype: float64
-Thresholds-
                min    max
severity_bin              
0            -0.918  0.020
1             0.061  0.306
2             0.347  1.000


Unnamed: 0,sentence,is_stressor,is_stressor_conf,original_label,top_label,second_label,avg_severity,median_severity,SD_severity,Votes,...,Everyday Decision Making,Emotional Turmoil,School,Family Issues,Social Relationships,Work,"Health, Fatigue, or Physical Pain",Source,is_seed,avg_severity_normalised
2118,typing this answer to you,0,1.0,Work,Other,Social Relationships,0.0,0,0.00,6,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,mTurk_synthetic,0,1.000000
227,going through a divorce and my ex is trying to...,1,1.0,Family Issues,Family Issues,Emotional Turmoil,7.6,7,0.80,8,...,0.0,0.3,0.0,0.6,0.1,0.0,0.0,mTurk_synthetic,0,-0.551020
4590,I'm tired of the stress.,1,1.0,Emotional Turmoil,Other,Everyday Decision Making,4.6,4,1.85,5,...,0.2,0.0,0.0,0.0,0.0,0.0,0.0,Inquire,0,0.061224
5569,i'm just a little disapointed because my boyfr...,1,1.0,Social Relationships,Social Relationships,Emotional Turmoil,3.4,4,1.62,6,...,0.0,0.1,0.0,0.0,0.9,0.0,0.0,Inquire,0,0.306122
1312,money issues stress me out,1,1.0,Financial Problem,Financial Problem,,5.4,6,1.62,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mTurk_synthetic,0,-0.102041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
928,"idiots passing me in ""no passing zones"" at 80 ...",1,1.0,Work,Work,Financial Problem,4.6,6,2.65,6,...,0.0,0.0,0.0,0.0,0.0,0.9,0.0,mTurk_synthetic,0,0.061224
5403,Its going to be hard living with everyone in m...,1,1.0,Everyday Decision Making,Family Issues,,6.4,6,1.50,5,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,Inquire,0,-0.306122
4841,i am just really hurting right now and i feel ...,1,1.0,Emotional Turmoil,Emotional Turmoil,Other,5.2,5,0.75,7,...,0.0,0.6,0.0,0.0,0.2,0.0,0.0,Inquire,0,-0.061224
1428,my credit card balance are so high.,1,0.8,Financial Problem,Financial Problem,Other,4.2,4,2.56,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mTurk_synthetic,0,0.142857


In [46]:
# preprocess data
preprocessor = TextPreprocessor(preprocess_type = 2)
temp = []
# Training Data
for sentence in train_df['sentence']:
    cleaned = preprocessor.preprocess(sentence)
    temp.append(cleaned)
train_df['sentence'] = temp
temp = []
# Testing data
for sentence in test_df['sentence']:
    cleaned = preprocessor.preprocess(sentence)
    temp.append(cleaned)
test_df['sentence'] = temp

display(train_df)

DEBUG: TextPreprocessor Pre
typing this answer to you

DEBUG: TextPreprocessor Final
typing this answer to you

DEBUG: TextPreprocessor Pre
going through a divorce and my ex is trying to take my baby.

DEBUG: TextPreprocessor Final
going through a divorce and my ex is trying to take my baby.

DEBUG: TextPreprocessor Pre
I'm tired of the stress.

DEBUG: TextPreprocessor Final
I'm tired of the stress.

DEBUG: TextPreprocessor Pre
i'm just a little disapointed because my boyfriend doesn't even remember my fucking name. and

DEBUG: TextPreprocessor Final
i'm just a little disapointed because my boyfriend doesn't even remember my fucking name. and

DEBUG: TextPreprocessor Pre
money issues stress me out

DEBUG: TextPreprocessor Final
money issues stress me out

DEBUG: TextPreprocessor Pre
The next part, the travel-and-exposit part, was really giving me trouble.

DEBUG: TextPreprocessor Final
The next part, the travel-and-exposit part, was really giving me trouble.

DEBUG: TextPreprocessor Pr

Unnamed: 0,sentence,is_stressor,is_stressor_conf,original_label,top_label,second_label,avg_severity,median_severity,SD_severity,Votes,...,Everyday Decision Making,Emotional Turmoil,School,Family Issues,Social Relationships,Work,"Health, Fatigue, or Physical Pain",Source,is_seed,avg_severity_normalised
2118,typing this answer to you,0,1.0,Work,Other,Social Relationships,0.0,0,0.00,6,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,mTurk_synthetic,0,1.000000
227,going through a divorce and my ex is trying to...,1,1.0,Family Issues,Family Issues,Emotional Turmoil,7.6,7,0.80,8,...,0.0,0.3,0.0,0.6,0.1,0.0,0.0,mTurk_synthetic,0,-0.551020
4590,I'm tired of the stress.,1,1.0,Emotional Turmoil,Other,Everyday Decision Making,4.6,4,1.85,5,...,0.2,0.0,0.0,0.0,0.0,0.0,0.0,Inquire,0,0.061224
5569,i'm just a little disapointed because my boyfr...,1,1.0,Social Relationships,Social Relationships,Emotional Turmoil,3.4,4,1.62,6,...,0.0,0.1,0.0,0.0,0.9,0.0,0.0,Inquire,0,0.306122
1312,money issues stress me out,1,1.0,Financial Problem,Financial Problem,,5.4,6,1.62,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mTurk_synthetic,0,-0.102041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
928,"idiots passing me in ""no passing zones"" at 80 ...",1,1.0,Work,Work,Financial Problem,4.6,6,2.65,6,...,0.0,0.0,0.0,0.0,0.0,0.9,0.0,mTurk_synthetic,0,0.061224
5403,Its going to be hard living with everyone in m...,1,1.0,Everyday Decision Making,Family Issues,,6.4,6,1.50,5,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,Inquire,0,-0.306122
4841,i am just really hurting right now and i feel ...,1,1.0,Emotional Turmoil,Emotional Turmoil,Other,5.2,5,0.75,7,...,0.0,0.6,0.0,0.0,0.2,0.0,0.0,Inquire,0,-0.061224
1428,my credit card balance are so high.,1,0.8,Financial Problem,Financial Problem,Other,4.2,4,2.56,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mTurk_synthetic,0,0.142857


In [None]:
embeddingModels = ["all-MiniLM-L6-v2","all-MiniLM-L12-v2","all-distilroberta-v1", "paraphrase-mpnet-base-v2"]
embeddingResults = []
for model in embeddingModels:
    matcher = SemanticStressMatcher(model_name=model, sentences=train_df['sentence'].tolist(),severities=train_df['avg_severity_normalised'].tolist(),labels=train_df['is_stressor'].tolist())

    # Evaluate
    true_scores = []
    predicted_scores = []
    timing_scores = []

    for i,row in test_df.iterrows():
        sentence = row['sentence']
        true_severity = row['avg_severity_normalised']
        #gather evaluation data
        start_time = time.time()
        pred_severity = matcher.find_closest(sentence)
        end_time = time.time()
        total_time = end_time - start_time
        timing_scores.append(total_time)
        true_scores.append(true_severity)
        predicted_scores.append(pred_severity["avg_severity"])
    embeddingResults.append([true_scores,predicted_scores,timing_scores])


Eval Dataset
{'match': 'i have a really big test coming up next week.', 'similarity': 0.7113111019134521, 'is_stressor': 1, 'avg_severity': 0.06122448979591844}
{'match': 'You often worry, and your worries prevent you from living life fully.', 'similarity': 0.5381557941436768, 'is_stressor': 1, 'avg_severity': 0.346938775510204}
{'match': "i'm just having a really hard time focusing on work lately, it's hard.", 'similarity': 0.8340033888816833, 'is_stressor': 1, 'avg_severity': 0.1020408163265305}
{'match': 'my dad needs surgery next week', 'similarity': 0.7795841097831726, 'is_stressor': 1, 'avg_severity': -0.183673469387755}
{'match': 'I always get so darn stressed about things.', 'similarity': 0.7146353721618652, 'is_stressor': 1, 'avg_severity': 0.3877551020408163}
{'match': 'work has been stressing me out.', 'similarity': 0.7881234884262085, 'is_stressor': 1, 'avg_severity': 0.3061224489795917}
{'match': "i've been so moody and negative about life lately..its really starting to de

In [48]:
rows = []
for i, result in enumerate(embeddingResults):
    print(embeddingModels[i])
    avgtime = sum(result[2]) / len(result[2])
    maxtime = max(result[2])
    metrics = evaluate_predictions(pd.Series(result[0]), pd.Series(result[1]), true_threshold_min=-1.0, true_threshold_max=0.020, predicted_thresh_min=-1.0, predicted_thresh_max=0.020, label_name=f"High Stress {embeddingModels[i]}")
    print(f"{embeddingModels[i]} Average Time per Message: {avgtime:.4f}")
    print(f"{embeddingModels[i]} Max Time: {maxtime:.4f}")
    rows.append({
        'model_name': embeddingModels[i],
        'Average Time Taken': avgtime,
        "High F1": metrics[6],
        "High Recall": metrics[4],
        "High Precision": metrics[3],
        "High Specificity": metrics[5],
        "High Accuracy": metrics[1],
        "High MSE": metrics[7],
        "High MAE": metrics[8],
    })

export_high = pd.DataFrame(rows)
export_high.to_csv('embedding_results.csv', index=False)

all-MiniLM-L6-v2
High Stress all-MiniLM-L6-v2
Accuracy: 0.6796200345423143
Error_rate: 0.32037996545768566
Precision: 0.48265895953757226
Recall: 0.46518105849582175
Specificity: 0.7759699624530664
F1 Score 0.47375886524822697
MSE: 0.13662557123938712
MAE: 0.28800535758344786
all-MiniLM-L6-v2 Average Time per Message: 0.0167
all-MiniLM-L6-v2 Max Time: 0.0307
all-MiniLM-L12-v2
High Stress all-MiniLM-L12-v2
Accuracy: 0.6692573402417962
Error_rate: 0.3307426597582038
Precision: 0.4659090909090909
Recall: 0.4568245125348189
Specificity: 0.7647058823529411
F1 Score 0.4613220815752461
MSE: 0.13376263056771823
MAE: 0.28610200556906695
all-MiniLM-L12-v2 Average Time per Message: 0.0238
all-MiniLM-L12-v2 Max Time: 0.0388
all-distilroberta-v1
High Stress all-distilroberta-v1
Accuracy: 0.6519861830742659
Error_rate: 0.3480138169257341
Precision: 0.4367816091954023
Recall: 0.4233983286908078
Specificity: 0.7546933667083855
F1 Score 0.42998585572843
MSE: 0.13567317590036965
MAE: 0.2898382150787776


<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [49]:
rows = []
for i, result in enumerate(embeddingResults):
    print(embeddingModels[i])
    avgtime = sum(result[2]) / len(result[2])
    maxtime = max(result[2])
    metrics = evaluate_predictions(pd.Series(result[0]), pd.Series(result[1]), true_threshold_min=0.020, true_threshold_max=0.306,predicted_thresh_min=0.020, predicted_thresh_max=0.306, label_name=f"Moderate Stress {embeddingModels[i]}")
    print(f"{embeddingModels[i]} Average Time per Message: {avgtime:.4f}")
    print(f"{embeddingModels[i]} Max Time: {maxtime:.4f}")
    rows.append({
        'model_name': embeddingModels[i],
        "Moderate F1": metrics[6],
        "Moderate Recall": metrics[4],
        "Moderate Precision": metrics[3],
        "Moderate Specificity": metrics[5],
        "Moderate Accuracy": metrics[1],
        "Moderate MSE": metrics[7],
        "Moderate MAE": metrics[8]
    })

export_mod = pd.DataFrame(rows)
export_mod.to_csv('embedding_results.csv', index=False)

all-MiniLM-L6-v2
Moderate Stress all-MiniLM-L6-v2
Accuracy: 0.5734024179620034
Error_rate: 0.4265975820379966
Precision: 0.3530751708428246
Recall: 0.4246575342465753
Specificity: 0.6418663303909206
F1 Score 0.3855721393034826
MSE: 0.13662557123938712
MAE: 0.28800535758344786
all-MiniLM-L6-v2 Average Time per Message: 0.0167
all-MiniLM-L6-v2 Max Time: 0.0307
all-MiniLM-L12-v2
Moderate Stress all-MiniLM-L12-v2
Accuracy: 0.572538860103627
Error_rate: 0.427461139896373
Precision: 0.34953703703703703
Recall: 0.4136986301369863
Specificity: 0.6456494325346784
F1 Score 0.37892095357590966
MSE: 0.13376263056771823
MAE: 0.28610200556906695
all-MiniLM-L12-v2 Average Time per Message: 0.0238
all-MiniLM-L12-v2 Max Time: 0.0388
all-distilroberta-v1
Moderate Stress all-distilroberta-v1
Accuracy: 0.5682210708117443
Error_rate: 0.43177892918825567
Precision: 0.3476297968397291
Recall: 0.42191780821917807
Specificity: 0.6355611601513241
F1 Score 0.3811881188118812
MSE: 0.13567317590036965
MAE: 0.28983

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [50]:
rows = []
for i, result in enumerate(embeddingResults):
    print(embeddingModels[i])
    avgtime = sum(result[2]) / len(result[2])
    maxtime = max(result[2])
    metrics = evaluate_predictions(pd.Series(result[0]), pd.Series(result[1]), true_threshold_min=0.306, true_threshold_max=1.0, predicted_thresh_min=0.306, predicted_thresh_max=1.0, label_name=f"Low Stress {embeddingModels[i]}")
    print(f"{embeddingModels[i]} Average Time per Message: {avgtime:.4f}")
    print(f"{embeddingModels[i]} Max Time: {maxtime:.4f}")
    rows.append({
        'model_name': embeddingModels[i],
        "Low F1": metrics[6],
        "Low Recall": metrics[4],
        "Low Precision": metrics[3],
        "Low Specificity": metrics[5],
        "Low Accuracy": metrics[1],
        "Low MSE": metrics[7],
        "Low MAE": metrics[8],
    })

export_low = pd.DataFrame(rows)
export_low.to_csv('embedding_results.csv', index=False)

all-MiniLM-L6-v2
Low Stress all-MiniLM-L6-v2
Accuracy: 0.6407599309153713
Error_rate: 0.3592400690846287
Precision: 0.5013774104683195
Recall: 0.4364508393285372
Specificity: 0.7557354925775979
F1 Score 0.4666666666666667
MSE: 0.13662557123938712
MAE: 0.28800535758344786
all-MiniLM-L6-v2 Average Time per Message: 0.0167
all-MiniLM-L6-v2 Max Time: 0.0307
all-MiniLM-L12-v2
Low Stress all-MiniLM-L12-v2
Accuracy: 0.6424870466321243
Error_rate: 0.3575129533678757
Precision: 0.5040871934604905
Recall: 0.44364508393285373
Specificity: 0.7543859649122807
F1 Score 0.4719387755102041
MSE: 0.13376263056771823
MAE: 0.28610200556906695
all-MiniLM-L12-v2 Average Time per Message: 0.0238
all-MiniLM-L12-v2 Max Time: 0.0388
all-distilroberta-v1
Low Stress all-distilroberta-v1
Accuracy: 0.6744386873920553
Error_rate: 0.32556131260794474
Precision: 0.5555555555555556
Recall: 0.47961630695443647
Specificity: 0.7840755735492577
F1 Score 0.5148005148005148
MSE: 0.13567317590036965
MAE: 0.2898382150787776
al

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [None]:
comparison_df = export_high.merge(export_mod, on="model_name").merge(export_low, on="model_name").round(2)
# reset index
comparison_df.set_index("model_name", inplace=True)
display(comparison_df)
comparison_df.to_excel("comparison_df.xlsx")

Unnamed: 0_level_0,Average Time Taken,High F1,High Recall,High Precision,High Specificity,High Accuracy,High MSE,High MAE,Moderate F1,Moderate Recall,...,Moderate Accuracy,Moderate MSE,Moderate MAE,Low F1,Low Recall,Low Precision,Low Specificity,Low Accuracy,Low MSE,Low MAE
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
all-MiniLM-L6-v2,0.02,0.47,0.47,0.48,0.78,0.68,0.14,0.29,0.39,0.42,...,0.57,0.14,0.29,0.47,0.44,0.5,0.76,0.64,0.14,0.29
all-MiniLM-L12-v2,0.02,0.46,0.46,0.47,0.76,0.67,0.13,0.29,0.38,0.41,...,0.57,0.13,0.29,0.47,0.44,0.5,0.75,0.64,0.13,0.29
all-distilroberta-v1,0.04,0.43,0.42,0.44,0.75,0.65,0.14,0.29,0.38,0.42,...,0.57,0.14,0.29,0.51,0.48,0.56,0.78,0.67,0.14,0.29
paraphrase-mpnet-base-v2,0.06,0.48,0.48,0.49,0.78,0.69,0.13,0.28,0.39,0.43,...,0.58,0.13,0.28,0.51,0.48,0.54,0.77,0.67,0.13,0.28
