In [12]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, precision_recall_curve, PrecisionRecallDisplay, roc_auc_score, root_mean_squared_error, mean_absolute_error, make_scorer, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from textblob import TextBlob
import numpy as np
from preprocessor import TextPreprocessor
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ollie\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [13]:
def evaluate_predictions(true_labels, predicted_labels, true_threshold_min, true_threshold_max, predicted_thresh_min, predicted_thresh_max, label_name, display_graph = True):
    """Score continuous predictions both as a binary classifier and as a regressor.

    The true and the predicted scores are each binarised with a half-open band
    (``min <= value < max``). The two boolean label vectors are then compared
    with the usual classification metrics. MSE and MAE are computed on the raw,
    un-binarised scores.

    Args:
        true_labels: Ground-truth scores; must support element-wise comparison
            (e.g. a pandas Series or NumPy array).
        predicted_labels: Predicted scores, aligned with ``true_labels``.
        true_threshold_min: Inclusive lower bound of the positive band for the true scores.
        true_threshold_max: Exclusive upper bound of the positive band for the true scores.
        predicted_thresh_min: Inclusive lower bound of the positive band for the predictions.
        predicted_thresh_max: Exclusive upper bound of the positive band for the predictions.
        label_name: Printed as a header and used in plot titles and output file names.
        display_graph: When True, a confusion matrix, a metrics bar chart and a
            true-vs-predicted scatter plot are written as PNG files. The figures
            are saved and closed, not shown inline.

    Returns:
        Tuple ``(cm, accuracy, error_rate, precision, recall, specificity, f1, mse, mae)``.
    """
    # Is the label within the lower and upper bound (larger or equal to min and smaller than max)
    true_classification_labels = (true_labels >= true_threshold_min) & (true_labels < true_threshold_max)
    predicted_classification_labels = (predicted_labels >= predicted_thresh_min) & (predicted_labels < predicted_thresh_max)

    # Pin the label order so the matrix is always 2x2, even when one class never
    # occurs; otherwise the four-way unpack of cm.ravel() below would fail.
    cm = confusion_matrix(true_classification_labels, predicted_classification_labels, labels=[False, True])
    accuracy = accuracy_score(true_classification_labels, predicted_classification_labels)
    error_rate = 1 - accuracy
    precision = precision_score(true_classification_labels, predicted_classification_labels)
    recall = recall_score(true_classification_labels, predicted_classification_labels)
    f1 = f1_score(true_classification_labels, predicted_classification_labels)
    tn, fp, fn, tp = cm.ravel()
    # No negatives at all -> specificity is undefined; report 0.0 instead of NaN.
    negatives = tn + fp
    specificity = tn / negatives if negatives else 0.0

    print(label_name)
    print(f"Accuracy: {accuracy}\nError_rate: {error_rate}\nPrecision: {precision}\nRecall: {recall}\nSpecificity: {specificity}\nF1 Score {f1}")
    mse = mean_squared_error(true_labels, predicted_labels)
    mae = mean_absolute_error(true_labels, predicted_labels)
    print(f"MSE: {mse}\nMAE: {mae}")

    if display_graph:
        # Confusion Matrix
        # Draw onto our own axes: without ax=..., ConfusionMatrixDisplay.plot()
        # opens a second figure and the pre-made one is left empty and unclosed.
        fig, ax = plt.subplots(figsize=(8, 6))
        conf_matrix_display = ConfusionMatrixDisplay(confusion_matrix=cm)
        conf_matrix_display.plot(ax=ax, cmap='Greys', colorbar=False)
        ax.set_title(f"{label_name} Classification\nConfusion Matrix")
        fig.savefig(f"../modelEvaluation/modelResults/plots/{label_name}_confusion_matrix.png")
        plt.close(fig)

        # Performance Metrics Bar Chart
        fig, ax = plt.subplots(figsize=(8, 6))
        metrics = ['Accuracy', 'Error Rate', 'Precision', 'Recall', 'Specificity', 'F1 Score']
        values = [accuracy, error_rate, precision, recall, specificity, f1]

        bars = ax.bar(metrics, values, color='#929591')
        ax.set_title(f"{label_name} Classification\nPerformance Metrics")
        ax.set_ylabel("Score")
        ax.set_ylim(0, 1.05)
        ax.grid(axis='y', linestyle='--', alpha=0.6)

        # Annotate bars with their value, just above the top edge
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3), textcoords="offset points",
                        ha='center', va='bottom')
        fig.savefig(f"../modelEvaluation/modelResults/plots/{label_name}_performance_metrics.png")
        plt.close(fig)

        # Scatter Plot
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(true_labels, predicted_labels, alpha=0.6, color="#929591")
        ax.plot([-1, 1], [-1, 1], '--', color='gray')  # Diagonal line (perfect agreement)
        ax.set_xlabel('True Normalised Severity')
        ax.set_ylabel(f"{label_name} Compound Score")
        ax.set_title(f"{label_name} Regression\nCompound vs Actual Severity")
        ax.grid(True, linestyle='--')
        fig.savefig(f"../modelEvaluation/modelResults/plots/{label_name}_scatter_regression.png")
        plt.close(fig)
    return cm,accuracy,error_rate,precision,recall,specificity,f1,mse,mae

In [14]:
def find_best_thresholds(seed_data, pred_column):
    """Grid-search two cut-offs on ``pred_column`` that maximise macro-averaged F1.

    Every ordered pair ``(lower, upper)`` drawn from 50 evenly spaced values in
    [-1, 1] is tried. For each pair a score is labelled:

    * ``"high"``      if ``score < lower``
    * ``"moderate"``  if ``lower <= score < upper``
    * ``"low"``       otherwise

    The upper bounds are exclusive, matching ``classify_stress_level_row`` and
    the half-open bands used by ``evaluate_predictions``, so a tuned threshold
    means the same thing where it is applied as where it was selected.

    Args:
        seed_data: Table containing ``pred_column`` and a ``'class_label'``
            column (expected to use the same "high"/"moderate"/"low" strings).
        pred_column: Name of the column holding the continuous scores.

    Returns:
        ``(best_thresh_min, best_thresh_max, best_f1)``. Both thresholds are
        ``None`` and ``best_f1`` is 0 if no pair achieves a macro-F1 above 0.
    """
    # 50 evenly spaced threshold values to test
    thresholds = np.linspace(-1, 1, 50)
    scores = np.asarray(seed_data[pred_column])
    true_classes = seed_data['class_label']

    best_thresh_min = None
    best_thresh_max = None
    best_f1 = 0

    for i, threshold1 in enumerate(thresholds):
        # The "high" mask only depends on the lower cut-off, so build it once
        # per outer iteration rather than once per pair.
        is_high = scores < threshold1
        for threshold2 in thresholds[i + 1:]:
            preds = np.where(is_high, "high", np.where(scores < threshold2, "moderate", "low"))
            # Several per-class scores need to be averaged; macro treats each
            # class with equal importance.
            f1 = f1_score(true_classes, preds, average='macro', zero_division=0)
            if f1 > best_f1:
                best_f1 = f1
                best_thresh_min = threshold1
                best_thresh_max = threshold2

    return best_thresh_min, best_thresh_max, best_f1

In [15]:
def classify_stress_level_row(row, high_thresh, low_thresh):
    """Map a row's normalised severity onto a stress class.

    Args:
        row: Mapping-like object with an ``'avg_severity_normalised'`` entry.
        high_thresh: Severities strictly below this are ``'high'``.
        low_thresh: Remaining severities strictly below this are ``'moderate'``.

    Returns:
        ``'high'``, ``'moderate'`` or ``'low'``.
    """
    severity = row['avg_severity_normalised']
    # Guard clause for the most severe band first, then split the remainder.
    if severity < high_thresh:
        return 'high'
    return 'moderate' if severity < low_thresh else 'low'

In [16]:
# VADER on fully pre-processed sentences, followed by seed-based threshold tuning.
df = pd.read_excel("../dataset/SAD_v1_cleaned.xlsx")
analyzer = SentimentIntensityAnalyzer()
# full
preprocessor = TextPreprocessor(preprocess_type = 1)

# Score every sentence; one record per input row.
results = []
for row_idx, record in df.iterrows():
    cleaned_text = preprocessor.preprocess(str(record['sentence']))
    polarity = analyzer.polarity_scores(cleaned_text)
    results.append({
        'sentence': cleaned_text,
        'vader_compound': polarity['compound'],
        'avg_severity_normalised': record['avg_severity_normalised'],
        'is_seed': record['is_seed'],
    })

vader_results = pd.DataFrame(results)

# Thresholding: work on the seed rows only.
seed_mask = vader_results['is_seed'] == 1
seed_data = vader_results[seed_mask].copy()
print(seed_data.shape)

# Severity terciles of the seed rows define the stress bands.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.quantile.html
seed_severity = seed_data['avg_severity_normalised']
low_stress_thresh = seed_severity.quantile(2/3)
high_stress_thresh = seed_severity.quantile(1/3)
print("---- STRESS LEVEL THRESHOLDS (based on seed data) ----")
print(f"High stress < {high_stress_thresh:.3f}")

# Give every seed row a high / moderate / low class label.
seed_data['class_label'] = seed_data.apply(
    lambda seed_row: classify_stress_level_row(seed_row, high_stress_thresh, low_stress_thresh),
    axis=1,
)

# Count labels
is_high_stress = seed_severity < high_stress_thresh
print(f"High Stress: {is_high_stress.sum()}")

# Intended reading of the tuned VADER cut-offs:
#   compound < vader_thresh_high                      -> high stress
#   vader_thresh_high < compound < vader_thresh_low   -> moderate
#   compound > vader_thresh_low                       -> low
seed_data['true_label'] = is_high_stress
vader_thresh_high, vader_thresh_low, vader_f1 = find_best_thresholds(seed_data, 'vader_compound')
print(f"VADER Optimised Threshold (High Stress): {vader_thresh_high:.3f} with F1: {vader_f1:.3f}")

# Evaluate on the full result set: first with VADER's stock -0.05 cut-off,
# then with the cut-off tuned on the seed rows.
vader_high_eval_preprocessor = evaluate_predictions(
    vader_results['avg_severity_normalised'],
    vader_results['vader_compound'],
    true_threshold_min=-1,
    true_threshold_max=high_stress_thresh,
    predicted_thresh_min=-1,
    predicted_thresh_max=-0.05,
    label_name="VADER High Stress W PREPROCESSOR",
)
vader_high_eval_preprocessor_threshold = evaluate_predictions(
    vader_results['avg_severity_normalised'],
    vader_results['vader_compound'],
    true_threshold_min=-1,
    true_threshold_max=high_stress_thresh,
    predicted_thresh_min=-1,
    predicted_thresh_max=vader_thresh_high,
    label_name="Threshold Tuned VADER High Stress W PREPROCESSOR",
)

DEBUG: TextPreprocessor Pre
2 of my foster kittens died

['2', 'of', 'my', 'foster', 'kittens', 'died']
['2', 'foster', 'kitten', 'died']
DEBUG: TextPreprocessor Final
2 foster kitten died

DEBUG: TextPreprocessor Pre
A better question would be what is not stressing me out.

['a', 'better', 'question', 'would', 'be', 'what', 'is', 'not', 'stressing', 'me', 'out', '.']
['better', 'question', 'would', 'stressing', '.']
DEBUG: TextPreprocessor Final
better question would stressing .

DEBUG: TextPreprocessor Pre
a big project at work.

['a', 'big', 'project', 'at', 'work', '.']
['big', 'project', 'work', '.']
DEBUG: TextPreprocessor Final
big project work .

DEBUG: TextPreprocessor Pre
a certain person at work is being really angry with me lately.

['a', 'certain', 'person', 'at', 'work', 'is', 'being', 'really', 'angry', 'with', 'me', 'lately', '.']
['certain', 'person', 'work', 'really', 'angry', 'lately', '.']
DEBUG: TextPreprocessor Final
certain person work really angry lately .

DEBU

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [None]:
# VADER baseline (preprocess_type = 2), evaluated per stress band with VADER's stock cut-offs.
df = pd.read_excel("../dataset/SAD_v1_cleaned.xlsx")
analyzer = SentimentIntensityAnalyzer()
preprocessor = TextPreprocessor(preprocess_type = 2)

# Score every sentence; one record per input row.
results = []
for row_idx, record in df.iterrows():
    cleaned_text = preprocessor.preprocess(str(record['sentence']))
    polarity = analyzer.polarity_scores(cleaned_text)
    results.append({
        'sentence': cleaned_text,
        'vader_compound': polarity['compound'],
        'avg_severity_normalised': record['avg_severity_normalised'],
        'is_seed': record['is_seed'],
    })

vader_results = pd.DataFrame(results)
print(vader_results.head())
vader_results.to_csv('../modelEvaluation/modelResults/vader_results.csv', index=False)

# Thresholding: work on the seed rows only.
seed_mask = vader_results['is_seed'] == 1
seed_data = vader_results[seed_mask].copy()
print(seed_data.shape)

# Severity terciles of the seed rows define the stress bands.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.quantile.html
seed_severity = seed_data['avg_severity_normalised']
low_stress_thresh = seed_severity.quantile(2/3)
high_stress_thresh = seed_severity.quantile(1/3)
print("---- STRESS LEVEL THRESHOLDS (based on seed data) ----")
print(f"High stress < {high_stress_thresh:.3f}")

# Count labels
is_high_stress = seed_severity < high_stress_thresh
print(f"High Stress: {is_high_stress.sum()}")

# Inputs shared by all three evaluations below.
true_scores = vader_results['avg_severity_normalised']
compound_scores = vader_results['vader_compound']

# VADER's documented compound cut-offs (https://github.com/cjhutto/vaderSentiment):
#   positive sentiment: compound score >= 0.05
#   neutral sentiment:  (compound score > -0.05) and (compound score < 0.05)
#   negative sentiment: compound score <= -0.05

# High stress
seed_data['true_label'] = is_high_stress
vader_high_eval = evaluate_predictions(
    true_scores,
    compound_scores,
    true_threshold_min=-1,
    true_threshold_max=high_stress_thresh,
    predicted_thresh_min=-1,
    predicted_thresh_max=-0.05,
    label_name="VADER High Stress",
)

# Moderate stress
seed_data['true_label'] = seed_severity.ge(high_stress_thresh) & seed_severity.lt(low_stress_thresh)
vader_moderate_eval = evaluate_predictions(
    true_scores,
    compound_scores,
    true_threshold_min=high_stress_thresh,
    true_threshold_max=low_stress_thresh,
    predicted_thresh_min=-0.05,
    predicted_thresh_max=0.05,
    label_name="VADER Moderate Stress",
)

# Low stress
seed_data['true_label'] = seed_severity >= low_stress_thresh
vader_low_eval = evaluate_predictions(
    true_scores,
    compound_scores,
    true_threshold_min=low_stress_thresh,
    true_threshold_max=1.0,
    predicted_thresh_min=0.05,
    predicted_thresh_max=1.0,
    label_name="VADER Low Stress",
)

DEBUG: TextPreprocessor Pre
2 of my foster kittens died

DEBUG: TextPreprocessor Final
2 of my foster kittens died

DEBUG: TextPreprocessor Pre
A better question would be what is not stressing me out.

DEBUG: TextPreprocessor Final
A better question would be what is not stressing me out.

DEBUG: TextPreprocessor Pre
a big project at work.

DEBUG: TextPreprocessor Final
a big project at work.

DEBUG: TextPreprocessor Pre
a certain person at work is being really angry with me lately.

DEBUG: TextPreprocessor Final
a certain person at work is being really angry with me lately.

DEBUG: TextPreprocessor Pre
a coworker completely ignored and replaced my work

DEBUG: TextPreprocessor Final
a coworker completely ignored and replaced my work

DEBUG: TextPreprocessor Pre
a coworker has been really cold with me recently and it makes work uncomfortable.

DEBUG: TextPreprocessor Final
a coworker has been really cold with me recently and it makes work uncomfortable.

DEBUG: TextPreprocessor Pre
a fe

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [18]:
# TextBlob baseline: score every sentence, then evaluate each stress band.
# NOTE: relies on `df` having been loaded by an earlier cell.

# Basic data cleaning only, as heavier preprocessing isn't required for VADER and TextBlob.
# Built once, outside the loop: the configuration never changes between rows,
# and this matches how the VADER cells construct their preprocessor.
preprocessor = TextPreprocessor(preprocess_type = 2)

results = []
for index, row in df.iterrows():
    sentence = preprocessor.preprocess(str(row['sentence']))
    severity = row['avg_severity_normalised']
    seed = row['is_seed']

    # ----- TextBlob -----
    blob = TextBlob(sentence)
    textblob_polarity = blob.sentiment.polarity  # -1 to +1

    results.append({
        'sentence': sentence,
        'textblob_polarity': textblob_polarity,
        'avg_severity_normalised': severity,
        'is_seed': seed
    })

blob_results = pd.DataFrame(results)
blob_results.to_csv('../modelEvaluation/modelResults/blob_results.csv', index=False)

# Only the seed rows are used to derive the stress-band boundaries.
seed_data = blob_results[blob_results['is_seed'] == 1].copy()

# Severity terciles of the seed rows define the stress bands.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.quantile.html
low_stress_thresh = seed_data['avg_severity_normalised'].quantile(2/3)
high_stress_thresh = seed_data['avg_severity_normalised'].quantile(1/3)
print("---- STRESS LEVEL THRESHOLDS (based on seed data) ----")
print(f"High stress {high_stress_thresh:.3f}")

## High Stress
seed_data['true_label'] = seed_data['avg_severity_normalised'] < high_stress_thresh
blob_high_eval = evaluate_predictions(blob_results['avg_severity_normalised'],blob_results['textblob_polarity'], -1, high_stress_thresh, -1, -0.05, "TextBlob High Stress")

## Moderate
seed_data['true_label'] = (seed_data['avg_severity_normalised'] >= high_stress_thresh) & (seed_data['avg_severity_normalised'] < low_stress_thresh)
blob_moderate_eval = evaluate_predictions(blob_results['avg_severity_normalised'],blob_results['textblob_polarity'],true_threshold_min=high_stress_thresh,true_threshold_max=low_stress_thresh,predicted_thresh_min=-0.05,predicted_thresh_max=0.05,label_name="TextBlob Moderate Stress")

## Low Stress
seed_data['true_label'] = seed_data['avg_severity_normalised'] >= low_stress_thresh
blob_low_eval = evaluate_predictions(blob_results['avg_severity_normalised'], blob_results['textblob_polarity'], true_threshold_min=low_stress_thresh, true_threshold_max=1.0, predicted_thresh_min=0.05, predicted_thresh_max=1.0,label_name="TextBlob Low Stress")

DEBUG: TextPreprocessor Pre
2 of my foster kittens died

DEBUG: TextPreprocessor Final
2 of my foster kittens died

DEBUG: TextPreprocessor Pre
A better question would be what is not stressing me out.

DEBUG: TextPreprocessor Final
A better question would be what is not stressing me out.

DEBUG: TextPreprocessor Pre
a big project at work.

DEBUG: TextPreprocessor Final
a big project at work.

DEBUG: TextPreprocessor Pre
a certain person at work is being really angry with me lately.

DEBUG: TextPreprocessor Final
a certain person at work is being really angry with me lately.

DEBUG: TextPreprocessor Pre
a coworker completely ignored and replaced my work

DEBUG: TextPreprocessor Final
a coworker completely ignored and replaced my work

DEBUG: TextPreprocessor Pre
a coworker has been really cold with me recently and it makes work uncomfortable.

DEBUG: TextPreprocessor Final
a coworker has been really cold with me recently and it makes work uncomfortable.

DEBUG: TextPreprocessor Pre
a fe

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [19]:
# Position of each reported metric inside the tuple returned by
# evaluate_predictions:
# (cm, accuracy, error_rate, precision, recall, specificity, f1, mse, mae)
# Dict order is the column order of the rendered table.
_METRIC_POSITIONS = {
    "F1 Score": 6,
    "Recall": 4,
    "Precision": 3,
    "Specificity": 5,
    "Accuracy": 1,
    "MSE": 7,
    "MAE": 8,
}


def _build_comparison_table(evals_by_model):
    """Build a metrics table (one row per model) from evaluate_predictions results.

    Args:
        evals_by_model: Dict mapping a display name to the result tuple
            returned by ``evaluate_predictions``; insertion order is row order.

    Returns:
        DataFrame indexed by "Model", with all values rounded to 2 d.p.
    """
    table = {"Model": list(evals_by_model)}
    for metric, position in _METRIC_POSITIONS.items():
        table[metric] = [evaluation[position] for evaluation in evals_by_model.values()]
    # Round all values to 2dp
    return pd.DataFrame(table).round(2).set_index("Model")


# One VADER-vs-TextBlob table per stress band.
for stress_level, vader_eval, blob_eval in [
    ("High", vader_high_eval, blob_high_eval),
    ("Moderate", vader_moderate_eval, blob_moderate_eval),
    ("Low", vader_low_eval, blob_low_eval),
]:
    comparison_df = _build_comparison_table({
        f"VADER - {stress_level} Stress": vader_eval,
        f"TextBlob - {stress_level} Stress": blob_eval,
    })
    display(comparison_df)

Unnamed: 0_level_0,F1 Score,Recall,Precision,Specificity,Accuracy,MSE,MAE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
VADER - High Stress,0.43,0.74,0.3,0.4,0.49,0.37,0.51
TextBlob - High Stress,0.3,0.36,0.26,0.64,0.57,0.25,0.4


Unnamed: 0_level_0,F1 Score,Recall,Precision,Specificity,Accuracy,MSE,MAE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
VADER - Moderate Stress,0.22,0.19,0.24,0.76,0.6,0.37,0.51
TextBlob - Moderate Stress,0.32,0.38,0.27,0.59,0.53,0.25,0.4


Unnamed: 0_level_0,F1 Score,Recall,Precision,Specificity,Accuracy,MSE,MAE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
VADER - Low Stress,0.25,0.16,0.54,0.89,0.57,0.37,0.51
TextBlob - Low Stress,0.29,0.22,0.43,0.76,0.52,0.25,0.4


In [20]:
# Compare the three VADER high-stress variants side by side.
vader_variants = [
    ("VADER - High Stress", vader_high_eval),
    ("VADER - High Stress with pre-processing", vader_high_eval_preprocessor),
    ("VADER - High Stress with pre-processing and thresholding", vader_high_eval_preprocessor_threshold),
]
# (column title, index into the evaluate_predictions result tuple), in column order
metric_columns = [
    ("F1 Score", 6),
    ("Recall", 4),
    ("Precision", 3),
    ("Specificity", 5),
    ("Accuracy", 1),
    ("MSE", 7),
    ("MAE", 8),
]

table = {"Model": [variant_name for variant_name, _result in vader_variants]}
for column_title, tuple_index in metric_columns:
    table[column_title] = [_result[tuple_index] for _name, _result in vader_variants]

# Round all values to 2dp
comparison_df = pd.DataFrame(table).round(2).set_index("Model")
display(comparison_df)

Unnamed: 0_level_0,F1 Score,Recall,Precision,Specificity,Accuracy,MSE,MAE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
VADER - High Stress,0.43,0.74,0.3,0.4,0.49,0.37,0.51
VADER - High Stress with pre-processing,0.43,0.71,0.31,0.43,0.5,0.35,0.5
VADER - High Stress with pre-processing and thresholding,0.37,0.41,0.34,0.71,0.63,0.35,0.5
