In [None]:
from collections.abc import Generator, Callable
from pathlib import Path
import typing
from typing import Any, TypeAlias, Literal
import pandas as pd
import numpy as np
import datetime as dt
import re
from functools import partial, reduce
from tqdm import tqdm
from IPython.display import (
    display, # type: ignore[reportUnknownVariableType]
    Markdown,
)

import importlib

from config.fastf1 import fastf1
from config import config
import src.data.constants as dataset_constants
importlib.reload(dataset_constants);
import src.data.loader
importlib.reload(src.data.loader);
from src.data.loader import stream_ndjson, load_submissions_df, load_comments_df
from src.data.preprocessing import concatenate_submissions_and_comments

from src.utils import (
    temporary_pandas_options,
    display_full_dataframe,
    hide_index,
    compose,
)
from src import utils
# Seed random number generators through the project helper, presumably for
# reproducible reruns (which libraries are seeded is decided in `src.utils` — TODO confirm).
utils.set_random_seeds()
# Device handle from the project helper; the BERT model and its tokenized
# inputs are moved onto it in the BERT cells below.
DEVICE = utils.get_device()

import logging
# Limit the `fastf1` logger to warnings and errors.
logging.getLogger('fastf1').setLevel(logging.WARNING)

# Load data and find submissions related to steward decisions

In [2]:
# Columns requested on top of the project defaults: the extra submission fields
# feed the steward-decision filter, and `link_id` is what the sentiment loops
# use to match comments to a submission.
submission_columns = dataset_constants.DEFAULT_SUBMISSION_COLUMNS | {'permalink', 'post_hint', 'link_flair_text'}
comment_columns = dataset_constants.DEFAULT_COMMENT_COLUMNS | {'link_id'}

f1_submissions_df = load_submissions_df(dataset_constants.RawFile.FORMULA1_SUBMISSIONS, columns=submission_columns)
f1_comments_df = load_comments_df(dataset_constants.RawFile.FORMULA1_COMMENTS, columns=comment_columns)

In [5]:
# Discard comments whose body is exactly '[removed]' or '[deleted]'; such rows
# carry no text to score.
placeholder_bodies = {'[removed]', '[deleted]'}
is_placeholder_comment = f1_comments_df['body'].isin(placeholder_bodies)
f1_comments_df = f1_comments_df[~is_placeholder_comment]

In [6]:
# TODO: testing purposes
# Prefix the permalinks with the Reddit host. Only rows that do not already
# start with the host are touched, so re-running this cell is harmless; an
# unconditional concatenation would yield 'www.reddit.comwww.reddit.com/...'
# on the second run.
reddit_host = 'www.reddit.com'
lacks_host = ~f1_submissions_df['permalink'].str.startswith(reddit_host, na=False)
f1_submissions_df.loc[lacks_host, 'permalink'] = reddit_host + f1_submissions_df.loc[lacks_host, 'permalink']

In [None]:
# Keywords searched for in submission titles (whole words, case-insensitive).
steward_decision_related_words = {
    'penalty', 'steward', 'decision', 'appeal', 'review', 'ruling', 'investigation', 'regulation',
    'seconds', 'sec',
    'collision', 'crash', 'incident', 'overtake', 'virtual safety car', 'blocking', 'brake test', 'contact',
    'red flag', 'yellow flag',
    'controversial', 'rigged', 'corrupt', 'bias', 'protest', 'FIA', 'document', 'infringement',
}

# Manually exclude some posts unrelated to steward decisions
excluded_submission_ids = {'vdr1c6', 'w7z5aj', 'wf87e0', 'x1zd5z', 'x3y140'}

# Single alternation of word-bounded keywords: \bword\b|\bword\b|...
words_regex = '|'.join(fr'\b{word}\b' for word in steward_decision_related_words)
steward_decision_pattern = re.compile(words_regex, flags=re.IGNORECASE)

relevant_flairs = {':post-technical: Technical', ':post-news: News'}


def _title_mentions_steward_decision(title):
    """Return True when at least one keyword occurs anywhere in `title`."""
    return steward_decision_pattern.search(title) is not None


# Row-level conditions; a submission is kept only when all four hold.
has_related_words = f1_submissions_df['title'].apply(_title_mentions_steward_decision)
has_relevant_flairs = f1_submissions_df['link_flair_text'].isin(relevant_flairs)
is_image_post = f1_submissions_df['post_hint'].eq('image')
is_included = ~f1_submissions_df['id'].isin(excluded_submission_ids)

selection_mask = has_related_words & has_relevant_flairs & is_image_post & is_included
# Copy so that the sentiment columns added later do not write into a view.
steward_decision_submissions_df = f1_submissions_df.loc[selection_mask].copy()

with display_full_dataframe():
    print(len(steward_decision_submissions_df))
    display(steward_decision_submissions_df.head(2))

# Discretization of continuous sentiment function

In [12]:
neutral_range = (-0.05, 0.05)

def to_sentiment_category(sentiment: float) -> Literal['Positive', 'Negative', 'Neutral']:
    if sentiment >= neutral_range[1]:
        return 'Positive'
    elif sentiment <= -neutral_range[0]:
        return 'Negative'
    else:
        return 'Neutral'

def to_discrete_sentiment(sentiment: float) -> int:
    category = to_sentiment_category(sentiment)

    match category:
        case 'Positive':
            return 1
        case 'Negative':
            return -1
        case 'Neutral':
            return 0

### VADER SENTIMENT ANALYSIS

In [17]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Shared analyzer instance; later cells read `polarity_scores(text)['compound']`
# from it as the VADER score of a comment.
vader_analyzer = SentimentIntensityAnalyzer()

In [None]:
# For every selected submission, store a vote-weighted average of the VADER
# compound scores of its comments in the `average_sentiment_vader` column.
# The value is NaN when the submission has no comments in `f1_comments_df` or
# when all of its comment scores are 0.
# NOTE(review): `comments_df` stays bound after the loop to the comments of the
# last submission processed, and the "attempt 3" cell further down reads it.
for index, steward_decision_submission in steward_decision_submissions_df.iterrows():
    # Comments are matched through `link_id`, which is compared against the
    # submission id with a 't3_' prefix.
    # (Reusing the same quote character inside an f-string needs Python 3.12+.)
    submission_link_id = f't3_{steward_decision_submission['id']}'
    # Copy so that adding the `compound` column below does not write into a view.
    comments_df = f1_comments_df[f1_comments_df['link_id'] == submission_link_id].copy()

    if comments_df.empty:
        steward_decision_submissions_df.loc[index, 'average_sentiment_vader'] = np.nan
        continue

    # Normaliser: total of absolute comment scores.
    number_of_votes = np.abs(comments_df['score']).sum()

    # Guard against division by zero when every comment has score 0.
    if number_of_votes == 0:
        steward_decision_submissions_df.loc[index, 'average_sentiment_vader'] = np.nan
        continue

    comments_df.loc[:, 'compound'] = comments_df['body'].apply(
        lambda text: vader_analyzer.polarity_scores(text)['compound']
    )

    # The numerator keeps the sign of each score while the denominator uses
    # absolute values, so a comment with a negative score pulls the average
    # away from its own sentiment.
    average_sentiment = (comments_df['compound'] * comments_df['score']).sum() / number_of_votes
    steward_decision_submissions_df.loc[index, 'average_sentiment_vader'] = average_sentiment

with display_full_dataframe():
    display(steward_decision_submissions_df.head(2))

### BERT SENTIMENT ANALYSIS

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and classifier come from the same pretrained checkpoint; the model
# is then moved onto the selected device.
sentiment_checkpoint = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(sentiment_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(sentiment_checkpoint)
model.to(DEVICE);

import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification

def bert_sentiment(text: str) -> float:
    """Classify `text` with the loaded model and return its polarity.

    The text is tokenized (truncated to at most 512 tokens), moved to `DEVICE`
    and passed through `model` without gradient tracking. The index of the
    largest logit is then shifted down by one.

    Args:
        text: Comment text to classify.

    Returns:
        -1, 0 or 1, read as negative, neutral or positive given the class
        order 0/1/2 = negative/neutral/positive.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512).to(DEVICE)

    with torch.no_grad():
        logits = model(**encoded).logits

    predicted_class = torch.argmax(logits, dim=1).item()  # 0: negative, 1: neutral, 2: positive
    return predicted_class - 1  # -1: negative, 0: neutral, 1: positive

In [18]:
# Same aggregation as the VADER loop, but each comment is scored with
# `bert_sentiment` and the result is stored in `average_sentiment_bert`.
# The value is NaN when the submission has no comments in `f1_comments_df` or
# when all of its comment scores are 0.
# NOTE(review): `comments_df` stays bound after the loop to the comments of the
# last submission processed, and the "attempt 3" cell further down reads it.
for index, steward_decision_submission in steward_decision_submissions_df.iterrows():
    # Comments are matched through `link_id`, which is compared against the
    # submission id with a 't3_' prefix.
    # (Reusing the same quote character inside an f-string needs Python 3.12+.)
    submission_link_id = f't3_{steward_decision_submission['id']}'
    # Copy so that adding the `compound` column below does not write into a view.
    comments_df = f1_comments_df[f1_comments_df['link_id'] == submission_link_id].copy()

    if comments_df.empty:
        steward_decision_submissions_df.loc[index, 'average_sentiment_bert'] = np.nan
        continue

    # Normaliser: total of absolute comment scores.
    number_of_votes = np.abs(comments_df['score']).sum()

    # Guard against division by zero when every comment has score 0.
    if number_of_votes == 0:
        steward_decision_submissions_df.loc[index, 'average_sentiment_bert'] = np.nan
        continue

    # The column is still called `compound`, but here it holds the -1/0/1
    # value returned by `bert_sentiment`.
    comments_df.loc[:, 'compound'] = comments_df['body'].apply(bert_sentiment)

    # Signed scores in the numerator, absolute scores in the denominator.
    average_sentiment = (comments_df['compound'] * comments_df['score']).sum() / number_of_votes
    steward_decision_submissions_df.loc[index, 'average_sentiment_bert'] = average_sentiment

#  with display_full_dataframe():
    # display(steward_decision_submissions_df)
    # display(steward_decision_submissions_df['average_sentiment_bert'])

In [None]:
# Render the whole steward-decision frame, including whichever sentiment
# columns the cells above have added.
# NOTE(review): this displays every row; consider `.head()` if the frame grows.
with display_full_dataframe():
    display(steward_decision_submissions_df)

In [15]:
# MEAN ABSOLUTE ERROR between the BERT and VADER per-submission averages.
# Submissions without a usable average hold NaN in those columns. `.sum()`
# skips such rows, so dividing by the full row count would shrink the result;
# `.mean()` divides by the number of rows that actually have a difference.
absolute_sentiment_error = (
    steward_decision_submissions_df['average_sentiment_bert']
    - steward_decision_submissions_df['average_sentiment_vader']
).abs()
print(absolute_sentiment_error.mean())

In [16]:
# Quick look at which columns the frame holds at this point.
display(steward_decision_submissions_df.columns)


# VALIDATION
# 1. CATEGORIZATION AGREEMENT METRIC - Cohen's Kappa

In [17]:
from sklearn.metrics import cohen_kappa_score

# Agreement between the VADER and BERT categories, one pair per submission.
# `individual_vader_sentiments` / `individual_bert_sentiments` are not defined
# by any cell above this one (a later scratch cell binds them to placeholder
# dicts), so relying on them fails on a fresh top-to-bottom run. The
# per-submission averages are compared instead, restricted to submissions for
# which both averages exist.
paired_sentiments = steward_decision_submissions_df[
    ['average_sentiment_vader', 'average_sentiment_bert']
].dropna()

vader_sentiment_labels = np.array(
    [to_sentiment_category(x) for x in paired_sentiments['average_sentiment_vader']]
)
bert_sentiment_labels = np.array(
    [to_sentiment_category(y) for y in paired_sentiments['average_sentiment_bert']]
)

COHEN_KAPPA = cohen_kappa_score(vader_sentiment_labels, bert_sentiment_labels)
print(f"Cohen's Kappa: {COHEN_KAPPA:.2f}")

# Kappa > 0.75 is a Strong agreement
# Kappa = 0.4 - 0.75 is a Moderate agreement
# Kappa < 0.4 is a Weak agreement


# 2. BIAS DETECTION METRIC - Bland-Altman plot

In [None]:
# # calculate systematic biases between VADER and BERT sentiment analysis

# import seaborn as sns
# import matplotlib.pyplot as plt


# mean_scores = (np.array(individual_vader_sentiments) + np.array(individual_bert_sentiments)) / 2
# diff_scores = np.array(individual_vader_sentiments) - np.array(individual_bert_sentiments)

# # Create a scatter plot for the Bland-Altman analysis
# sns.scatterplot(x=mean_scores, y=diff_scores)
# plt.axhline(0, color='red', linestyle='dashed')  # No bias line
# plt.xlabel("Mean Sentiment Score")
# plt.ylabel("VADER - BERT Sentiment Score Difference")
# plt.title("Bland-Altman Plot")
# plt.show()
# # patterns indicate bias
# # if most points are close to 0, the models mostly agree

# EXTRINSIC VALIDATION - CONFUSION MATRIX

attempt 1

In [None]:
# import json
# import glob
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sklearn.metrics import confusion_matrix, cohen_kappa_score

# ground_truth = {}
# path = os.path.join("GROUND_TRUTH_LABELS", "*.ndjson")
# ndjson_files_ground_truth = glob.glob(path)

# # Load Ground Truth Labels
# # ground_truth = {}  # Dictionary for easier comparison
# # ndjson_files_ground_truth = glob.glob("/GROUND_TRUTH_LABELS/*.ndjson")  # Adjust path as needed

# for file in ndjson_files_ground_truth:
#     with open(file, "r") as f:
#         for line in f:
#             entry = json.loads(line)
#             ground_truth[entry["comment_id"]] = entry["sentiment"]

# print("Sample Ground Truth Data:", list(ground_truth.items())[:5])

# # Convert VADER and BERT sentiment scores to labels (Positive, Neutral, Negative)
# vader_labels = [to_sentiment_category(sentiment) for sentiment in individual_vader_sentiments]
# bert_labels = [to_sentiment_category(sentiment) for sentiment in individual_bert_sentiments]

# print("Sample VADER Predictions:", vader_labels[:5])
# print("Sample BERT Predictions:", bert_labels[:5])

# # Assuming the comment IDs are in the same order in ground_truth as in the sentiment lists
# common_comment_ids = set(ground_truth.keys())  

# y_true = [ground_truth[comment_id] for comment_id in common_comment_ids]
# y_vader = vader_labels[:len(y_true)]  # Ensure same length 
# y_bert = bert_labels[:len(y_true)]  # Ensure same length 

# print(f"Total Common Comments: {len(common_comment_ids)}")

# # Step 3.1: Compute and Plot Confusion Matrices
# fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# for ax, (y_pred, title) in zip(axes, [(y_vader, "VADER"), (y_bert, "BERT")]):
#     cm = confusion_matrix(y_true, y_pred, labels=["Positive", "Neutral", "Negative"])
#     sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
#                 xticklabels=["Positive", "Neutral", "Negative"],
#                 yticklabels=["Positive", "Neutral", "Negative"], ax=ax)
#     ax.set_title(f"Confusion Matrix: {title}")
#     ax.set_xlabel("Predicted Label")
#     ax.set_ylabel("True Label")

# plt.tight_layout()
# plt.show()

In [34]:
# Scratch cell for checking that the ground-truth label files can be found.
# NOTE(review): `vader_labels`, `bert_labels`, `ground_truth` and
# `ndjson_files_ground_truth` are first assigned in cells located below this
# one ("attempt 2"), so on a fresh top-to-bottom run the prints here raise
# NameError. The `[:5]` slices also assume list-like values, whereas
# "attempt 2" binds `vader_labels` / `bert_labels` to dicts.
print("Sample VADER Sentiment Labels:", vader_labels[:5])
print("Sample BERT Sentiment Labels:", bert_labels[:5])

print("Sample Ground Truth Labels:", list(ground_truth.values())[:5])

print("Found NDJSON files:", ndjson_files_ground_truth)

import os
print("Current working directory:", os.getcwd())

# Raises FileNotFoundError when the folder does not exist relative to the
# working directory printed above.
print(os.listdir('GROUND_TRUTH_LABELS'))  # This should list the files in the folder

import glob
ndjson_files_ground_truth = glob.glob("GROUND_TRUTH_LABELS/*.ndjson")  # or use absolute path here
print("Found NDJSON files:", ndjson_files_ground_truth)

# Try one specific file and report, rather than fail, when it is absent.
file_path = "GROUND_TRUTH_LABELS/labeled_comments_xtqa50.ndjson"  # Replace with actual filename
try:
    with open(file_path, "r") as f:
        print(f.readline())  # Try reading the first line
except FileNotFoundError:
    print(f"File {file_path} not found.")


attempt 2

In [None]:
import json
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Load Ground Truth Labels: comment id -> sentiment, read from every NDJSON
# file in the labels folder (one JSON object per line; a later entry with the
# same comment id overwrites an earlier one).
path = os.path.join("GROUND_TRUTH_LABELS", "*.ndjson")
ndjson_files_ground_truth = glob.glob(path)

ground_truth = {}
for file in ndjson_files_ground_truth:
    with open(file, "r") as f:
        for entry in map(json.loads, f):
            ground_truth[entry["comment_id"]] = entry["sentiment"]

# Simulated VADER and BERT predictions (replace these with actual predictions)
# NOTE(review): these placeholders are keyed by label name, not by comment id,
# so the intersection below is presumably empty for real ground-truth data.
individual_vader_sentiments = {"Negative": -1, "Neutral": 0, "Positive": 1}
individual_bert_sentiments = dict(individual_vader_sentiments)

# Convert sentiment scores to labels
vader_labels = {key: to_sentiment_category(score) for key, score in individual_vader_sentiments.items()}
bert_labels = {key: to_sentiment_category(score) for key, score in individual_bert_sentiments.items()}

# Find common comment IDs
common_comment_ids = set(ground_truth.keys()) & set(vader_labels.keys()) & set(bert_labels.keys())

# Aligned label lists: position i of each list refers to the same comment id.
y_true, y_vader, y_bert = [], [], []
for cid in common_comment_ids:
    y_true.append(ground_truth[cid])
    y_vader.append(vader_labels[cid])
    y_bert.append(bert_labels[cid])

# Generate Confusion Matrices (one heatmap per model, same class order on both axes)
class_order = ["Positive", "Neutral", "Negative"]
predictions_by_model = [(y_vader, "VADER"), (y_bert, "BERT")]

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
for ax, (y_pred, title) in zip(axes, predictions_by_model):
    cm = confusion_matrix(y_true, y_pred, labels=class_order)
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_order,
        yticklabels=class_order,
        ax=ax,
    )
    ax.set(
        title=f"Confusion Matrix: {title}",
        xlabel="Predicted Label",
        ylabel="True Label",
    )

plt.tight_layout()
plt.show()


attempt 3

In [None]:
import json
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Function to convert sentiment scores to labels
def to_sentiment_category(score):
    """Label a sentiment score using a fixed neutral band of +/-0.05.

    Returns "Positive" for scores of 0.05 or more, "Negative" for scores of
    -0.05 or less, and "Neutral" for everything else (including NaN, which
    fails both comparisons).

    NOTE(review): this redefines, and from here on shadows, the function of the
    same name declared in the discretization cell near the top of the notebook.
    """
    if score >= 0.05:
        return "Positive"
    if score <= -0.05:
        return "Negative"
    return "Neutral"

# Load Ground Truth Labels
path = os.path.join("GROUND_TRUTH_LABELS", "*.ndjson")
ndjson_files_ground_truth = glob.glob(path)

# comment id -> sentiment label; each file holds one JSON object per line, and
# a later entry with the same comment id overwrites an earlier one.
ground_truth = {}
for file in ndjson_files_ground_truth:
    with open(file, "r") as f:
        for entry in map(json.loads, f):
            ground_truth[entry["comment_id"]] = entry["sentiment"]

# Labelled comments gathered from every NDJSON file in the project's
# validation-labels folder, in the order the files and their records are yielded.
validation_labels_dir = config.ROOT_DIR / 'validation_labels' / 'steward_decision_submissions'

labeled_comment_records = []
for labels_file in validation_labels_dir.glob('*.ndjson'):
    labeled_comment_records.extend(stream_ndjson(labels_file))

# Frozen as a tuple so the evaluation below iterates a fixed sequence.
labeled_comments = tuple(labeled_comment_records)

# Simulated VADER and BERT predictions (Replace with actual predictions)
# These should be dictionaries where keys are comment IDs and values are sentiment scores.


# # Convert sentiment scores to categorical labels
# vader_labels = {k: to_sentiment_category(v) for k, v in individual_vader_sentiments.items()}
# bert_labels = {k: to_sentiment_category(v) for k, v in individual_bert_sentiments.items()}

# # Find common comment IDs
# common_comment_ids = set(ground_truth.keys()) & set(vader_labels.keys()) & set(bert_labels.keys())

# if not common_comment_ids:
#     print("No common comment IDs found. Check your data sources.")
# else:


# display(comments_df.loc[comments_df['id'] == 'iqr5d98', 'body'])




# Resolve the text of every labelled comment once, from the full comments frame.
# Reading the leftover `comments_df` loop variable instead only covers the
# comments of the last submission handled by the sentiment loops, so `.iloc[0]`
# fails with IndexError for labelled comments from any other thread.
labeled_comment_ids = {labeled_comment['comment_id'] for labeled_comment in labeled_comments}
body_by_comment_id = (
    f1_comments_df.loc[f1_comments_df['id'].isin(labeled_comment_ids)]
    .drop_duplicates(subset='id')  # keep the first match per id, as `.iloc[0]` did
    .set_index('id')['body']
    .to_dict()
)

# Score only comments whose text is available, so that `y_true`, `y_vader` and
# `y_bert` stay aligned position by position.
evaluated_comments = [
    labeled_comment
    for labeled_comment in labeled_comments
    if labeled_comment['comment_id'] in body_by_comment_id
]
missing_count = len(labeled_comments) - len(evaluated_comments)
if missing_count:
    print(f"Skipped {missing_count} labelled comment(s) whose id is not in f1_comments_df.")

y_true = [labeled_comment['sentiment'] for labeled_comment in evaluated_comments]
y_vader = [
    to_sentiment_category(
        vader_analyzer.polarity_scores(body_by_comment_id[labeled_comment['comment_id']])['compound']
    )
    for labeled_comment in evaluated_comments
]
y_bert = [
    to_sentiment_category(bert_sentiment(body_by_comment_id[labeled_comment['comment_id']]))
    for labeled_comment in evaluated_comments
]

# Generate Confusion Matrices (rows: true label, columns: predicted label)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for ax, (y_pred, title) in zip(axes, [(y_vader, "VADER"), (y_bert, "BERT")]):
    cm = confusion_matrix(y_true, y_pred, labels=["Positive", "Neutral", "Negative"])
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=["Positive", "Neutral", "Negative"],
                yticklabels=["Positive", "Neutral", "Negative"], ax=ax)
    ax.set_title(f"Confusion Matrix: {title}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")

plt.tight_layout()
plt.show()


# attempt 4: accuracy, precision, recall and F1 validation metrics

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute overall accuracy
accuracy = accuracy_score(y_true, y_vader)
print(f"VADER Accuracy: {accuracy:.4f}")
accuracy = accuracy_score(y_true, y_bert)
print(f"BERT Accuracy: {accuracy:.4f}")


def _single_class_scores(y_pred, label):
    """Return (precision, recall, F1) of `y_pred` against `y_true` for one class.

    Restricting `labels` to a single class makes the macro average equal to
    that class's own score; `zero_division=0` reports 0 when the score is
    undefined.
    """
    shared_options = {"labels": [label], "average": "macro", "zero_division": 0}
    return tuple(
        metric(y_true, y_pred, **shared_options)
        for metric in (precision_score, recall_score, f1_score)
    )


# Compute precision, recall, and F1-score for each class
labels = ["Positive", "Neutral", "Negative"]
for label in labels:
    vader_precision, vader_recall, vader_f1 = _single_class_scores(y_vader, label)
    bert_precision, bert_recall, bert_f1 = _single_class_scores(y_bert, label)

    print(f"\nMetrics for class '{label}':")
    print(f"  VADER -> Precision: {vader_precision:.4f}, Recall: {vader_recall:.4f}, F1-score: {vader_f1:.4f}")
    print(f"  BERT  -> Precision: {bert_precision:.4f}, Recall: {bert_recall:.4f}, F1-score: {bert_f1:.4f}")
