# Vader Experiments

## Import Libraries

In [6]:
import pandas as pd
import numpy as np
import os

from tqdm.notebook import tqdm
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.model_selection import KFold

from sklearn.metrics import f1_score, accuracy_score

from scipy import stats

In [7]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/neemias/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [8]:
data_path_open_source = "/home/neemias/PerceptSent-LLM-approach/data/"
data_path_open_ai = "/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/"
data_path_deep_seek = "/home/neemias/PerceptSent-LLM-approach/data/deepseek/"

data_paths = []

for data_path in [data_path_open_source, data_path_open_ai, data_path_deep_seek]:
    data_paths.extend([os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith('.csv')])

# The del data_paths[6] line was removed as it was arbitrary.

print(data_paths)  # Print the list of CSV file paths
print(len(data_paths)) # Print the number of CSV files found

['/home/neemias/PerceptSent-LLM-approach/data/prompts.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha5_p3.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha3_p2neg.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha4_p5.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha4_p3.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha3_p2plus.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha5_p5.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha3_p5.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha5_p2plus.csv', '/home/neemias/PerceptSent-LLM-approach/data/gpt4-openai-classify/percept_dataset_alpha5_p2neg.csv', '/home/neemias/PerceptSent-LLM-approach/data

## Sentiment Analyzer

In [4]:
for data_path in tqdm(data_paths):
    df = pd.read_csv(data_path)
    df_metrics = pd.DataFrame([])
    if len(df) < 5:  # n_splits = 5
        print(f"Skipping {data_path} because it has fewer than 5 samples ({len(df)}).")
        continue  # Move to the next file
    
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    for fold, (train_idx, val_idx) in enumerate(kfold.split(df)):
        train_df = pd.DataFrame({"text": df["text"].iloc[train_idx].to_list(), 
                                        "sentiment": df["sentiment"].iloc[train_idx].to_list()})
        val_df = pd.DataFrame({"text": df["text"].iloc[val_idx].to_list(), 
                                    "sentiment": df["sentiment"].iloc[val_idx].to_list()})
        
        model = SentimentIntensityAnalyzer()

        text = val_df["text"].to_list()
        target = val_df["sentiment"].to_list()

        sentiments = np.unique(target)

        if (len(sentiments) == 3):
            sent_dic = {
                "neg": 1,
                "neu": 0,
                "pos": 2,
            }
        elif (len(sentiments) == 2):
            if (data_path.split('/')[-1].split('.')[0].split('_')[-1] == "p2plus"):
                sent_dic = {
                    "neg": 1, "neu": 0, "pos": 0,
                }
            else:
                sent_dic = {
                    "neg": 0, "neu": 0, "pos": 1,
                }

        pred = []
        for t in text:
            result = model.polarity_scores(t)
            del result["compound"]
            # print(f"Result: {result}") # For debug
            max_key = max(result, key=result.get)
            max_value = result[max_key]
            pred.append(sent_dic[max_key])

        accuracy_val = accuracy_score(target, pred)
        f1_val = f1_score(target, pred, average="weighted")    
        df_metrics = pd.concat([df_metrics, pd.DataFrame({"accuracy": [accuracy_val], "f1_score": [f1_val]
                                                            })], axis=0)
    # Determine the flag based on the data path
    if "openai" in data_path:
        flag = "openai"
    elif "deepseek" in data_path:
        flag = "deepseek"
    elif "percept" in data_path or "open_source" in data_path: # Handles both "percept" and "open_source"
        flag = "percept"  # Or "opensource" if you prefer
    else:
        flag = "unknown" # Handle cases where the path doesn't match known flags
        
    # display(df_metrics.head())
    if (len(sentiments) <= 3):
        df_metrics.to_csv(f"/home/neemias/PerceptSent-LLM-approach/experiments/vader-experiment/{flag}-{data_path.split('/')[-1]}",
                        index=False)

  0%|          | 0/25 [00:00<?, ?it/s]

Skipping /home/neemias/PerceptSent-LLM-approach/data/prompts.csv because it has fewer than 5 samples (1).


In [5]:
csv_files = [os.path.join("/home/neemias/PerceptSent-LLM-approach/experiments/vader-experiment", f) 
             for f in os.listdir("/home/neemias/PerceptSent-LLM-approach/experiments/vader-experiment") if f.endswith(".csv")]

for csv_file in csv_files:
    df_metrics = pd.read_csv(csv_file)
    f1_scores = df_metrics["f1_score"].to_list()
    mean_f1 = np.mean(f1_scores)


    # define the confidence level
    confidence_level = 0.95
    degrees_freedon = len(f1_scores)-1

    confidence_interval = stats.t.interval(
        confidence_level, 
        degrees_freedon, 
        loc=mean_f1, 
        scale=stats.sem(f1_scores)
    )

    print(f"\n\nProblem: {csv_file.split('/')[-1].split('.')[0]}")
    print(f"Max F1-score: {max(f1_scores)}")
    print(f"Average F1-score: {mean_f1}")
    print(f"Confidence interval 95%: {confidence_interval}")
    print(f"Inteval: {abs(confidence_interval[0]-mean_f1)} - Interval: {abs(confidence_interval[1]-mean_f1)}")
    confidence_interval = stats.t.interval(
        confidence_level, 
        degrees_freedon, 
        loc=max(f1_scores), 
        scale=stats.sem(f1_scores)
    )

    print(f"Inteval: {abs(confidence_interval[0]-max(f1_scores))} - Interval: {abs(confidence_interval[1]-max(f1_scores))}")



Problem: deepseek-percept_dataset_alpha5_p2neg
Max F1-score: 0.5189435259561487
Average F1-score: 0.49074234954804813
Confidence interval 95%: (0.45191682301330005, 0.5295678760827962)
Inteval: 0.03882552653474808 - Interval: 0.03882552653474808
Inteval: 0.03882552653474808 - Interval: 0.03882552653474802


Problem: deepseek-percept_dataset_alpha5_p2plus
Max F1-score: 0.6474433981859724
Average F1-score: 0.6282014212922771
Confidence interval 95%: (0.6074191385525002, 0.648983704032054)
Inteval: 0.02078228273977689 - Interval: 0.02078228273977689
Inteval: 0.02078228273977689 - Interval: 0.02078228273977689


Problem: deepseek-percept_dataset_alpha4_p2plus
Max F1-score: 0.59909004841813
Average F1-score: 0.5657914816935575
Confidence interval 95%: (0.5332214163067354, 0.5983615470803797)
Inteval: 0.03257006538682217 - Interval: 0.03257006538682217
Inteval: 0.03257006538682217 - Interval: 0.03257006538682217


Problem: openai-percept_dataset_alpha3_p2plus
Max F1-score: 0.52609391932570