In [1]:
pip install afinn


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pandas as pd
import numpy as np
from afinn import Afinn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# NOTE(review): machine-specific absolute path -- point this at your local
# copy of the manually annotated CSV before running.
file_path = r'D:\Research\Python\Notebooks\Final Notebooks\Training Data\Manually_Annotated_data_new.csv'
df = pd.read_csv(file_path, encoding='UTF-8', engine='python')

X = df['Preprocessed_Text'].astype(str)  # Features (missing texts become the string 'nan')
y_true = df['Labelled_sentiment']  # True labels -- presumably encoded as -1 / 0 / 1; TODO confirm
y_true = np.nan_to_num(y_true, nan=0)  # Replace NaN values with 0 for neutral sentiment

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.2, random_state=42)

# Initialize AFINN
afinn = Afinn()


def afinn_labels(texts, scorer):
    """Map each text to a sentiment label using the sign of its AFINN score.

    Parameters
    ----------
    texts : iterable of str
        Documents to score.
    scorer : object exposing ``score(text)``
        Lexicon scorer; here an ``Afinn`` instance.

    Returns
    -------
    numpy.ndarray
        One label per text: 1 for a positive score, -1 for a negative
        score, 0 otherwise.
    """
    # Convert to an array once instead of once per comparison.
    scores = np.array([scorer.score(text) for text in texts])
    return np.where(scores > 0, 1, np.where(scores < 0, -1, 0))


# AFINN is a fixed word-list scorer: nothing is fitted to the training data and
# scoring the same text always gives the same value. Repeating the scoring pass
# therefore cannot change the predictions, so each split is scored exactly once.
y_pred_train = afinn_labels(X_train, afinn)
y_pred_test = afinn_labels(X_test, afinn)

# Kept for backward compatibility with any later cell that reads these names:
# one (identical) prediction array per nominal iteration.
num_iterations = 10
y_pred_train_list = [y_pred_train.copy() for _ in range(num_iterations)]
y_pred_test_list = [y_pred_test.copy() for _ in range(num_iterations)]

# Calculate evaluation metrics on the testing set
# (weighted average: per-class scores weighted by class support).
accuracy_test = accuracy_score(y_test, y_pred_test)
precision_test = precision_score(y_test, y_pred_test, average='weighted')
recall_test = recall_score(y_test, y_pred_test, average='weighted')
f1_test = f1_score(y_test, y_pred_test, average='weighted')

print("\nTesting Set:")
print("Accuracy:", accuracy_test)
print("Precision:", precision_test)
print("Recall:", recall_test)
print("F1 score:", f1_test)



Testing Set:
Accuracy: 0.7216666666666667
Precision: 0.7163596077054599
Recall: 0.7216666666666667
F1 score: 0.6755466133322444
