In [1]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.nn import functional as F
import torch
import os
import zipfile

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Unzip the saved model and tokenizer archives. Both are extracted into the same
# directory; the loads below read the 'saved_model' and 'saved_tokenizer'
# sub-folders from it (presumably each archive contains that top-level folder
# -- TODO confirm).
for archive in ('saved_model.zip', 'saved_tokenizer.zip'):
    with zipfile.ZipFile(archive, 'r') as zip_ref:
        zip_ref.extractall('sentiment_analysis/model')

# Load the model and tokenizer
classifier = AutoModelForSequenceClassification.from_pretrained('sentiment_analysis/model/saved_model').to(device)
tokenizer = AutoTokenizer.from_pretrained('sentiment_analysis/model/saved_tokenizer')

# Define the labels. The list is indexed by the classifier's argmax, so the order
# must match the model's output indices. 'Neutral' is also the label assigned
# when the top score is below the confidence threshold.
labels = ['Sadness', 'Joy', 'Love', 'Anger', 'Fear', 'Surprise', 'Neutral']

In [3]:
def classify_comment(comment, threshold=0.5, verbose=True):
    """Classify a single comment into one of the emotion labels.

    Parameters
    ----------
    comment : str
        Text to classify. A missing value (``None`` or a float NaN, which is
        what pandas yields for an empty CSV cell) is not sent to the model and
        is reported as 'Neutral' with a score of 0.0.
    threshold : float, optional
        Minimum top softmax probability required to keep the model's label;
        a lower top score is reported as 'Neutral'. Defaults to 0.5.
    verbose : bool, optional
        When True (the default), print the prediction and score.

    Returns
    -------
    tuple of (str, float)
        The chosen label and the highest softmax probability.
    """
    neutral_label = 'Neutral'

    # NaN is the only float that compares unequal to itself.
    is_missing = comment is None or (isinstance(comment, float) and comment != comment)
    if is_missing:
        if verbose:
            print(f'Prediction: {neutral_label} | Score: {0.0}')
        return neutral_label, 0.0

    # NOTE(review): padding='max_length' pads every single comment up to the
    # tokenizer's maximum length; for one-at-a-time inference this looks like
    # unnecessary work -- confirm before changing, as it is kept as-is here.
    tokenized_comment = tokenizer(comment, return_tensors='pt', padding='max_length', truncation=True).to(device)

    # Perform classification without tracking gradients (inference only)
    with torch.no_grad():
        outputs = classifier(**tokenized_comment)
        logits = outputs.logits

    # Get the predicted class and the maximum score
    score = F.softmax(logits, dim=-1)
    prediction = torch.argmax(score, dim=-1).item()
    max_score = torch.max(score).item()

    # If the maximum score is low, classify as 'Neutral'
    if max_score < threshold:
        prediction = labels.index(neutral_label)

    if verbose:
        print(f'Prediction: {labels[prediction]} | Score: {max_score}')

    return labels[prediction], max_score

def classify_file(file_name, input_dir='csv_files/comments_by_state',
                  output_dir='csv_files/classified_comments_by_state'):
    """Classify every comment in one CSV file and save the annotated copy.

    Reads ``<input_dir>/<file_name>``, runs ``classify_comment`` on each value
    of its 'Comment' column, adds the results as 'Sentiment' (label) and
    'Intensity' (score) columns, and writes the frame to
    ``<output_dir>/<file_name>`` without the index.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside ``input_dir``; the same name is used for
        the output file.
    input_dir : str, optional
        Folder holding the raw comment files.
    output_dir : str, optional
        Folder receiving the classified files; created if it does not exist.
    """
    input_file = os.path.join(input_dir, file_name)
    output_file = os.path.join(output_dir, file_name)
    comments_df = pd.read_csv(input_file)

    # Apply the classifier to each comment; every result is a (label, score) pair
    results = [classify_comment(comment) for comment in comments_df['Comment']]

    # Extract sentiment and intensity from the results
    comments_df['Sentiment'] = [label for label, _ in results]
    comments_df['Intensity'] = [score for _, score in results]

    # Make sure the destination folder exists; to_csv does not create it
    output_parent = os.path.dirname(output_file)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Save the classified comments
    comments_df.to_csv(output_file, index=False)

In [4]:
# Specify the folder containing the files
folder = 'csv_files/comments_by_state'

# The CSV paths are relative to the directory two levels up. Only change
# directory when the input folder is not already reachable, so re-running this
# cell does not climb two more levels each time.
if not os.path.isdir(folder):
    os.chdir('../..')

# Keep only CSV files (a bare listdir also returns entries such as checkpoint
# folders) and sort them so the combined output has a deterministic row order.
files = sorted(name for name in os.listdir(folder) if name.lower().endswith('.csv'))
if not files:
    raise ValueError(f'No CSV files found in {folder}')

# Classify comments in each file
for file_name in files:
    classify_file(file_name)

# Concatenate all classified data into a single CSV
data = pd.concat([pd.read_csv(f'csv_files/classified_comments_by_state/{file}') for file in files])
data.to_csv('csv_files/classified_comments.csv', index=False)

Prediction: Anger | Score: 0.9799314141273499
Prediction: Sadness | Score: 0.8221680521965027
Prediction: Sadness | Score: 0.993691086769104
Prediction: Sadness | Score: 0.8822131156921387
Prediction: Fear | Score: 0.9705154895782471
Prediction: Fear | Score: 0.5592760443687439
Prediction: Anger | Score: 0.503092348575592
Prediction: Sadness | Score: 0.9451637268066406
Prediction: Sadness | Score: 0.5820180773735046
Prediction: Fear | Score: 0.6287156939506531
Prediction: Anger | Score: 0.9774733781814575
Prediction: Anger | Score: 0.996532678604126
Prediction: Sadness | Score: 0.9974331259727478
Prediction: Anger | Score: 0.9932531714439392
Prediction: Sadness | Score: 0.7296431660652161
Prediction: Fear | Score: 0.9985871315002441
Prediction: Fear | Score: 0.6808261275291443
Prediction: Sadness | Score: 0.983805775642395
Prediction: Fear | Score: 0.9974330067634583
Prediction: Fear | Score: 0.9838252067565918
Prediction: Sadness | Score: 0.9209326505661011
Prediction: Sadness | Score