In [1]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.nn import functional as F
import torch
import os
import zipfile

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Unzip the archived model and tokenizer into the current working directory
for archive_name in ('saved_model.zip', 'saved_tokenizer.zip'):
    with zipfile.ZipFile(archive_name, 'r') as zip_ref:
        zip_ref.extractall()

# Load the model and tokenizer from the extracted folders
# (assumes each archive unpacks to a folder named after it — TODO confirm)
classifier = AutoModelForSequenceClassification.from_pretrained('saved_model').to(device)
classifier.eval()  # inference only; harmless if the model is already in eval mode
tokenizer = AutoTokenizer.from_pretrained('saved_tokenizer')

# Class names indexed by predicted class id. 'Neutral' (index 6) is the label
# classify_comment falls back to when the top score is low.
labels = ['Sadness', 'Joy', 'Love', 'Anger', 'Fear', 'Surprise', 'Neutral']

In [3]:
def classify_comment(comment, threshold=0.5):
    """Classify a single comment and return its label and confidence score.

    Args:
        comment: Text to classify. ``None`` or NaN (what pandas produces for an
            empty CSV cell) is not text the tokenizer can encode, so it is
            reported as 'Neutral' with a score of 0.0 without calling the model.
        threshold: Minimum top softmax probability needed to keep the model's
            prediction; a lower score is relabelled 'Neutral'. Defaults to 0.5.

    Returns:
        tuple: ``(label, score)`` where ``label`` is an entry of the
        module-level ``labels`` list and ``score`` is the highest softmax
        probability as a Python float.
    """
    neutral_index = labels.index('Neutral')

    # NaN is the only float that is not equal to itself
    is_missing = comment is None or (isinstance(comment, float) and comment != comment)

    if is_missing:
        prediction, max_score = neutral_index, 0.0
    else:
        # NOTE(review): padding='max_length' pads this single comment to the
        # tokenizer's maximum length; kept as-is to preserve existing outputs.
        tokenized_comment = tokenizer(
            comment, return_tensors='pt', padding='max_length', truncation=True
        ).to(device)

        # Perform classification (no gradients are needed for inference)
        with torch.no_grad():
            logits = classifier(**tokenized_comment).logits

        # Get the maximum score and the class it belongs to in one call
        score = F.softmax(logits, dim=-1)
        top_score, top_index = score.max(dim=-1)
        max_score = top_score.item()
        prediction = top_index.item()

        # If the maximum score is low, classify as 'Neutral'
        if max_score < threshold:
            prediction = neutral_index

    print(f'Prediction: {labels[prediction]} | Score: {max_score}')

    return labels[prediction], max_score

def classify_file(file_name):
    """Classify every comment in one CSV file and save the labelled copy.

    Reads ``csv_files/comments_by_state/<file_name>``, runs ``classify_comment``
    on each value of its 'Comment' column, and writes the frame with added
    'Sentiment' and 'Intensity' columns to
    ``csv_files/classified_comments_by_state/<file_name>``. Both paths are
    relative to the current working directory.

    Args:
        file_name: Name of the CSV file inside the input folder.
    """
    input_file = f'csv_files/comments_by_state/{file_name}'
    output_file = f'csv_files/classified_comments_by_state/{file_name}'
    comments_df = pd.read_csv(input_file)

    # Apply the classifier to each comment; each result is a (label, score) pair
    results = [classify_comment(comment) for comment in comments_df['Comment']]

    # Extract sentiment and intensity from the results
    comments_df['Sentiment'] = [label for label, _ in results]
    comments_df['Intensity'] = [score for _, score in results]

    # Create the output folder first so to_csv does not fail when it is missing
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Save the classified comments
    comments_df.to_csv(output_file, index=False)

In [4]:
# Specify the folder containing the files
folder = 'csv_files/comments_by_state'

# Move up two levels only when the data folder is not already reachable, so
# re-running this cell does not keep climbing the directory tree.
# (presumably '../..' is the project root — confirm against the repo layout)
if not os.path.isdir(folder):
    os.chdir('../..')

# Keep only CSV files (skips hidden files and subfolders) in a stable order
files = sorted(name for name in os.listdir(folder) if name.lower().endswith('.csv'))
if not files:
    raise FileNotFoundError(f'No CSV files found in {folder}')

# Classify comments in each file
for file_name in files:
    classify_file(file_name)

# Concatenate all classified data into a single CSV
data = pd.concat(
    [pd.read_csv(f'csv_files/classified_comments_by_state/{file}') for file in files],
    ignore_index=True,
)
data.to_csv('csv_files/classified_comments.csv', index=False)

Prediction: Fear | Score: 0.9997187256813049
Prediction: Fear | Score: 0.6781787276268005
Prediction: Sadness | Score: 0.7851782441139221
Prediction: Joy | Score: 0.9912152290344238
Prediction: Sadness | Score: 0.9771139025688171
Prediction: Sadness | Score: 0.8824933767318726
Prediction: Sadness | Score: 0.9134264588356018
Prediction: Anger | Score: 0.9950592517852783
Prediction: Sadness | Score: 0.9878331422805786
Prediction: Anger | Score: 0.9474474787712097
Prediction: Fear | Score: 0.6556318998336792
Prediction: Joy | Score: 0.9989008903503418
Prediction: Neutral | Score: 0.48169878125190735
Prediction: Joy | Score: 0.7857049703598022
Prediction: Sadness | Score: 0.5378010869026184
Prediction: Sadness | Score: 0.764089822769165
Prediction: Joy | Score: 0.9964397549629211
Prediction: Fear | Score: 0.9086893796920776
Prediction: Sadness | Score: 0.5110182762145996
Prediction: Anger | Score: 0.9827573299407959
Prediction: Anger | Score: 0.9918221831321716
Prediction: Anger | Score: 0