In [357]:
import joblib as jbl
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from nltk.sentiment import SentimentIntensityAnalyzer
from scipy.special import softmax
from tqdm.notebook import tqdm
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
# CSV locations used by this notebook (all relative to the working directory).
response_df_filename = 'last_response.csv'        # input: loaded into `response_df`
polarity_df_filename = 'polarity_df.csv'          # accumulated scores: read, extended, rewritten
new_polarity_df_filename = 'new_polarity_df.csv'  # output: scores of the current batch only

In [358]:
# Load the batch of responses to score; the scoring helpers read its 'Text' column.
response_df = pd.read_csv(filepath_or_buffer=response_df_filename)

In [2]:
# Build the analyzer once; `vader_polarity` reuses this module-level instance.
try:
    vader_model = SentimentIntensityAnalyzer()
except LookupError:
    # The analyzer loads the `vader_lexicon` NLTK resource and raises LookupError
    # when it is not installed (e.g. on a fresh environment). Fetch it once and
    # retry so the notebook survives Restart & Run All.
    nltk.download('vader_lexicon', quiet=True)
    vader_model = SentimentIntensityAnalyzer()

In [6]:
def vader_polarity(df):
    """Run the module-level `vader_model` over every row of ``df['Text']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Text' column holding the documents to score.

    Returns
    -------
    dict
        Maps the 0-based row position (not the frame's index label) to the
        value returned by ``vader_model.polarity_scores`` for that row's text.
    """
    # tqdm wraps the row iterator so a progress bar is shown while scoring.
    progress = tqdm(df.iterrows(), total=len(df))
    return {
        position: vader_model.polarity_scores(record['Text'])
        for position, (_, record) in enumerate(progress)
    }

In [8]:
# Checkpoint id shared by the tokenizer and the classifier. It is kept under its
# own name so that `roberta_model` only ever refers to the loaded model.
roberta_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_model_name)

In [16]:
def polarity_scores_roberta(df, max_length=512):
    """Score every row of ``df['Text']`` with the module-level RoBERTa classifier.

    Each text is tokenized with the module-level `tokenizer`, passed through
    `roberta_model`, and the three output logits are turned into
    probabilities with a softmax.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Text' column holding the documents to score.
    max_length : int, optional
        Maximum number of tokens fed to the model; longer texts are truncated.
        The default of 512 is the usual input limit for RoBERTa-base
        checkpoints. Without truncation an over-long text makes the forward
        pass fail and aborts the whole run.

    Returns
    -------
    dict
        Maps the 0-based row position (not the frame's index label) to a dict
        with keys 'roberta_neg', 'roberta_neu' and 'roberta_pos', taken from
        softmax outputs 0, 1 and 2 respectively.
    """
    res = {}

    progress = tqdm(df.iterrows(), total=len(df))
    for n, (_, row) in enumerate(progress):
        encoded_text = tokenizer(
            row['Text'],
            return_tensors='pt',
            truncation=True,
            max_length=max_length,
        )
        # Inference only: skip building the autograd graph for each forward pass.
        with torch.no_grad():
            output = roberta_model(**encoded_text)

        # output[0] holds the logits; [0] selects the single sequence in the batch.
        scores = softmax(output[0][0].detach().numpy())
        res[n] = {
            'roberta_neg': scores[0],
            'roberta_neu': scores[1],
            'roberta_pos': scores[2],
        }
    return res

In [359]:
# Score every response, then transpose the {row position: scores} mapping so each
# response becomes one row with the three roberta_* columns.
roberta_polarity = pd.DataFrame(polarity_scores_roberta(response_df)).transpose()

  0%|          | 0/6590 [00:00<?, ?it/s]

In [354]:
# Attach the scores to their source rows by index label; how='right' keeps
# exactly the rows present in `roberta_polarity`.
new_polarity_df = response_df.join(other=roberta_polarity, how='right')
new_polarity_df.head(n=5)

Unnamed: 0,Text,Date,roberta_neg,roberta_neu,roberta_pos
0,ChatGPT is cool. But soon you'll be able to wr...,2023-02-04 01:28:43+00:00,0.005148,0.133562,0.861291
1,You have got to have the absolute worst self-c...,2023-02-04 01:28:37+00:00,0.969344,0.027905,0.002751
2,ChatGPT is the modern tech version of papal fo...,2023-02-04 01:28:35+00:00,0.27945,0.637331,0.083218
3,ChatGPT can probably churn out believable Link...,2023-02-04 01:28:35+00:00,0.258296,0.477726,0.263978
4,I was playing Genshin earlier and came upon th...,2023-02-04 01:28:27+00:00,0.565488,0.373092,0.06142


In [356]:
# Merge the freshly scored batch into the accumulated history and persist both.
try:
    old_polarity_df = pd.read_csv(polarity_df_filename)
except FileNotFoundError:
    # First run: no history file exists yet, so start from the new batch alone.
    old_polarity_df = None

if old_polarity_df is None:
    history_frames = [new_polarity_df]
else:
    history_frames = [old_polarity_df, new_polarity_df]

# Older rows come first, so when the same 'Text' appears twice the earlier
# (already stored) row is the one that is kept.
polarity_df = (
    pd.concat(history_frames, ignore_index=True)
    .drop_duplicates(subset=['Text'], ignore_index=True)
)

new_polarity_df.to_csv(new_polarity_df_filename, index=False)
polarity_df.to_csv(polarity_df_filename, index=False)