In [1]:
!pip install -q transformers

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from transformers import pipeline

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [3]:
# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
# NOTE(review): the displayed 'Unnamed: 0' column is presumably an index that was
# written out with the CSV -- confirm before relying on it.
curr_tweets_df = pd.read_csv('curr_tweets.csv')
curr_tweets_df.head()

Unnamed: 0,body,Target
0,well washing your tesla cybertruck in a drive ...,0.0
1,do you all have a system for the model 3 premi...,0.0
2,yup will drop tesla eps to near zero powerwall...,0.0
3,is paying now for decision in 2019 tesla secre...,0.0
4,as a proud german tesla investor and happy mod...,0.0


In [None]:
def get_sentiments(df):
    """Run the Twitter RoBERTa sentiment classifier over ``df['body']``.

    Every value of the ``body`` column is cast to ``str`` and the resulting
    list is passed to a Hugging Face ``sentiment-analysis`` pipeline in a
    single call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'body'`` column holding the tweet text.

    Returns
    -------
    The pipeline's output for the list of texts. Other code in this notebook
    reads a ``'label'`` and a ``'score'`` key from each element.
    """
    bodies = df['body'].to_numpy(dtype=str)
    classifier = pipeline(
        "sentiment-analysis",
        model="cardiffnlp/twitter-roberta-base-sentiment-latest",
    )
    return classifier(list(bodies))

In [8]:
# Model inference is expensive, so reuse the saved result when it exists and
# only run (and then save) the classifier when it does not.
SENTIMENTS_CACHE = "curr_sentiments.npy"
try:
    # allow_pickle=True unpickles arbitrary objects: only load cache files you created yourself.
    curr_sentiments = np.load(SENTIMENTS_CACHE, allow_pickle=True)
except FileNotFoundError:
    # Store as an object array so both branches leave the same type in curr_sentiments.
    curr_sentiments = np.array(get_sentiments(curr_tweets_df), dtype=object)
    np.save(SENTIMENTS_CACHE, curr_sentiments, allow_pickle=True)

In [11]:
def add_sentiments_df(df, sentiments):
    """Append sentiment indicator and indicator*score columns to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per tweet. The input frame is not modified.
    sentiments : sequence of dict
        One entry per row of ``df``, in row order. Each entry must provide a
        ``'label'`` and a ``'score'`` key.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df`` (re-indexed 0..n-1) followed by
        ``I(positive)``, ``I(neutral)``, ``I(negative)`` (0/1 flags) and
        ``I(positive)*score``, ``I(neutral)*score``, ``I(negative)*score``.
        A label that is neither ``'positive'`` nor ``'neutral'`` is counted
        as negative.

    Raises
    ------
    ValueError
        If ``sentiments`` does not have exactly one entry per row of ``df``.
    """
    labels = [entry['label'] for entry in sentiments]
    scores = np.asarray([entry['score'] for entry in sentiments], dtype=float)

    # Without this check pd.concat would silently pad the shorter side with NaN.
    if len(labels) != len(df):
        raise ValueError(
            f"sentiments has {len(labels)} entries but df has {len(df)} rows"
        )

    pos_indicator = np.array([label == 'positive' for label in labels], dtype=np.int64)
    neut_indicator = np.array([label == 'neutral' for label in labels], dtype=np.int64)
    # Everything that is not positive or neutral falls into the negative bucket.
    neg_indicator = 1 - pos_indicator - neut_indicator

    new_columns_df = pd.DataFrame({
        'I(positive)': pos_indicator,
        'I(neutral)': neut_indicator,
        'I(negative)': neg_indicator,
        'I(positive)*score': pos_indicator * scores,
        'I(neutral)*score': neut_indicator * scores,
        'I(negative)*score': neg_indicator * scores,
    })

    # drop=True aligns df with the fresh 0..n-1 index of the new columns
    # without leaking the old index into the result as an extra 'index' column.
    df_updated = pd.concat([df.reset_index(drop=True), new_columns_df], axis=1)

    return df_updated

In [14]:
# Attach the sentiment indicator / score columns to the tweet table; the bare
# name on the last line makes the notebook render the resulting frame.
curr_tweets_df_updated = add_sentiments_df(curr_tweets_df, curr_sentiments)
curr_tweets_df_updated

Unnamed: 0,body,Target,I(positive),I(neutral),I(negative),I(positive)*score,I(neutral)*score,I(negative)*score
0,well washing your tesla cybertruck in a drive ...,0.0,0,1,0,0.000000,0.529800,0.000000
1,do you all have a system for the model 3 premi...,0.0,0,1,0,0.000000,0.881811,0.000000
2,yup will drop tesla eps to near zero powerwall...,0.0,1,0,0,0.664899,0.000000,0.000000
3,is paying now for decision in 2019 tesla secre...,0.0,0,1,0,0.000000,0.856915,0.000000
4,as a proud german tesla investor and happy mod...,0.0,0,0,1,0.000000,0.000000,0.885586
...,...,...,...,...,...,...,...,...
7574,gillion avoigt man i hope whoever you have rep...,0.0,1,0,0,0.835626,0.000000,0.000000
7575,as of march 271 billion in cash amp 24 billion...,0.0,0,0,1,0.000000,0.000000,0.578163
7576,i can see tesla tesla around 2000 by 2027 easy,0.0,0,1,0,0.000000,0.634903,0.000000
7577,market close tesla closed at 1719700high 17562...,0.0,0,1,0,0.000000,0.905242,0.000000


In [16]:
def run_log_reg(df, test_size=0.2, random_state=42):
    """Fit a logistic regression on the sentiment features and print test metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the six sentiment feature columns named in
        ``feature_cols`` below and a ``'Target'`` column with the class label.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    None
        The classification report and confusion matrix for the held-out
        rows are printed, not returned.
    """
    feature_cols = [
        'I(positive)', 'I(neutral)', 'I(negative)',
        'I(positive)*score', 'I(neutral)*score', 'I(negative)*score',
    ]

    X_train, X_test, y_train, y_test = train_test_split(
        df[feature_cols], df['Target'],
        test_size=test_size, random_state=random_state,
    )

    # The estimator keeps its own fixed seed, independent of the split seed.
    clf = LogisticRegression(random_state=0).fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Compute both summaries before printing anything.
    classification_rep = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("Classification Report:")
    print(classification_rep)

    print("Confusion Matrix:")
    print(conf_matrix)

In [18]:
# Train and evaluate on the sentiment features; the metrics are printed by the function.
run_log_reg(curr_tweets_df_updated)

Classification Report:
              precision    recall  f1-score   support

         0.0       0.53      0.70      0.60       793
         1.0       0.49      0.32      0.38       723

    accuracy                           0.52      1516
   macro avg       0.51      0.51      0.49      1516
weighted avg       0.51      0.52      0.50      1516

Confusion Matrix:
[[552 241]
 [494 229]]
