In [12]:
import os
import numpy as np
import pandas as pd
import flair
import datetime
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tqdm import tqdm

In [13]:
# Load the raw posts from a CSV in the working directory. The cells below use
# its `title`, `selftext` and `publish_date` columns.
df = pd.read_csv('reddit_data_Ethereum.csv')

In [14]:
# Build the frame that the sentiment pass consumes.
#
# Everything is chained off the column selection rather than assigning into the
# sliced frame and dropping/re-indexing in place: assigning a new column to a
# column-subset of another frame is what triggers pandas' SettingWithCopyWarning.
# The resulting frame is the same as before: index `date`, columns
# `title`, `selftext`, `text`.
df = (
    df[['title', 'selftext', 'publish_date']]
    .rename(columns={'publish_date': 'date'})
    # Missing values become empty strings so the concatenation below never
    # produces NaN for posts that lack a body.
    .fillna('')
    # One text field per post: title and body separated by a single space.
    .assign(text=lambda posts: posts['title'] + ' ' + posts['selftext'])
    .set_index('date')
)

In [15]:
# `text` already holds title + body, so the two source columns are removed
# from the frame in place.
df.drop(columns=['title', 'selftext'], inplace=True)

In [16]:
# Module-level analyzers shared by get_sentiment_report().
# Loading the Flair 'en-sentiment' classifier reads a model file from the local
# Flair cache (see the log line in this cell's output).
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')
# strftime-style pattern that fixes minutes and seconds to 00.
# NOTE(review): `fmt` is not referenced in any cell visible here — confirm it is still needed.
fmt = '%Y-%m-%d %H:00:00'
# VADER analyzer; its polarity_scores() output is read in get_sentiment_report().
vader = SentimentIntensityAnalyzer()

2022-05-02 12:48:40,117 loading file C:\Users\mpmur\.flair\models\sentiment-en-mix-distillbert_4.pt


In [17]:
def sentiment_val_flair(flair_tag, flair_score):
    """Turn a Flair (tag, score) pair into one signed float.

    Args:
        flair_tag: Predicted label; anything supporting ``'NEGATIVE' in flair_tag``.
        flair_score: Score attached to that label; passed through ``float()``.

    Returns:
        ``-float(flair_score)`` when ``flair_tag`` contains ``'NEGATIVE'``,
        otherwise ``float(flair_score)``.
    """
    is_negative = 'NEGATIVE' in flair_tag
    magnitude = float(flair_score)
    return -magnitude if is_negative else magnitude

In [18]:
def get_sentiment_report(df, output_path='Reddit_Sentiment_ETH.csv'):
    """Score every entry of ``df['text']`` with VADER, Flair and TextBlob.

    Relies on the module-level ``vader`` and ``flair_sentiment`` analyzers and
    on ``sentiment_val_flair`` for signing the Flair score.

    Args:
        df: DataFrame with a ``text`` column. It is modified in place: seven
            ``Reddit_*`` score columns are added (or overwritten).
        output_path: Where the augmented frame is written as CSV. Defaults to
            the file name this notebook has always used; pass ``None`` to skip
            writing.

    Returns:
        None. Results are delivered through the mutated ``df`` and the CSV file.
    """
    flair_scores = []
    tb_polarity = []
    tb_subjectivity = []
    vader_pos = []
    vader_neg = []
    vader_neu = []
    vader_compound = []

    for text in tqdm(df['text']):
        # VADER: a dict of scores, read here by the keys pos / neg / neu / compound.
        vader_scores = vader.polarity_scores(text)
        vader_pos.append(vader_scores['pos'])
        vader_neg.append(vader_scores['neg'])
        vader_neu.append(vader_scores['neu'])
        vader_compound.append(vader_scores['compound'])

        # Flair: predict() annotates the sentence object in place; the label
        # and its score are then collapsed into one signed value.
        sentence = flair.data.Sentence(text)
        flair_sentiment.predict(sentence)
        flair_scores.append(sentiment_val_flair(sentence.tag, sentence.score))

        # TextBlob: build the blob once and read both fields from the same
        # result (index 0 -> polarity column, index 1 -> subjectivity column).
        tb_result = TextBlob(text).sentiment
        tb_polarity.append(tb_result[0])
        tb_subjectivity.append(tb_result[1])

    # Assignment order determines the column order in the CSV; keep it stable.
    df['Reddit_Flair'] = flair_scores
    df['Reddit_TB_Polarity'] = tb_polarity
    df['Reddit_TB_Subjectivity'] = tb_subjectivity
    df['Reddit_Vader_Pos'] = vader_pos
    df['Reddit_Vader_Neg'] = vader_neg
    df['Reddit_Vader_Neu'] = vader_neu
    df['Reddit_Vader_Compound'] = vader_compound

    if output_path is not None:
        df.to_csv(output_path)

In [19]:
# Score every post. This adds the Reddit_* columns to `df` in place and writes
# Reddit_Sentiment_ETH.csv. The saved run below took about 3.5 minutes for 2780 rows.
get_sentiment_report(df)

100%|██████████████████████████████████████████████████████████████████████████████| 2780/2780 [03:32<00:00, 13.11it/s]


In [10]:
# Preview the first scored rows.
# NOTE(review): the saved output shows the index named `publish_date`, while the
# preprocessing cell names it `date`, and this cell's execution count is out of
# order. The output looks stale — re-run with Restart & Run All before sharing.
df.head()

Unnamed: 0_level_0,text,Reddit_Flair,Reddit_TB_Polarity,Reddit_TB_Subjectivity,Reddit_Vader_Pos,Reddit_Vader_Neg,Reddit_Vader_Neu,Reddit_Vader_Compound
publish_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-01 00:19:11,Transferring matic on the ethereum network. I ...,-0.999178,0.0625,0.722222,0.034,0.027,0.939,-0.2507
2022-01-01 05:52:00,Use cases where user wouldn’t even know it’s b...,-0.999674,0.0,0.0,0.0,0.0,1.0,0.0
2022-01-01 06:47:35,Should I invest in Ethereum now?,-0.989686,0.0,0.0,0.0,0.0,1.0,0.0
2022-01-01 08:37:33,Eminem buys Bored Ape Yacht Club NFT for 450k ...,-0.992684,-0.5,1.0,0.0,0.174,0.826,-0.2732
2022-01-01 12:55:33,Happy New Year and May Ethereum reach millions...,0.995823,0.501894,0.575758,0.424,0.0,0.576,0.9098


In [11]:
# Sanity-check column types. The saved output shows `text` as object and the
# seven Reddit_* score columns as float64.
# NOTE(review): this cell's execution count is out of order; re-run on a fresh kernel.
df.dtypes

text                       object
Reddit_Flair              float64
Reddit_TB_Polarity        float64
Reddit_TB_Subjectivity    float64
Reddit_Vader_Pos          float64
Reddit_Vader_Neg          float64
Reddit_Vader_Neu          float64
Reddit_Vader_Compound     float64
dtype: object