In [1]:
import os
import numpy as np
import pandas as pd
import flair
import datetime
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tqdm import tnrange, tqdm_notebook, tqdm

In [2]:
# Load the raw Reddit export; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='reddit_data_Bitcoin.csv')

In [3]:
# Keep only the columns needed for scoring and expose `publish_date` as `date`.
# The rename/fillna chain returns a fresh frame, so the column assignment below
# does not write into a slice of the raw frame (avoids SettingWithCopyWarning).
df = (
    df[['title', 'selftext', 'publish_date']]
    .rename(columns={'publish_date': 'date'})
    .fillna('')  # missing titles/bodies (and dates) become empty strings
)
# Title and body are scored together as one document.
df['text'] = df['title'] + ' ' + df['selftext']
df = df.set_index('date')

In [4]:
# Only the combined `text` column is read by the scoring function; drop its
# source columns without mutating the previous frame in place.
df = df.drop(columns=['title', 'selftext'])

In [5]:
# Shared analyzers used by get_sentiment_report.
vader = SentimentIntensityAnalyzer()
# Hour-resolution timestamp format (not referenced by the cells visible here).
fmt = '%Y-%m-%d %H:00:00'
# Loading the pre-trained English sentiment classifier logs the model file it reads.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

2022-05-03 13:02:05,017 loading file C:\Users\mpmur\.flair\models\sentiment-en-mix-distillbert_4.pt


In [6]:
def sentiment_val_flair(flair_tag, flair_score):
    """Turn a Flair label/confidence pair into a single signed score.

    The confidence is converted to ``float`` and negated when ``'NEGATIVE'``
    is contained in ``flair_tag``; otherwise it is returned unchanged.
    """
    is_negative = 'NEGATIVE' in flair_tag
    magnitude = float(flair_score)
    return -magnitude if is_negative else magnitude

In [7]:
def get_sentiment_report(df, output_path='Reddit_Sentiment_BTC.csv'):
    """Score every entry of ``df['text']`` with VADER, Flair and TextBlob.

    The scores are added to ``df`` in place as ``Reddit_*`` columns and the
    resulting frame is written to ``output_path`` as CSV.

    Relies on the module-level ``vader`` analyzer, the ``flair_sentiment``
    classifier and ``sentiment_val_flair`` being defined before the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``text`` column holding the strings to score.
        Modified in place.
    output_path : str, optional
        Destination of the CSV export; defaults to the file name this
        function has always written.

    Returns
    -------
    None
    """
    # Insertion order of this dict is the order the columns are added to `df`
    # (and therefore the column order of the exported CSV).
    scores = {
        'Reddit_Flair': [],
        'Reddit_TB_Polarity': [],
        'Reddit_TB_Subjectivity': [],
        'Reddit_Vader_Pos': [],
        'Reddit_Vader_Neg': [],
        'Reddit_Vader_Neu': [],
        'Reddit_Vader_Compound': [],
    }

    for text in tqdm(df['text']):
        vader_scores = vader.polarity_scores(text)
        scores['Reddit_Vader_Pos'].append(vader_scores['pos'])
        scores['Reddit_Vader_Neg'].append(vader_scores['neg'])
        scores['Reddit_Vader_Neu'].append(vader_scores['neu'])
        scores['Reddit_Vader_Compound'].append(vader_scores['compound'])

        # Flair annotates the Sentence object in place; the label and its
        # confidence are then folded into one signed value.
        sentence = flair.data.Sentence(text)
        flair_sentiment.predict(sentence)
        scores['Reddit_Flair'].append(
            sentiment_val_flair(sentence.tag, sentence.score)
        )

        # Analyse the text once and reuse the result for both components
        # (index 0 = polarity, index 1 = subjectivity).
        blob_sentiment = TextBlob(text).sentiment
        scores['Reddit_TB_Polarity'].append(blob_sentiment[0])
        scores['Reddit_TB_Subjectivity'].append(blob_sentiment[1])

    for column, values in scores.items():
        df[column] = values
    df.to_csv(output_path)

In [8]:
# Score every post and export the augmented frame to CSV
# (the recorded run below processed 7,697 rows in roughly 12.5 minutes).
get_sentiment_report(df=df)

100%|██████████████████████████████████████████████████████████████████████████████| 7697/7697 [12:28<00:00, 10.28it/s]
