In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Lower-case symbol keys; each one is spliced into the pickle file names
# that the scoring loop loads, and they become the row index of the results.
symbols = [
    'aapl',
    'btc',
    'eth',
    'spx',
]

In [3]:
# Loop invariants: label encodings and the prediction columns that get scored.
declared_encoding = {'sentiment_declared': {'Bearish': 0, 'Bullish': 1}}
predicted_encoding = {'pred_label': {'Negative': 0, 'Positive': 1}}
prediction_columns = ('sentiment_on', 'sentiment_vader', 'sentiment_bert', 'sentiment_bert_unb')

# One row per symbol:
# [n_rows, pos_ratio, 4 accuracies (on, vader, bert, bert_unb), 4 F1 scores (same order)]
scores = []
for symbol in symbols:
    # Load the cleaned twits plus the four per-model sentiment outputs.
    # pd.read_pickle unpickles arbitrary objects -- only point it at trusted files.
    twits = pd.read_pickle(f'data/stocktwits/datasets_clean/{symbol}_twits_012018_072019_clean.pickle')
    on_sent = pd.read_pickle(f'data/stocktwits/on_sentiment/on_{symbol}_sentiment.pickle')
    vader_sent = pd.read_pickle(f'data/stocktwits/vader_sentiment/vader_{symbol}_sentiment.pickle')
    bert_sent = pd.read_pickle(f'data/stocktwits/bert_sentiment/bert_{symbol}_sentiment.pickle')
    bert_unb_sent = pd.read_pickle(f'data/stocktwits/bert_unb_sentiment/bert_unb_{symbol}_sentiment.pickle')

    # Encode every label as 0/1, attach the predictions (aligned on the index),
    # then drop any row that has a missing value in any column.
    # NOTE(review): `>= 0` is False for a NaN score, so a missing on/vader score
    # would be encoded as 0 instead of being dropped -- confirm the inputs have none.
    data = (
        twits
        .replace(declared_encoding)
        .assign(
            sentiment_on=(on_sent.sentiment >= 0) + 0,
            sentiment_vader=(vader_sent.score >= 0) + 0,
            sentiment_bert=bert_sent.replace(predicted_encoding).pred_label,
            sentiment_bert_unb=bert_unb_sent.replace(predicted_encoding).pred_label,
        )
        .dropna()
    )

    declared = data['sentiment_declared']
    # Share of rows declared Bullish (encoded as 1), rounded to 3 decimals.
    pos_ratio = round(declared.sum() / len(data), 3)

    accuracies = [accuracy_score(declared, data[column]) for column in prediction_columns]
    f1_values = [f1_score(declared, data[column]) for column in prediction_columns]

    scores.append([len(data), pos_ratio, *accuracies, *f1_values])

In [4]:
# Column labels, in the same order as the values in each row of `scores`.
score_columns = [
    'data_size', 'pos_ratio',
    'on_acc', 'vader_acc', 'bert_acc', 'bert_unb_acc',
    'on_f1', 'vader_f1', 'bert_f1', 'bert_unb_f1',
]
# One row per symbol, indexed by the symbol key.
scores_df = pd.DataFrame(data=scores, index=symbols, columns=score_columns)

In [5]:
# Bare expression on the cell's last line: the notebook renders scores_df as the cell output.
scores_df

Unnamed: 0,data_size,pos_ratio,on_acc,vader_acc,bert_acc,bert_unb_acc,on_f1,vader_f1,bert_f1,bert_unb_f1
aapl,204710,0.682,0.593786,0.663729,0.77578,0.824493,0.683834,0.769666,0.820881,0.881354
btc,407992,0.721,0.632091,0.677739,0.749083,0.830913,0.730944,0.784805,0.806952,0.890202
eth,63619,0.816,0.693488,0.757997,0.792578,0.893475,0.796947,0.852954,0.860749,0.936759
spx,36483,0.516,0.569608,0.595592,0.755667,0.73659,0.613118,0.678268,0.745707,0.784915
