Using : https://www.kaggle.com/shashank1558/preprocessed-twitter-tweets : for testing trained model

In [16]:
# handle imports
import pandas as pd
from TweetProcessor import TweetProcessor

from gensim.models import Word2Vec
from gensim.models import FastText
from joblib import load

import scipy as sp
import numpy as np

from sklearn.metrics import accuracy_score

In [8]:
# load data sets
negatives = pd.read_csv('_resources/tweets/_test-set/negative.csv', names=['content', 'target'])
neutrals = pd.read_csv('_resources/tweets/_test-set/neutral.csv', names=['content', 'target'])
positives = pd.read_csv('_resources/tweets/_test-set/positive.csv', names=['content', 'target'])

In [9]:
# clean tweets
negatives['content'] = negatives.apply(lambda row: TweetProcessor.cleanTweet(str(row['content'])), axis=1)
neutrals['content'] = neutrals.apply(lambda row: TweetProcessor.cleanTweet(str(row['content'])), axis=1)
positives['content'] = positives.apply(lambda row: TweetProcessor.cleanTweet(str(row['content'])), axis=1)

In [10]:
# load the fasttext model
fasttext_model = FastText.load('_resources/models/fasttext.model')

# load the fasttext kmeans model
km_model_fasttext = load('_resources/models/km_fasttext.model')

In [33]:
# define cluster labels
cluster_labels = ['neutral_cluster', 'negative_cluster', 'positive_cluster']

tweet_processor = TweetProcessor()

# define an analyzing function
def analyze_sentiment(tweet):
    # phrase the tweet
    phrases = tweet_processor.phrase_model[tweet.split()]
    
    #if no text was provided, default to neutral
    if(len(phrases) == 0):
        return 1

    # predict sentiment using fasttext clusters
    fasttext_vector = fasttext_model.wv[phrases]
    fasttext_res = km_model_fasttext.predict(fasttext_vector)
    fasttext_mode = int(sp.stats.mode(fasttext_res)[0])

    # get counts
    neu = list(fasttext_res).count(0)
    neg = list(fasttext_res).count(1)
    pos = list(fasttext_res).count(2)

    # if pos == neg result is neutral
    if(pos == neg):
        return 0

    # if neu == (pos || neg) result is (pos || neg)
    if(neu == pos and pos > neg):
        return 0
    if(neu == neg and neg > pos):
        return 0

    # else result is mode
    return fasttext_mode

In [34]:
neg_sentiment = pd.DataFrame(negatives.apply(lambda row: analyze_sentiment(str(row['content'])), axis=1), columns=['prediction'])
neg_acc = accuracy_score(negatives['target'], neg_sentiment['prediction'])

neu_sentiment = pd.DataFrame(neutrals.apply(lambda row: analyze_sentiment(str(row['content'])), axis=1), columns=['prediction'])
neu_acc = accuracy_score(neutrals['target'], neu_sentiment['prediction'])

pos_sentiment = pd.DataFrame(positives.apply(lambda row: analyze_sentiment(str(row['content'])), axis=1), columns=['prediction'])
pos_acc = accuracy_score(positives['target'], pos_sentiment['prediction'])

print(f"accuracy_neutral: {neu_acc}, accuracy_negative: {neg_acc}, accuracy_positive: {pos_acc}")

0.10295434198746643 0.16379859783301465 0.8026981450252951
                                                content  target  prediction
0                               unhappy dog like though       1           2
1     talking driver im goinghe said hed love go new...       1           1
2     anybody know rand likely fall dollar ? got mon...       1           1
3                      miss going gig liverpool unhappy       1           2
4                  isnt new riverdale tonight ? unhappy       1           2
...                                                 ...     ...         ...
1112  wish knock lang talaga new school year good co...       1           2
1113                                  miss much unhappy       1           2
1114                                            unhappy       1           2
1115              hi instant message friend friend lang       1           2
1116                       hindi close friend ? unhappy       1           2

[1117 rows x 3 columns]
