In [None]:
"""
Sentiment Analysis using Azure Cognitive Services and Vader Sentiment Analysis


Installation Guide.

Azure:

Run `pip install azure-ai-textanalytics --pre` in a terminal. For further details, consult the documentation below:

https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/quickstarts/client-libraries-rest-api?tabs=version-3-1&pivots=programming-language-python


Vader:

Run pip install vaderSentiment

Github: https://github.com/cjhutto/vaderSentiment

"""


In [2]:
import ast
import json
import os
import time

import pandas as pd
import requests

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [3]:
# Azure
#
# The subscription key is a secret and must never be stored in the notebook.
# Export AZURE_TEXT_ANALYTICS_KEY (and, optionally,
# AZURE_TEXT_ANALYTICS_ENDPOINT) before starting the kernel.
# NOTE(review): a key was previously committed in this cell; it should be
# rotated in the Azure portal because it remains visible in version history.

key = os.environ.get("AZURE_TEXT_ANALYTICS_KEY")
endpoint = os.environ.get(
    "AZURE_TEXT_ANALYTICS_ENDPOINT",
    "https://shahnawazkhalid.cognitiveservices.azure.com/",
)

def authenticate_client():
    """Build a TextAnalyticsClient from the module-level `key` and `endpoint`.

    Returns:
        TextAnalyticsClient: client bound to `endpoint` and authenticated
        with an AzureKeyCredential wrapping `key`.

    Raises:
        RuntimeError: if no key was supplied through the
            AZURE_TEXT_ANALYTICS_KEY environment variable.
    """
    if not key:
        raise RuntimeError(
            "Azure key missing: set the AZURE_TEXT_ANALYTICS_KEY environment "
            "variable before running this notebook."
        )
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=ta_credential)
    return text_analytics_client

client = authenticate_client()


In [4]:
# Vader
# Build one shared analyzer instance; the Vader scoring cell further down
# calls analyzer.polarity_scores() on every sampled tweet.
analyzer = SentimentIntensityAnalyzer()


In [45]:
# Load individual tweets
# The CSV is read from the current working directory; only its 'text' column
# is used by the rest of the notebook.
# NOTE(review): cells in 'text' are assumed to be strings -- confirm there are
# no missing values, since the cleaning step calls .split() on each one.

df = pd.read_csv('TWEETS_individual.csv')
tweets = df['text']

  interactivity=interactivity, compiler=compiler, result=result)


In [46]:
# Clean up tweets for sentiment analysis
def Clean_Tweet(data):
    """Remove hashtags, cashtags, mentions and URLs from a tweet.

    The tweet is split on whitespace and every token that starts with
    '#', '$', '@' or 'http' is dropped; the remaining tokens are re-joined
    with single spaces. Note that *any* token starting with '$' is removed,
    so dollar amounts such as '$5' disappear along with cashtags.

    Args:
        data: the raw tweet text. Non-string values (e.g. the NaN that pandas
            uses for a missing cell) are treated as an empty tweet.

    Returns:
        str: the cleaned tweet, or '' when nothing is left or the input was
        not a string.
    """
    # Missing cells arrive as float NaN; they have no .split(), so map them
    # to the empty string instead of raising.
    if not isinstance(data, str):
        return ''

    # One filtering pass keeps the original token order and avoids repeated
    # list.remove() calls, each of which rescans the list.
    unwanted_prefixes = ('#', '$', '@', 'http')
    kept = [token for token in data.split()
            if not token.startswith(unwanted_prefixes)]
    return ' '.join(kept)
            
# Clean every tweet and keep the result as a one-column DataFrame
# (the column keeps the name of the source Series).
tweets_clean = tweets.apply(Clean_Tweet).to_frame()
tweets_clean

Unnamed: 0,text
0,GameStop is more of a momentum play than a Vol...
1,GameStop Corporation Comm Top stock up 635% fr...
2,GameStop Corp. Class A Shares Close the Week 4...
3,Don't Touch GameStop Stock Until This Happens
4,GameStop Corporation Comm Top stock up 635% fr...
...,...
72828,Hadn’t checked GameStop for months. It somehow...
72829,Hey Kenny! Time is on my side!! CANT STOP WONT...
72830,💎🙌🏽🦍🚀New Rap Music 2021 GameStop (GME) Ft Jim ...
72831,"I know you're AMC, but thought you'd like what..."


In [77]:
# Draw a reproducible 10% random sample of the cleaned tweets.
# (For a quick smoke test, use tweets_clean.iloc[:100] instead.)
test = tweets_clean.sample(frac=.1, random_state = 440)

# 'text' comes from the sample; the remaining columns do not exist in it yet,
# so they start out empty and are filled in by the Vader and Azure cells.
result_columns = [
    'text',
    'Vader', 'Vader_Sent', 'Azure',
    'V_pos', 'V_neu', 'V_neg',
    'A_pos', 'A_neu', 'A_neg',
]
df_test = pd.DataFrame(test, columns=result_columns)
df_test

Unnamed: 0,text,Vader,Vader_Sent,Azure,V_pos,V_neu,V_neg,A_pos,A_neu,A_neg
72279,"I’m ready, how about you?!",,,,,,,,,
66751,⚔️🛡 📜🦾🇺🇸#AmericasMost 420🚀#PowerToThePlayers 💎...,,,,,,,,,
7512,Ho hum. GameStop is up another in the premarke...,,,,,,,,,
30037,Keith Gill Drove the GameStop Reddit Mania. He...,,,,,,,,,
68010,all your stocks on and,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
26769,This is WHY.,,,,,,,,,
17325,Today's daily candle on GameStop is insane...,,,,,,,,,
67559,"GameStop Rallies, But Here's Why You Shouldn't...",,,,,,,,,
25398,Dow slips while GameStop and others continue t...,,,,,,,,,


In [78]:
# Computing sentiment of tweets using Azure.
# Running this can be slow. Also please be careful with the size of the dataset and avoid very large ones.
# response.sentiment = {'positive', 'neutral', 'negative', 'mixed'}
# response.confidence_scores - confidence score for each sentiment
#
# Tweets are sent in small batches instead of one request per tweet, and
# documents the service reports as errors are skipped (their columns stay
# empty) rather than aborting the whole run.

# NOTE(review): 10 is the documented per-request document limit for sentiment
# analysis at the time of writing -- confirm against the current service limits.
AZURE_BATCH_SIZE = 10

# Only non-empty strings are sent; empty or missing tweets are skipped.
has_text = df_test['text'].map(
    lambda text: isinstance(text, str) and len(text) > 0
).astype(bool)
texts_to_score = df_test.loc[has_text, 'text']

# The label column starts out empty; make it an object column so string
# labels can be stored without a dtype-upcast warning on newer pandas.
df_test['Azure'] = df_test['Azure'].astype(object)

for start in range(0, len(texts_to_score), AZURE_BATCH_SIZE):
    batch = texts_to_score.iloc[start:start + AZURE_BATCH_SIZE]
    responses = client.analyze_sentiment(documents=batch.tolist())

    # Results come back in the same order as the submitted documents, so
    # they can be paired with the batch's index labels positionally.
    for index, response in zip(batch.index, responses):
        if response.is_error:
            # Per-document failure: no sentiment attributes are available.
            continue

        df_test.loc[index, 'Azure'] = response.sentiment
        df_test.loc[index, 'A_pos'] = response.confidence_scores.positive
        df_test.loc[index, 'A_neu'] = response.confidence_scores.neutral
        df_test.loc[index, 'A_neg'] = response.confidence_scores.negative



In [79]:
# Computing sentiment of tweets using Vader. 
# Score is between -1 and 1.
# Individual scores for each sentiment also provided, though not as frequently used. 

def V_Sent(score, threshold=0.05):
    """Turn a Vader compound score into a sentiment label.

    Args:
        score: the Vader compound score to classify.
        threshold: half-width of the neutral band around zero. Defaults to
            0.05, the value this notebook has always used.

    Returns:
        str: 'positive' if score > threshold, 'negative' if
        score < -threshold, otherwise 'neutral' (both boundaries are neutral).
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'

# Score every tweet once, collect the results in a frame aligned with
# df_test's index, then assign whole columns. This replaces row-by-row .loc
# writes, which are slow and store strings into an initially empty column
# one cell at a time.
vader_keys = ['neg', 'neu', 'pos', 'compound']
vader_scores = pd.DataFrame(
    [analyzer.polarity_scores(text) for text in df_test['text']],
    index=df_test.index,
    columns=vader_keys,  # keeps the columns present even when df_test is empty
)

df_test['Vader'] = vader_scores['compound']
df_test['Vader_Sent'] = vader_scores['compound'].map(V_Sent)
df_test['V_pos'] = vader_scores['pos']
df_test['V_neu'] = vader_scores['neu']
df_test['V_neg'] = vader_scores['neg']

    
    

In [82]:
df_test

Unnamed: 0,text,Vader,Vader_Sent,Azure,V_pos,V_neu,V_neg,A_pos,A_neu,A_neg
72279,"I’m ready, how about you?!",0.4199,positive,neutral,0.411,0.589,0.000,0.06,0.92,0.02
66751,⚔️🛡 📜🦾🇺🇸#AmericasMost 420🚀#PowerToThePlayers 💎...,0.0000,neutral,neutral,0.000,1.000,0.000,0.00,1.00,0.00
7512,Ho hum. GameStop is up another in the premarke...,0.0000,neutral,negative,0.000,1.000,0.000,0.05,0.29,0.66
30037,Keith Gill Drove the GameStop Reddit Mania. He...,0.0000,neutral,positive,0.000,1.000,0.000,0.78,0.21,0.01
68010,all your stocks on and,0.0000,neutral,neutral,0.000,1.000,0.000,0.05,0.94,0.01
...,...,...,...,...,...,...,...,...,...,...
26769,This is WHY.,0.0000,neutral,neutral,0.000,1.000,0.000,0.03,0.92,0.05
17325,Today's daily candle on GameStop is insane...,-0.4019,negative,negative,0.000,0.690,0.310,0.00,0.04,0.96
67559,"GameStop Rallies, But Here's Why You Shouldn't...",-0.3724,negative,negative,0.000,0.855,0.145,0.02,0.01,0.97
25398,Dow slips while GameStop and others continue t...,0.0000,neutral,neutral,0.000,1.000,0.000,0.08,0.75,0.17


In [80]:
# Azure labels some tweets as 'mixed', a class Vader has no counterpart for.
# Ideally these are few relative to the size of the sample.

is_mixed = df_test['Azure'] == 'mixed'
print('Mixed Azure:', int(is_mixed.sum()))

Mixed Azure: 362


In [81]:
# Compare sentiment classification using the two different methods:
# count how often each (Azure label, Vader label) pair occurs.
df_test.value_counts(subset=['Azure', 'Vader_Sent'])


Azure     Vader_Sent
neutral   neutral       1972
negative  negative      1264
neutral   positive      1138
positive  positive      1094
negative  positive       577
          neutral        386
neutral   negative       264
mixed     positive       241
positive  neutral        137
mixed     negative        98
positive  negative        65
mixed     neutral         23
dtype: int64