
Compare Azure Text Analytics sentiment predictions with the given sentiment labels
-----------------

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import tensorflow as tf

In [2]:
import azureml.core
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core import Experiment
from azureml.core.webservice import Webservice
from azureml.core.image import ContainerImage
from azureml.core.webservice import AciWebservice
from azureml.core.conda_dependencies import CondaDependencies
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.32.0


In [3]:
# Connect to the Azure ML workspace.
# NOTE(review): the subscription id and resource group are hard-coded here, and `ws`
# is not referenced again in the visible part of this notebook — consider loading
# these values from configuration, or dropping the cell if it is not needed.
ws = Workspace.get(name="myworkspace",
               subscription_id='1d0c433f-f1fd-400f-abd1-7622138c35c1',
               resource_group='MyConginitiverviceS')

In [4]:
df=pd.read_csv('tweet.csv')
df.head()

Unnamed: 0,sentiment_label,tweet_text
0,0,visited my friends and had a great time!!! Tom...
1,0,vicar of dibley always makes me cry love them...
2,0,Cleaning house and fighting fatigue
3,0,I can't get to sleep. I have an urge to make f...
4,0,HELLO ALL TWITTERS!! I've missed u. Been away ...


In [5]:
df.shape

(1600, 2)

In [6]:
df['sentiment_label'].value_counts()

0    800
4    800
Name: sentiment_label, dtype: int64

In [7]:
df_preprocess = df.copy()

In [8]:
tweets = df_preprocess.tweet_text
tweets.head()

0    visited my friends and had a great time!!! Tom...
1    vicar of dibley always makes me cry  love them...
2                 Cleaning house and fighting fatigue 
3    I can't get to sleep. I have an urge to make f...
4    HELLO ALL TWITTERS!! I've missed u. Been away ...
Name: tweet_text, dtype: object

#### Remove words containing special characters (mentions, hashtags, URLs)

In [9]:
def removeWordWithChar(text, char_list):
    #Remove words in a text that contains a char from the list.
    text = text.split()
    res = [ele for ele in text if all(ch not in ele for ch in char_list)]
    res = ' '.join(res)
    return res

char_list = ['@', '#', 'http', 'www', '/']

removeWordWithChar(tweets[0], char_list)

'visited my friends and had a great time!!! Tomorrow, up at 6:00 a.m.'

In [10]:
# Apply the word filter to every tweet, keeping the original order.
tweets_cleaned = [removeWordWithChar(raw_tweet, char_list) for raw_tweet in tweets]

#### Tokenize

In [11]:
def tokenize(texts, min_token_length=4):
    """Lower-case each text, split it into word tokens and drop short tokens.

    Args:
        texts: Iterable of strings to tokenize.
        min_token_length: Tokens with fewer characters than this are
            discarded. Defaults to 4, the threshold this notebook has
            always used.
            NOTE(review): with the default, short sentiment-bearing words
            such as "not", "bad" or "sad" are removed as well — confirm
            this is intended.

    Returns:
        A list with one list of tokens per input text, in input order.
    """
    # r'\w+' keeps runs of word characters, so punctuation is discarded.
    tokenizer = nltk.RegexpTokenizer(r'\w+')

    return [
        [
            token
            for token in tokenizer.tokenize(text.lower())
            if len(token) >= min_token_length
        ]
        for text in texts
    ]

In [12]:
tweets_tokens = tokenize(tweets_cleaned)

#### Remove Stopwords

In [13]:
def removeSW(texts_tokens):
    """Filter English stop words out of every token list.

    Args:
        texts_tokens: Iterable of token lists (one list per text).

    Returns:
        A new list of token lists in which every token found in NLTK's
        English stop-word list has been removed; order is preserved.
    """
    # Build the lookup set once so membership tests are cheap.
    english_stopwords = frozenset(stopwords.words('english'))

    return [
        [token for token in tokens if token not in english_stopwords]
        for tokens in texts_tokens
    ]

In [14]:
tweets_filtered = removeSW(tweets_tokens)

#### Lemmatisation

In [15]:
def lemma(texts_filtered):
    """Lemmatize every token of every text with NLTK's WordNetLemmatizer.

    Args:
        texts_filtered: Iterable of token lists (one list per text).

    Returns:
        A new list of token lists where each token has been replaced by
        the result of ``WordNetLemmatizer().lemmatize(token)``.
    """
    # One lemmatizer instance is shared across all texts.
    lemmatize = WordNetLemmatizer().lemmatize

    return [[lemmatize(word) for word in words] for words in texts_filtered]

In [16]:
tweets_lem = lemma(tweets_filtered)

In [17]:
# Re-assemble each lemmatized token list into a single space-separated string.
tweets_ready = [' '.join(tokens) for tokens in tweets_lem]

In [18]:
df_preprocess['tweet'] = tweets_ready
df_preprocess['sentiment_label'] = df.sentiment_label.replace(4, 1)

In [19]:
df_preprocess= df_preprocess[['sentiment_label', 'tweet']]

In [20]:
df_preprocess.head()

Unnamed: 0,sentiment_label,tweet
0,0,visited friend great time tomorrow
1,0,vicar dibley always make love bit
2,0,cleaning house fighting fatigue
3,0,sleep urge make friendship bracelet
4,0,hello twitter missed away long much better tho...


#### Testing with Azure's built-in Text Analytics API

In [21]:
import environ
import os
import requests
import json
import pandas as pd

In [22]:
from os import getenv
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

In [23]:
key = getenv('API_KEY')

In [24]:
def authenticate_client(endpoint="https://mytextanalytics.cognitiveservices.azure.com", api_key=None):
    """Create a TextAnalyticsClient for the Azure Text Analytics resource.

    Args:
        endpoint: URL of the Text Analytics resource. Defaults to the
            endpoint this notebook has always used.
        api_key: Subscription key for the resource. When omitted, the
            module-level ``key`` (read from the ``API_KEY`` environment
            variable) is used.

    Returns:
        A ``TextAnalyticsClient`` built from ``endpoint`` and the key.

    Raises:
        ValueError: If no API key is available, so the problem is reported
            here rather than as an opaque failure later on.
    """
    if api_key is None:
        api_key = key
    if not api_key:
        raise ValueError(
            "No Azure Text Analytics key found: set the API_KEY environment "
            "variable or pass api_key explicitly."
        )

    ta_credential = AzureKeyCredential(api_key)
    return TextAnalyticsClient(endpoint=endpoint, credential=ta_credential)

client = authenticate_client() 

In [25]:
X1=df_preprocess[df_preprocess['sentiment_label'] == 1][0:5]  

In [26]:
X2=df_preprocess[df_preprocess['sentiment_label'] == 0][0:5]  

In [27]:
frames = [X1, X2]
X = pd.concat(frames)

In [28]:
X=X.reset_index(drop=True)

In [80]:
# Work on an independent copy so that columns added to nlp_azure_cognitive
# do not also appear on X (plain assignment would only create an alias).
nlp_azure_cognitive = X.copy()

### Test Azure sentiment analysis on a simple sentence

In [81]:
sentence = 'Hello, my name is Ken and i like chocolate!'

res = client.analyze_sentiment(documents=[sentence])

In [82]:
print('Scores : {}'.format(res[0].confidence_scores))

Scores : {'positive': 0.08, 'neutral': 0.92, 'negative': 0.0}


In [83]:
print('Sentiment associated : {}'.format(res[0].sentiment))

Sentiment associated : neutral


### Compare the given `sentiment_label` with the label predicted by Azure

In [84]:
def get_sentiment(client, sentence):
    """Map Azure's sentiment scores for one sentence to a binary label.

    Args:
        client: Object exposing ``analyze_sentiment(documents=[...])``, such
            as the ``TextAnalyticsClient`` created earlier in this notebook.
        sentence: The text to analyse; it is sent as a single-document batch.

    Returns:
        0 when the negative confidence score is strictly greater than the
        positive one, otherwise 1. Ties — including results where the
        neutral score dominates — therefore map to 1.

    Raises:
        ValueError: If the service reports an error for the document
            instead of returning sentiment scores.
    """
    doc = client.analyze_sentiment(documents=[sentence])[0]

    # A failed document carries no confidence scores; fail with a clear message.
    if getattr(doc, 'is_error', False):
        raise ValueError(
            'Azure sentiment analysis failed for {!r}: {}'.format(
                sentence, getattr(doc, 'error', None)
            )
        )

    scores = doc.confidence_scores
    return 0 if scores.negative > scores.positive else 1

# Score each row's `tweet` text with Azure and store the resulting 0/1 label.
# get_sentiment is invoked once per row, i.e. one analyze_sentiment request per tweet.
nlp_azure_cognitive['azure_prediction'] = nlp_azure_cognitive.apply(lambda x: get_sentiment(client, x.tweet), axis=1)

In [85]:
nlp_azure_cognitive

Unnamed: 0,sentiment_label,tweet,azure_prediction
0,1,nice much rain,1
1,1,vote opinion susan boyle,1
2,1,slip wanting icon change next name,0
3,1,sent friend request,1
4,1,night museum movie pretty good,1
5,0,visited friend great time tomorrow,1
6,0,vicar dibley always make love bit,1
7,0,cleaning house fighting fatigue,0
8,0,sleep urge make friendship bracelet,1
9,0,hello twitter missed away long much better tho...,0


In [86]:
# Flag the rows where Azure's label disagrees with the given label.
mismatch_mask = nlp_azure_cognitive['azure_prediction'].ne(nlp_azure_cognitive['sentiment_label'])

Total = len(nlp_azure_cognitive)
Total_Error = int(mismatch_mask.sum())

# Accuracy is the complement of the error rate, expressed in percent.
Percent = 100 - Total_Error * 100 / Total

print('Accuracy : {} %'.format(Percent))

Accuracy : 60.0 %


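#### Result: on this 10-tweet sample (5 positive, 5 negative), the Azure built-in API agrees with the given labels 60 percent of the time. The sample is too small to draw general conclusions, and Azure was given the preprocessed tweets (stop words removed, lemmatized) rather than the original text.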