# Twitter Sentiment Analysis using ML and NLP

In [39]:
from operator import index
from random import Random

# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spacy
from Tools.demo.sortvisu import steps
from sklearn.model_selection import train_test_split

In [40]:
# Importing dataset
training_df = pd.read_csv('twitter_training.csv')
training_df

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...
...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...


In [27]:
# Adding column names
header_list = ['Tweet ID', 'Entity', 'Sentiments', 'Tweet Content']
training_df.to_csv('trainingset.csv', header = header_list, index = False)

In [28]:
new_training_df = pd.read_csv('trainingset.csv')

In [29]:
new_training_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...
...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...


In [30]:
new_training_df.isnull().sum()

Tweet ID           0
Entity             0
Sentiments         0
Tweet Content    686
dtype: int64

In [31]:
new_training_df.dropna(inplace=True)

In [32]:
new_training_df.isnull().sum()

Tweet ID         0
Entity           0
Sentiments       0
Tweet Content    0
dtype: int64

In [33]:
# Performing tokenization
nlp = spacy.load('en_core_web_sm') 
def preprocess(text):
    doc = nlp(text)
    filtered_tokens = []
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        filtered_tokens.append(token.lemma_)
    
    return " ".join(filtered_tokens)

In [34]:
new_training_df['Processed Tweet'] = new_training_df['Tweet Content'].apply(preprocess)

In [35]:
new_training_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content,Processed Tweet
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...,come border kill
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...,m get borderland kill
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...,m come borderland murder
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...,m get borderland 2 murder
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...,m get borderland murder
...,...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...,realize Windows partition Mac like 6 year Nvid...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...,realize Mac window partition 6 year Nvidia dri...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...,realize window partition Mac 6 year Nvidia dri...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...,realize window partition Mac like 6 year Nvidi...


In [36]:
# Performing label encoding
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
new_training_df['Sentiments'] = encoder.fit_transform(new_training_df['Sentiments'])
new_training_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content,Processed Tweet
0,2401,Borderlands,3,I am coming to the borders and I will kill you...,come border kill
1,2401,Borderlands,3,im getting on borderlands and i will kill you ...,m get borderland kill
2,2401,Borderlands,3,im coming on borderlands and i will murder you...,m come borderland murder
3,2401,Borderlands,3,im getting on borderlands 2 and i will murder ...,m get borderland 2 murder
4,2401,Borderlands,3,im getting into borderlands and i can murder y...,m get borderland murder
...,...,...,...,...,...
74676,9200,Nvidia,3,Just realized that the Windows partition of my...,realize Windows partition Mac like 6 year Nvid...
74677,9200,Nvidia,3,Just realized that my Mac window partition is ...,realize Mac window partition 6 year Nvidia dri...
74678,9200,Nvidia,3,Just realized the windows partition of my Mac ...,realize window partition Mac 6 year Nvidia dri...
74679,9200,Nvidia,3,Just realized between the windows partition of...,realize window partition Mac like 6 year Nvidi...


In [37]:
X = new_training_df['Processed Tweet']
y = new_training_df['Sentiments']

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [45]:
# Create Pipline
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

In [46]:
pipline = Pipeline([
    ('vectorized_tri_grams', TfidfVectorizer()),
    ('random_forest', RandomForestClassifier())
])

In [47]:
pipline.fit(X_train, y_train)

In [48]:
prediction = pipline.predict(X_test)

In [50]:
# Evaluation of the model
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

In [60]:
print('Accuracy_Score: ', accuracy_score(y_test, prediction))
print('F1_Score: ', f1_score(y_test, prediction, average='micro'))
print('Recall_Score: ', recall_score(y_test, prediction, average='micro'))
print('Precision_Score: ', precision_score(y_test, prediction, average='micro'))

Accuracy_Score:  0.9086437104708363
F1_Score:  0.9086437104708363
Recall_Score:  0.9086437104708363
Precision_Score:  0.9086437104708363


In [63]:
# Validating the model
validation_df = pd.read_csv('twitter_validation.csv')

In [64]:
validation_df

Unnamed: 0,3364,Facebook,Irrelevant,"I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣"
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
3,4433,Google,Neutral,Now the President is slapping Americans in the...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...
...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.


In [65]:
header_list = ['Tweet ID', 'Entity', 'Sentiments', 'Tweet Content']
validation_df.to_csv('validatingset.csv', header = header_list, index = False)

In [66]:
new_validation_df = pd.read_csv('validatingset.csv')

In [67]:
new_validation_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
3,4433,Google,Neutral,Now the President is slapping Americans in the...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...
...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.


In [68]:
new_validation_df['Processed Tweet'] = new_validation_df['Tweet Content'].apply(preprocess)

In [69]:
new_validation_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content,Processed Tweet
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...,BBC News Amazon boss Jeff Bezos reject claim c...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...,@Microsoft pay WORD function poorly @samsungu ...
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,...",csgo matchmaking closet hacking truly awful game
3,4433,Google,Neutral,Now the President is slapping Americans in the...,President slap Americans face commit unlawful ...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...,hi @EAHelp Madeleine McCann cellar past 13 yea...
...,...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...,⭐ ️ Toronto art culture capital Canada wonder ...
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,ACTUALLY good TOT bring viewer \n\n people get...
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...,today suck time drink wine n play borderland s...
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.,buy fraction Microsoft today small win


In [70]:
new_validation_df['Sentiments'] = encoder.fit_transform(new_validation_df['Sentiments'])

In [71]:
new_validation_df

Unnamed: 0,Tweet ID,Entity,Sentiments,Tweet Content,Processed Tweet
0,352,Amazon,2,BBC News - Amazon boss Jeff Bezos rejects clai...,BBC News Amazon boss Jeff Bezos reject claim c...
1,8312,Microsoft,1,@Microsoft Why do I pay for WORD when it funct...,@Microsoft pay WORD function poorly @samsungu ...
2,4371,CS-GO,1,"CSGO matchmaking is so full of closet hacking,...",csgo matchmaking closet hacking truly awful game
3,4433,Google,2,Now the President is slapping Americans in the...,President slap Americans face commit unlawful ...
4,6273,FIFA,1,Hi @EAHelp I’ve had Madeleine McCann in my cel...,hi @EAHelp Madeleine McCann cellar past 13 yea...
...,...,...,...,...,...
994,4891,GrandTheftAuto(GTA),0,⭐️ Toronto is the arts and culture capital of ...,⭐ ️ Toronto art culture capital Canada wonder ...
995,4359,CS-GO,0,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,ACTUALLY good TOT bring viewer \n\n people get...
996,2652,Borderlands,3,Today sucked so it’s time to drink wine n play...,today suck time drink wine n play borderland s...
997,8069,Microsoft,3,Bought a fraction of Microsoft today. Small wins.,buy fraction Microsoft today small win


In [72]:
new_prediction = pipline.predict(new_validation_df['Processed Tweet'])

In [73]:
new_prediction

array([2, 1, 1, 2, 1, 3, 3, 3, 1, 3, 3, 1, 2, 1, 3, 3, 1, 3, 1, 1, 2, 3,
       1, 2, 2, 1, 1, 3, 1, 3, 3, 1, 3, 1, 2, 2, 0, 3, 2, 3, 2, 2, 2, 3,
       2, 1, 1, 1, 2, 3, 1, 1, 3, 3, 3, 3, 1, 1, 0, 1, 3, 3, 0, 1, 2, 1,
       2, 2, 1, 3, 1, 3, 3, 3, 0, 3, 0, 2, 2, 2, 3, 3, 2, 3, 2, 1, 0, 1,
       2, 2, 2, 3, 0, 0, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 3, 2, 3, 1, 2, 2,
       2, 1, 2, 1, 2, 2, 3, 3, 2, 1, 1, 3, 2, 2, 2, 3, 2, 1, 2, 0, 3, 2,
       3, 3, 0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 3, 2, 1, 0, 3, 1, 2, 2, 2,
       2, 2, 1, 1, 3, 1, 2, 1, 0, 0, 0, 2, 2, 1, 1, 3, 3, 3, 2, 2, 3, 0,
       2, 2, 2, 3, 2, 1, 1, 2, 3, 3, 0, 0, 2, 3, 3, 2, 0, 2, 1, 1, 1, 1,
       3, 2, 2, 3, 3, 3, 3, 1, 3, 3, 0, 2, 0, 1, 1, 0, 3, 1, 1, 3, 1, 0,
       1, 3, 3, 1, 0, 0, 3, 3, 1, 3, 0, 2, 0, 0, 1, 2, 2, 3, 1, 0, 0, 3,
       3, 0, 0, 2, 3, 1, 1, 3, 3, 3, 3, 2, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1,
       3, 3, 0, 1, 2, 0, 3, 2, 0, 1, 2, 1, 3, 3, 1, 1, 1, 3, 1, 2, 3, 2,
       2, 1, 3, 1, 3, 1, 0, 2, 2, 3, 1, 2, 1, 0, 3,

In [102]:
accuracy_score = accuracy_score(new_validation_df['Sentiments'], new_prediction)
accuracy_score

0.944944944944945

In [103]:
f1_score = f1_score(new_validation_df['Sentiments'], new_prediction, average='micro')
f1_score

np.float64(0.944944944944945)

In [104]:
recall_score = recall_score(new_validation_df['Sentiments'], new_prediction, average='micro')
recall_score

np.float64(0.944944944944945)

In [105]:
precision_score = precision_score(new_validation_df['Sentiments'], new_prediction, average='micro')
precision_score

np.float64(0.944944944944945)