# Sentiment Analysis: Model Training and Prediction

This notebook walks through the process of training sentiment analysis models for different sources and making predictions.

In [2]:
# import necessary libraries
import os
import re
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC

In [3]:
# Make sure the NLTK resources this notebook relies on are available locally:
# the punkt tokenizer data (punkt / punkt_tab) and the stop-word corpus.
for resource_name in ('punkt', 'stopwords'):
    nltk.download(resource_name)
# Kept as the cell's final expression so its return value is still echoed.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pkkar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pkkar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\pkkar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [5]:
# Directory containing the two CSV files. Defaults to the original local path;
# set the SENTIMENT_DATA_DIR environment variable to run the notebook elsewhere
# without editing this cell.
DATA_DIR = Path(os.environ.get("SENTIMENT_DATA_DIR", "C:/sentiment analysis/data"))

# Column names are supplied explicitly through `names=`, so pandas treats the
# first line of each file as data rather than as a header row.
COLUMN_NAMES = ['serial_number', 'source', 'sentiment', 'text']

train_data = pd.read_csv(DATA_DIR / "twitter_training.csv", names=COLUMN_NAMES)
val_data = pd.read_csv(DATA_DIR / "twitter_validation.csv", names=COLUMN_NAMES)

In [6]:
# Quick sanity check on the loaded frames: dimensions first, then a peek at
# the first training rows.
shape_reports = (
    ("Training data shape:", train_data),
    ("Validation data shape: ", val_data),
)
for label, frame in shape_reports:
    print(f"{label}{frame.shape}")

print("\nTraining data sample:")
display(train_data.head())

Training data shape:(74682, 4)
Validation data shape: (1000, 4)

Training data sample:


Unnamed: 0,serial_number,source,sentiment,text
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...


In [7]:
def _english_stop_words():
    """Return the NLTK English stop-word set, building it only on first use.

    The set is the same for every tweet, so it is stored on the function
    object after the first call and reused instead of being rebuilt per row.
    """
    cached = getattr(_english_stop_words, "_cached", None)
    if cached is None:
        cached = frozenset(stopwords.words('english'))
        _english_stop_words._cached = cached
    return cached


def preprocess_text(text):
    """Normalise one raw tweet into a space-joined string of content tokens.

    Steps: lowercase the text, delete every character that is not an ASCII
    letter or whitespace, tokenize with ``word_tokenize``, and drop English
    stop words.

    Parameters
    ----------
    text : object
        Raw tweet. Non-string values are converted with ``str``; missing
        values (``None``, ``NaN``, ``pd.NA``) are treated as empty text.

    Returns
    -------
    str
        Remaining tokens joined by single spaces; ``''`` when nothing is left
        or the input is missing.
    """
    if not isinstance(text, str):
        # A missing tweet would otherwise be stringified to "nan"/"none" and
        # leak into the features as a bogus token.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return ''
        text = str(text)

    # Digits and punctuation are deleted (not replaced by a space), so
    # "don't" becomes "dont" before tokenization.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    tokens = word_tokenize(letters_only)

    stop_words = _english_stop_words()
    return ' '.join(token for token in tokens if token not in stop_words)
    
    

In [9]:
# Clean every training tweet and keep the result next to the raw text.
train_data['processed_text'] = train_data['text'].map(preprocess_text)

In [12]:
# Side-by-side view of the raw and cleaned training tweets.
comparison_columns = ['text', 'processed_text']
display(train_data[comparison_columns])

Unnamed: 0,text,processed_text
0,im getting on borderlands and i will murder yo...,im getting borderlands murder
1,I am coming to the borders and I will kill you...,coming borders kill
2,im getting on borderlands and i will kill you ...,im getting borderlands kill
3,im coming on borderlands and i will murder you...,im coming borderlands murder
4,im getting on borderlands 2 and i will murder ...,im getting borderlands murder
...,...,...
74677,Just realized that the Windows partition of my...,realized windows partition mac like years behi...
74678,Just realized that my Mac window partition is ...,realized mac window partition years behind nvi...
74679,Just realized the windows partition of my Mac ...,realized windows partition mac years behind nv...
74680,Just realized between the windows partition of...,realized windows partition mac like years behi...


In [13]:
# Run the validation tweets through preprocess_text, then preview raw vs.
# cleaned text for the first rows.
val_data['processed_text'] = val_data['text'].map(preprocess_text)
val_preview = val_data[['text', 'processed_text']].head()
display(val_preview)

Unnamed: 0,text,processed_text
0,I mentioned on Facebook that I was struggling ...,mentioned facebook struggling motivation go ru...
1,BBC News - Amazon boss Jeff Bezos rejects clai...,bbc news amazon boss jeff bezos rejects claims...
2,@Microsoft Why do I pay for WORD when it funct...,microsoft pay word functions poorly samsungus ...
3,"CSGO matchmaking is so full of closet hacking,...",csgo matchmaking full closet hacking truly awf...
4,Now the President is slapping Americans in the...,president slapping americans face really commi...
