## Twitter Sentiment Analysis

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
df = pd.read_csv('twitter30k_cleaned.csv')

In [4]:
df.head()

Unnamed: 0,twitts,sentiment
0,robbiebronniman sounds like a great night,1
1,damn the person who stolde my wallet may karma...,1
2,greetings from the piano bench photo,1
3,drewryanscott i love it i love you haha forget...,1
4,kissthestars pretty pretty pretty please pakid...,0


In [5]:
df.isnull().sum()

twitts       0
sentiment    0
dtype: int64

In [6]:
df['sentiment'].value_counts()

1    15000
0    15000
Name: sentiment, dtype: int64

## SVM model preparation

In [39]:
def run_svm():
    x = df['twitts']
    y = df['sentiment']

    tfidf = TfidfVectorizer(norm='l1',ngram_range=(1,2),analyzer='word',max_features=5000)
    X = tfidf.fit_transform(x)
    x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0, stratify=y)
    clf = LinearSVC(loss="hinge",penalty='l2')
    clf.fit(x_train,y_train)
    print(x_train.shape)
    print(accuracy_score(y_test,clf.predict(x_test)))
    print(classification_report(y_test,clf.predict(x_test)))
    return tfidf,clf

In [40]:
tfidf,clf = run_svm()

(22500, 5000)
0.7504
              precision    recall  f1-score   support

           0       0.74      0.77      0.75      3750
           1       0.76      0.73      0.75      3750

    accuracy                           0.75      7500
   macro avg       0.75      0.75      0.75      7500
weighted avg       0.75      0.75      0.75      7500



In [27]:
x = "I am really happy thanks a lot for coming with me"

In [28]:
clf.predict(tfidf.transform([x]))

array([1], dtype=int64)

In [41]:
import joblib

In [42]:
joblib.dump(tfidf,'tfidf.pkl')

['tfidf.pkl']

In [43]:
joblib.dump(clf,'clf.pkl')

['clf.pkl']

### Load Model & Predict

In [44]:
import joblib

In [45]:
tfidf = joblib.load('tfidf.pkl')
clf = joblib.load('clf.pkl')

In [46]:
x = "I am a good boy"

In [48]:
clf.predict(tfidf.transform([x]))

array([1], dtype=int64)

### Real-Time Twitter Sentiment Analysis

In [49]:
!pip install tweepy


Collecting tweepy
  Downloading tweepy-3.9.0-py2.py3-none-any.whl (30 kB)
Installing collected packages: tweepy
Successfully installed tweepy-3.9.0
