In [1]:
#!wget https://raw.githubusercontent.com/laxmimerit/twitter-data/master/twitt30k.csv

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC,LinearSVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
df = pd.read_csv("twitt30k.csv")
df.head()

Unnamed: 0,twitts,sentiment
0,@robbiebronniman Sounds like a great night.,1
1,Damn the person who stolde my wallet !!!!! Ma...,1
2,Greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars Pretty pretty pretty please, pak...",0


In [5]:
df.shape

(30000, 2)

In [6]:
df["sentiment"].value_counts()

1    15000
0    15000
Name: sentiment, dtype: int64

## SVM Model and Data Preparation

In [7]:
def run_svm(df):
    
    X = df["twitts"]
    y = df["sentiment"]

    tfidf = TfidfVectorizer()
    X = tfidf.fit_transform(X)

    X_train,X_test,y_train,y_test = \
        train_test_split(X,y ,test_size=0.2,random_state=0,stratify=y)

    print("X Shape : {}".format(X.shape))

    clf = LinearSVC()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    
    print()
    print("Report")
    print(classification_report(y_test, y_pred))
    
    return tfidf,clf

In [8]:
%%time
tfidf,clf = run_svm(df)

X Shape : (30000, 40854)

Report
              precision    recall  f1-score   support

           0       0.75      0.74      0.75      3000
           1       0.74      0.75      0.75      3000

    accuracy                           0.75      6000
   macro avg       0.75      0.75      0.75      6000
weighted avg       0.75      0.75      0.75      6000

CPU times: user 361 ms, sys: 9.34 ms, total: 370 ms
Wall time: 369 ms


In [9]:
x = "i am really happy. thanks a lot for coming with me"

clf.predict(tfidf.transform([x]))

array([1])

## Data Cleaning and Retraining SVM

In [10]:
import Preprocess_gokhanEr as pp

In [11]:
df.head()

Unnamed: 0,twitts,sentiment
0,@robbiebronniman Sounds like a great night.,1
1,Damn the person who stolde my wallet !!!!! Ma...,1
2,Greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars Pretty pretty pretty please, pak...",0


In [12]:
df["twitts"] = df["twitts"].apply(lambda words : pp.get_lower_convert(words))
df.head()

Unnamed: 0,twitts,sentiment
0,@robbiebronniman sounds like a great night.,1
1,damn the person who stolde my wallet !!!!! ma...,1
2,greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars pretty pretty pretty please, pak...",0


In [13]:
df["twitts"] = df["twitts"].apply(lambda words : pp.cont_exp(words))
df.head()

Unnamed: 0,twitts,sentiment
0,@robbiebronniman sounds like a great night.,1
1,damn the person who stolde my wallet !!!!! ma...,1
2,greetings from the piano bench (photo) http:/...,1
3,@drewryanscott i love it!! i love you!! haha f...,1
4,"@kissthestars pretty pretty pretty please, pak...",0


In [18]:
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_emails(words))
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_urls(words))
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_rt(words))
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_html_tags(words))
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_special_chars(words))
df["twitts"] = df["twitts"].apply(lambda words : pp.remove_accented_chars(words))

In [19]:
df.head()

Unnamed: 0,twitts,sentiment
0,robbiebronniman sounds like a great night,1
1,damn the person who stolde my wallet may karma...,1
2,greetings from the piano bench photo,1
3,drewryanscott i love it i love you haha forget...,1
4,kissthestars pretty pretty pretty please pakid...,0


## Fine Tune Your ML Model

In [37]:
def run_svm(df):
    
    X = df["twitts"]
    y = df["sentiment"]

    tfidf = TfidfVectorizer(norm = "l2", ngram_range=(1,2),analyzer="word")
    X = tfidf.fit_transform(X)

    X_train,X_test,y_train,y_test = \
        train_test_split(X,y ,test_size=0.2,random_state=0,stratify=y)

    print("X Shape : {}".format(X.shape))

    clf = LinearSVC()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    
    print()
    print("Report")
    print(classification_report(y_test, y_pred))
    
    return tfidf,clf

tfidf, clf = run_svm(df)

X Shape : (30000, 219355)

Report
              precision    recall  f1-score   support

           0       0.76      0.75      0.76      3000
           1       0.76      0.77      0.76      3000

    accuracy                           0.76      6000
   macro avg       0.76      0.76      0.76      6000
weighted avg       0.76      0.76      0.76      6000



## Saving and Loading ML Model

In [39]:
import pickle

In [40]:
pickle.dump(clf, open("clf.pkl","wb"))
pickle.dump(tfidf, open("tfidf.pkl","wb"))

In [41]:
del clf,tfidf

In [42]:
clf = pickle.load(open("clf.pkl","rb"))
tfidf = pickle.load(open("tfidf.pkl","rb"))

In [44]:
x = 'i am really happy. thanks a lot for coming with me'

'i am really happy. thanks a lot for coming with me'

In [47]:
clf.predict(tfidf.transform([x]))

array([1])

---

In [48]:
consumer_key = 'R7DGimRNkT11sbngA0MRqLmNE'
consumer_secret = 'w5Axtw43feejwgmPIhqPhPOt1aHso1Guw1yuFwlmijtlh0vguK'
access_token = '1279486577656295425-l3gaKqKuHQdKl44rPXUc0WYcc26wgq'
access_token_secret = '80dGAdcx6LuoWM1mSt669V5NESP0EOuX1dK8Mianjqxi2'

In [50]:
import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

public_tweets = api.home_timeline()

In [54]:
for tweet in public_tweets:
    print(tweet.text)

🎙 Briefing by Foreign Ministry Spokeswoman Maria #Zakharova, October 14, 2021.

🔸#Lavrov
🔸#EWF
🔸#Afghanistan
🔸… https://t.co/v6jpQTfniL
An encounter has started at Wahibug area of Pulwama. Police &amp; security forces are on the job. Details shall follow:… https://t.co/sUwN05VOXf
RT @RussianEmbassy: Just imagine how many more bright sides of #Russia @BBC &amp; Co. have been keeping secret from the British public! https:/…
#HaryanaPolice said on Friday that an FIR was lodged in connection with the murder of a man belonging to #Punjab ne… https://t.co/HO4PcWpYy0
RT @SuperJaxicle: @netflix When you about to leave the family function, but they bring out a fresh pan of mac &amp; cheese... https://t.co/NmH2…
"The study is significant in that it points to the increased infection risk that coughing in the same direction as… https://t.co/9lRSa18ROp
AI for Earth Monitoring Massive Open Online Course https://t.co/BuFP62oTVR
The condition of former Prime Minister #ManmohanSingh, who is admitted to