# Application 4: Twitter Sentiment Analysis

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import datasets, preprocessing 
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.feature_extraction.text import TfidfVectorizer

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and convergence
# warnings raised by later cells are hidden as well.
warnings.filterwarnings('ignore')

In [2]:
import ipywidgets as widgets
from IPython.display import clear_output

In [3]:
import re # for regular expressions 
import nltk # for text manipulation 
from nltk.stem.porter import * 

In [4]:
# pip install wordcloud

In [5]:
# Load the labelled tweets from a CSV in the working directory.
# Later cells read the 'tweet' (text) and 'label' (target) columns of this frame.
tweets = pd.read_csv('data.csv')

In [6]:
def remove_pattern(input_txt, pattern):
    """Return `input_txt` with every match of the regex `pattern` removed.

    Parameters
    ----------
    input_txt : str
        Text to clean.
    pattern : str
        Regular expression whose matches are deleted.

    Returns
    -------
    str
        The text with all matches replaced by the empty string.
    """
    # Substitute with the pattern itself. Re-using each matched substring as a
    # new regex would (a) let a short match eat the prefix of a longer one and
    # (b) misbehave whenever the matched text contains regex metacharacters.
    return re.sub(pattern, '', input_txt)

In [7]:
# Strip @handles from every tweet and store the result in a new column.
# The pattern is a raw string so that `\w` reaches the regex engine verbatim
# instead of being parsed as an (invalid) string escape.
tweets['tidy_tweet'] = np.vectorize(remove_pattern)(tweets['tweet'], r"@[\w]*")

In [8]:
# Replace every character that is not a letter, '#', or a space with a space.
tweets['tidy_tweet'] = tweets['tidy_tweet'].str.replace(
    pat='[^a-zA-Z# ]',
    repl=" ",
    regex=True,
)

In [9]:
# Keep only the words that are longer than three characters.
tweets['tidy_tweet'] = tweets['tidy_tweet'].apply(
    lambda text: ' '.join(word for word in text.split() if len(word) > 3)
)

In [10]:
# Tokenize: split each cleaned tweet on whitespace into a list of words.
tokenized_tweet = tweets['tidy_tweet'].apply(str.split)

In [11]:
# Reduce every token to its Porter stem; the stemmer is reused by the
# prediction handler further down.
stemmer = PorterStemmer()
tokenized_tweet = tokenized_tweet.apply(
    lambda words: [stemmer.stem(word) for word in words]
)

In [12]:
# Glue the stemmed tokens of each tweet back into one space-separated string
# and write the result over the 'tidy_tweet' column.
tokenized_tweet = tokenized_tweet.apply(' '.join)
tweets['tidy_tweet'] = tokenized_tweet

In [13]:
# TF-IDF features: at most 1000 terms, ignoring English stop words, terms in
# more than 90% of tweets, and terms seen in fewer than 2 tweets.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split below, so test tweets influence the vocabulary and IDF
# weights — confirm whether that is intended.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_features=1000,
    min_df=2,
    max_df=0.90,
)

tfidf = tfidf_vectorizer.fit_transform(tweets['tidy_tweet'])
vocab = tfidf_vectorizer.vocabulary_  # term -> column index mapping

In [14]:
# Hold out 30% of the tweets for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    tfidf,
    tweets['label'],
    test_size=0.3,
    random_state=42,
)

In [15]:
# Tabular view of the TF-IDF matrix: one row per tweet, one column per term.
# `tfidf` is a SciPy sparse matrix; passing it straight to pd.DataFrame()
# yields a single object column of row matrices, so build the frame with the
# sparse accessor instead.
data = pd.DataFrame.sparse.from_spmatrix(tfidf)

In [16]:
# Instantiate the candidate models (all unfitted at this point).
lr_classifier = LogisticRegression()
lda_classifier = LinearDiscriminantAnalysis()
svc_classifier = SVC(probability=True)
kn_classifier = KNeighborsClassifier()
dt_classifier = DecisionTreeClassifier(max_depth=10)
rf_classifier = RandomForestClassifier()

# Collect them in creation order.
classifiers = [
    lr_classifier,
    lda_classifier,
    svc_classifier,
    kn_classifier,
    dt_classifier,
    rf_classifier,
]

In [17]:
# Multinomial naive Bayes model; created separately and not added to `classifiers`.
mnb_classifier = MultinomialNB()

In [18]:
# Train the multinomial naive Bayes model on the TF-IDF training split.
mnb_classifier.fit(X_train, Y_train)

MultinomialNB()

In [19]:
# Train the logistic regression model on the TF-IDF training split.
lr_classifier.fit(X_train, Y_train)

LogisticRegression()

In [20]:
# Train the LDA model. Unlike the other models it is given a dense copy of
# the sparse training matrix — presumably because LDA needs dense input;
# inputs at prediction time should be converted the same way.
lda_classifier.fit(X_train.toarray(), Y_train)

LinearDiscriminantAnalysis()

In [21]:
# Train the support vector classifier on the TF-IDF training split.
svc_classifier.fit(X_train, Y_train)

SVC(probability=True)

In [22]:
# Train the k-nearest-neighbours classifier on the TF-IDF training split.
kn_classifier.fit(X_train, Y_train)

KNeighborsClassifier()

In [23]:
# Train the depth-limited decision tree on the TF-IDF training split.
dt_classifier.fit(X_train, Y_train)

DecisionTreeClassifier(max_depth=10)

In [24]:
# Train the random forest on the TF-IDF training split.
rf_classifier.fit(X_train, Y_train)

RandomForestClassifier()

In [25]:
# Text box for the tweet to classify; its `.value` is read by the predict handler.
tweet = widgets.Text(description="tweet")

In [26]:
# Show a prompt followed by the tweet input box.
print('Please enter a tweet:')
display(tweet)

Please enter a tweet:


Text(value='', description='tweet')

In [27]:
# Dropdown for choosing the model; each option is a (label shown, code) pair
# and `algorithm.value` holds the code of the current selection.
algorithm = widgets.Dropdown(
    disabled=False,
    options=[
        ('Logistic Regression', 'LR'),
        ('Linear Discriminant Analysis ', 'LDA'),
        ('Support Vector Machines', 'SVM'),
        ('K-Nearest Neighbors', 'KN'),
        ('Multinomial Naive Bayes', 'MNB'),
        ('Decision Trees', 'DT'),
        ('Random Forest', 'RF'),
    ],
)

print('Select Algorithm')
display(algorithm)

Select Algorithm


Dropdown(options=(('Logistic Regression', 'LR'), ('Linear Discriminant Analysis ', 'LDA'), ('Support Vector Ma…

In [28]:
# Output area that the click handler prints its result into.
prediction = widgets.Output()

# Button that triggers a prediction when clicked.
button_predict = widgets.Button(description="Predict")

def on_button_predict_clicked(b):
    """Classify the tweet typed into the `tweet` widget and display the result.

    Click handler for `button_predict`. The typed text goes through the same
    cleaning steps as the training corpus (handle removal, replacing
    non-letter characters, dropping words of three characters or fewer,
    Porter stemming). It is then vectorized with the already-fitted
    `tfidf_vectorizer` and passed to the model selected in the `algorithm`
    dropdown. The outcome is printed into the `prediction` output widget.

    Parameters
    ----------
    b
        The button instance passed in by `on_click`; unused.
    """
    # Clean the user's tweet exactly like the training data.
    cleaned = remove_pattern(tweet.value, r"@[\w]*")
    cleaned = re.sub('[^a-zA-Z# ]', ' ', cleaned)
    stems = [stemmer.stem(word) for word in cleaned.split() if len(word) > 3]
    tidy_tweet = ' '.join(stems)

    # Vectorize the *user's* tweet with the fitted vectorizer. Refitting a new
    # vectorizer on the training corpus here would produce features for the
    # training tweets, and the prediction would ignore the typed text.
    features = tfidf_vectorizer.transform([tidy_tweet])

    selected_algorithm = algorithm.value
    classifier_by_code = {
        'LR': lr_classifier,
        'LDA': lda_classifier,
        'SVM': svc_classifier,
        'KN': kn_classifier,
        'MNB': mnb_classifier,
        'DT': dt_classifier,
        'RF': rf_classifier,
    }
    classifier = classifier_by_code[selected_algorithm]

    # LDA was trained on a dense array, so give it dense input here as well.
    if selected_algorithm == 'LDA':
        features = features.toarray()

    with prediction:
        clear_output(True)
        print(f'Selected Algorithm = {selected_algorithm}')
        predicted_label = classifier.predict(features)[0]
        print(predicted_label)
        # NOTE(review): label 0 is reported as positive and anything else as
        # negative — confirm this matches the dataset's label encoding.
        if predicted_label == 0:
            print('Positive Tweet')
        else:
            print('Negative Tweet')
        
# Run the prediction handler every time the Predict button is clicked.
button_predict.on_click(on_button_predict_clicked)

In [29]:
# Render the Predict button with the output area for its result underneath.
display(button_predict)
display(prediction)

Button(description='Predict', style=ButtonStyle())

Output()