# Twitter Text Classification using Naive Bayes

In [1]:
#pip install tweepy
import pandas as pd
import csv
import tweepy 

## Performing Data Scraping using Tweepy

Scraping data using the python library called tweepy. After data is retrieved and converted into a dataframe, they will be saved to data.csv.After that dataset is selected and labeled manually by creating a new column indicating which tweets are positive or negative.

In [None]:
# Authenticate to Twitter
auth = tweepy.OAuthHandler("CONSUMER_KEY", 
    "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", 
    "ACCESS_TOKEN_SECRET")

# Create API object
api = tweepy.API(auth)

api.verify_credentials()

In [2]:
# Data Scraping
#result = tweepy.Cursor(api.search,q="iphone 12"+" -filter:retweets",lang="en").items(200)

# Removing Hashtag, @user, and link of tweets
#data = pd.DataFrame(data=[' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"," ",tweet.text).split()) for tweet in result], columns=['Tweets'])

#data.to_csv('data.csv') 

data = pd.read_csv("data.csv")
display(data)

Unnamed: 0,Tweets,SA
0,9to5mac iPhone 12 Pro Max loses to OnePlus 8T ...,-1
1,9to5mac Some iPhone 12 users experiencing exce...,-1
2,The lidar sensor on the iPhone 12 offers a peo...,-1
3,iPhone 12 suffers excessive power drain and no...,-1
4,Tech News iPhone 12 Pro Max loses to OnePlus 8...,-1
5,Service was shit with iPhone 11 LTE iPhone 12 ...,-1
6,Bro Even the iPhone 12 Pro Max OnePlus 8T or a...,-1
7,I hated the iPhone 11 I m sure I ll hate the i...,-1
8,Apple forced to ship iPhone 12 series with pow...,-1
9,I will never swap LG v40 thin Q for iPhone 12,-1


## Splitting data into training and testing sets

Because we do not specify the train size, it will take the test data with 25 percent of all data.

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data["Tweets"], data["SA"], test_size=0.50)

## Converting into count vectors

In [4]:
from nltk import word_tokenize          
from nltk.stem import WordNetLemmatizer 
class LemmaTokenizer:
    def __init__(self):
            self.wnl = WordNetLemmatizer()
    def __call__(self, doc):
        return [self.wnl.lemmatize(t) for t in word_tokenize(doc)]

from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(strip_accents="ascii", tokenizer=LemmaTokenizer(), lowercase=True)
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)

## Fit the model to create predictions

In [5]:
from sklearn.naive_bayes import MultinomialNB
naive_bayes = MultinomialNB()
naive_bayes.fit(X_train_cv, y_train)
predictions = naive_bayes.predict(X_test_cv)

## Check the results

In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
print("Accuracy score: ", accuracy_score(y_test, predictions))
print("Precision score: ", precision_score(y_test, predictions))
print("Recall score: ", recall_score(y_test, predictions))

Accuracy score:  0.8
Precision score:  0.75
Recall score:  0.9230769230769231


## Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
cm = confusion_matrix(y_test, predictions)
sns.heatmap(cm, square=True, annot=True, cmap="RdBu", cbar=False,
xticklabels=["positive", "negative"], yticklabels=["positive", "negative"])
plt.xlabel("true label")
plt.ylabel("predicted label")

Text(50.722222222222214, 0.5, 'predicted label')

## Investigate model's misses

In [9]:
testing_predictions = []
for i in range(len(X_test)):
    if predictions[i] == 1:
        testing_predictions.append("Positive")
    else:
        testing_predictions.append("Negative")
check_df = pd.DataFrame({"actual_label": list(y_test), "prediction": testing_predictions, "Tweets":list(X_test)})
check_df.replace(to_replace=-1, value="Negative", inplace=True)
check_df.replace(to_replace=1, value="Positive", inplace=True)

In [10]:
display(check_df)

Unnamed: 0,actual_label,prediction,Tweets
0,Positive,Positive,I edit all my YouTube videos on my iPhone 12 P...
1,Negative,Positive,I hated the iPhone 11 I m sure I ll hate the i...
2,Negative,Negative,Tech News iPhone 12 Pro Max loses to OnePlus 8...
3,Positive,Positive,Me and my grandpa are about to get matching iP...
4,Negative,Negative,9to5mac iPhone 12 Pro Max loses to OnePlus 8T ...
5,Positive,Negative,I was amazed too The new iPhone pro max 12
6,Negative,Negative,iPhone 12 Owners Facing Excessive Standby Batt...
7,Positive,Positive,Impressive HDR on iPhone 12 Pro
8,Negative,Negative,iPhone 12 suffers excessive power drain and no...
9,Negative,Positive,About this it s been a week since I had the iP...
