# Import

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import csv
import sklearn

from collections import Counter

# Load data

In [2]:
def load(f):
    """Read a labelled-text CSV into a list of ``(text, label)`` pairs.

    Parameters
    ----------
    f : str or path-like
        Path to a CSV file whose header row contains at least the columns
        ``SentimentText`` and ``Sentiment``.

    Returns
    -------
    list of tuple
        One ``(row['SentimentText'], int(row['Sentiment']))`` tuple per data
        row, in file order.

    Raises
    ------
    KeyError
        If either required column is absent from the header.
    ValueError, TypeError
        If a ``Sentiment`` cell cannot be converted by ``int`` (non-numeric
        text, or a short row where the cell is missing).
    """
    # newline="" hands line-ending handling to the csv module, so line breaks
    # embedded inside quoted fields are preserved instead of being rewritten
    # by universal-newline translation.
    with open(f, encoding="latin-1", newline="") as csvfile:
        return [
            (row['SentimentText'], int(row['Sentiment']))
            for row in csv.DictReader(csvfile)
        ]

In [3]:
# Read the whole labelled training file into memory as (text, label) pairs.
train_csv_path = 'data/train.csv'
train = load(train_csv_path)

In [4]:
# Display names for the integer sentiment classes; the position in the list
# is the class id (0 -> negative, 1 -> positive).
labels = dict(enumerate(["negative", "positive"]))

# Train / test split

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Separate the texts (inputs) from the sentiment labels (targets), then hold
# out 20% of the rows for testing. The fixed seed keeps the split repeatable.
x = [text for text, _label in train]
y = [label for _text, label in train]
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

# Text vectorization

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [8]:
# Fit the TF-IDF vectorizer on the training texts only and convert those same
# texts to a feature matrix in one pass. The fitted `vectorizer` is reused
# later to transform held-out text.
vectorizer = TfidfVectorizer()
x_train_vec = vectorizer.fit_transform(x_train)

In [9]:
# Bare expression: the cell displays the matrix's repr (a summary, not its contents).
x_train_vec

<79991x90893 sparse matrix of type '<class 'numpy.float64'>'
	with 952953 stored elements in Compressed Sparse Row format>

# Train model

In [10]:
from sklearn.naive_bayes import MultinomialNB

In [11]:
# Train a multinomial Naive Bayes classifier on the TF-IDF training matrix
# using the constructor's default settings. The trailing `fit` call is the
# cell's last expression, so its return value is what the cell displays.
model = MultinomialNB()
model.fit(x_train_vec, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

# Test model

In [12]:
def test_it(i):
    """Print held-out text ``i`` followed by ``predicted (actual)`` label names.

    Reads the notebook-level ``x_test``, ``y_test``, ``vectorizer``, ``model``
    and ``labels``; returns nothing.
    """
    text = x_test[i]
    print(text)
    # The vectorizer and model operate on batches, so wrap the single text in
    # a list and take the first (only) prediction.
    predicted = model.predict(vectorizer.transform([text]))[0]
    print("{} ({})".format(labels[predicted], labels[y_test[i]]))

In [13]:
# Spot-check the first ten held-out samples; the empty print() puts a blank
# line between consecutive samples.
for i in range(10):
    test_it(i)
    print()

@Allieandra wheeee! 
negative (positive)

@a02toyota Thank you for the FF! Good to meet ya 
positive (positive)

@ electricbath Eewwww. Gross! So sorry hayward hates you like that. 
negative (negative)

#followfriday - I'm a little late, but here's a special shoutout for @SomersetMarcy - my missus! 
positive (positive)

#icanhelp in shopping (deals), personal assistant, event planning!! I own GET IT TOGETHER, those are my services  jennifer.git@gmail.com
positive (positive)

 broken hearts will heal with time...
negative (negative)

..I've already listened to all the S4 commentary except the finale 
positive (negative)

&quot;Everybody make mistakes.&quot; I'm gonna go get some sleep because I have an other show tomorrow night and I want it to be peeeeerfect! 
positive (positive)

#I Believe...that if you smile at someone, friend or stranger, you will make TWO people feel good.  
positive (positive)

@andreacFOD I think I'm done at twitterland too. I will tweet David one last time tomo

# Evaluate model

In [14]:
from sklearn.metrics import classification_report

In [15]:
# Predict on the whole held-out split in one batch.
x_test_vec = vectorizer.transform(x_test)
y_pred = model.predict(x_test_vec)

# Pass the class ids and their display names as explicitly aligned lists, so
# each report row is tied to its class id rather than to the insertion order
# of the `labels` dict (and a real list is passed instead of a dict view).
class_ids = sorted(labels)
print(classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=[labels[class_id] for class_id in class_ids],
))

             precision    recall  f1-score   support

   negative       0.79      0.56      0.66      8750
   positive       0.72      0.89      0.80     11248

avg / total       0.75      0.74      0.74     19998

