In [1]:
#all module's import

from nltk.tokenize import word_tokenize, RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.naive_bayes import MultinomialNB
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from collections import Counter
from nltk.corpus import stopwords
from nltk import classify
from nltk import NaiveBayesClassifier
from bs4 import BeautifulSoup
from sklearn import metrics
import pandas as pd
import numpy as np
import string
import nltk
import csv
import re, urllib

stemming = PorterStemmer()
stops = set(stopwords.words("english"))
tokenizer = nltk.RegexpTokenizer(r"\w+")

In [2]:
#positive_tweets csv file

data1 = pd.read_csv('Dataset/positive_tweets.csv', error_bad_lines=False)

In [3]:
#negative_tweets csv file

data2 = pd.read_csv('Dataset/negative_tweets.csv', error_bad_lines=False)

In [4]:
#test_tweets csv file

data3 = pd.read_csv('Dataset/test.csv', error_bad_lines=False)

In [5]:
#data in upper case

data1[2923:2930]

Unnamed: 0,positive_tweets
2923,haha dating the enemy!
2924,I'm trying to build a little momentum
2925,I'm up. sleeping is over rated.
2926,in the words of marv albert &quot;YES!!&quot;...
2927,intrigued about tomorrows @rhcp_universe anno...
2928,INVESTING IS THE KEY TO SUCCESS JUST NEED THE...
2929,is bored but for some reason oddly happy


In [6]:
#convert uppercase into lowercase

#positive data
data1.positive_tweets = data1.positive_tweets.str.lower()
#negative data
data2.negative_tweets = data2.negative_tweets.str.lower()
#test data
data3.tweet = data3.tweet.str.lower()

In [7]:
#data in lower case

data1[2923:2930]

Unnamed: 0,positive_tweets
2923,haha dating the enemy!
2924,i'm trying to build a little momentum
2925,i'm up. sleeping is over rated.
2926,in the words of marv albert &quot;yes!!&quot;...
2927,intrigued about tomorrows @rhcp_universe anno...
2928,investing is the key to success just need the...
2929,is bored but for some reason oddly happy


In [8]:
#data containing urls

data1[2918:2922]

Unnamed: 0,positive_tweets
2918,group shot from prom: http://bit.ly/13bgd9
2919,guitar in a weeeeek
2920,ha i love you too betch.
2921,- had a great time at the 'block party' - so d...


In [9]:
#remove urls

def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')      
    return url_pattern.sub(r'', text)                        

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda text: remove_urls(text))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda text: remove_urls(text))
#test data
data3.tweet = data3.tweet.apply(lambda text: remove_urls(text))

In [10]:
#remove urls

data1[2918:2922]

Unnamed: 0,positive_tweets
2918,group shot from prom:
2919,guitar in a weeeeek
2920,ha i love you too betch.
2921,- had a great time at the 'block party' - so d...


In [11]:
# Load smileys

emoticons = pd.read_csv('Dataset/smileys.csv')
positive_emoticons = emoticons[emoticons.Sentiment == 1]
negative_emoticons = emoticons[emoticons.Sentiment == 0]
emoticons.head(5)

Unnamed: 0,Smiley,Sentiment
0,:-),1
1,:),1
2,:D,1
3,:o),1
4,:],1


In [12]:
#find positive and negative emoticons of tweets and remove it

def make_emoticon_pattern(emoticons):                      
    pattern = "|".join(map(re.escape, emoticons.Smiley))
    pattern = "(?<=\s)(" + pattern + ")(?=\s)"
    return pattern

def find_with_pattern(pattern, data, replace=False, tag=None):   
    if replace and tag == None:                            
        raise Exception("Parameter error", "If replace=True you should add the tag by which the pattern will be replaced")
    regex = re.compile(pattern)
    if replace:
        return data.apply(lambda tweet: re.sub(pattern, tag, " " + tweet + " "))
    return data.apply(lambda tweet: re.findall(pattern, " " + tweet + " "))

In [13]:
#find num of pos and neg emoticons in positive data

pos_emoticons_found = find_with_pattern(make_emoticon_pattern(positive_emoticons),data1.positive_tweets)
neg_emoticons_found = find_with_pattern(make_emoticon_pattern(negative_emoticons),data1.positive_tweets)

#this will check the quantity of positive and negative emoticons

nb_pos_emoticons = len(pos_emoticons_found[pos_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
nb_neg_emoticons = len(neg_emoticons_found[neg_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
print("Number of positive emoticons: " + str(nb_pos_emoticons) + " Number of negative emoticons: " + str(nb_neg_emoticons))

Number of positive emoticons: 37 Number of negative emoticons: 5


In [14]:
#data containing emoticons

data1[2240:2249]

Unnamed: 0,positive_tweets
2240,"makes you ask yourself, who am i? then am i a..."
2241,"@user #sikh #temple vandalised in in #calgary,..."
2242,omg its already 7:30 :o
2243,juuuuuuuuuuuuuuuuussssst chillin!!
2244,handed in my uniform today . i miss you ...
2245,hmmmm.... i wonder how she my number @-)
2246,thanks to all the haters up in my face a...
2247,feeling strangely fine. now i'm gonna go l...
2248,you're the only one who can see this cause...


In [15]:
#replace all emoticons of positive data

data1.positive_tweets = find_with_pattern(make_emoticon_pattern(positive_emoticons),data1.positive_tweets, True, '')
data1.positive_tweets = find_with_pattern(make_emoticon_pattern(negative_emoticons),data1.positive_tweets, True, '')
data1[2240:2249]

Unnamed: 0,positive_tweets
2240,"makes you ask yourself, who am i? then am i..."
2241,@user #sikh #temple vandalised in in #calgar...
2242,omg its already 7:30
2243,juuuuuuuuuuuuuuuuussssst chillin!!
2244,handed in my uniform today . i miss yo...
2245,hmmmm.... i wonder how she my number
2246,thanks to all the haters up in my face...
2247,feeling strangely fine. now i'm gonna go...
2248,you're the only one who can see this cau...


In [16]:
#find positive and negative emoticons of negative tweets

pos_emoticons_found = find_with_pattern(make_emoticon_pattern(positive_emoticons),data2.negative_tweets)
neg_emoticons_found = find_with_pattern(make_emoticon_pattern(negative_emoticons),data2.negative_tweets)

#this will check the quantity of positive and negative emoticons

nb_pos_emoticons = len(pos_emoticons_found[pos_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
nb_neg_emoticons = len(neg_emoticons_found[neg_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
print("Number of positive emoticons: " + str(nb_pos_emoticons) + " Number of negative emoticons: " + str(nb_neg_emoticons))

Number of positive emoticons: 375 Number of negative emoticons: 166


In [17]:
#data of neg tweets containing emoticons

data2[57:62]

Unnamed: 0,negative_tweets
57,you've really hu my feelings :(
58,@user my wife whom i adore had to miss your po...
59,@user i am so jealous of you right now.... #ch...
60,i celebrate every man that has played it's fat...
61,i'm sure they are just as happy.... ðð½ð...


In [18]:
#remove emoticons from neg tweets

data2.negative_tweets = find_with_pattern(make_emoticon_pattern(positive_emoticons),data2.negative_tweets, True, '')
data2.negative_tweets = find_with_pattern(make_emoticon_pattern(negative_emoticons),data2.negative_tweets, True, '')
data2[57:62]

Unnamed: 0,negative_tweets
57,you've really hu my feelings
58,@user my wife whom i adore had to miss your ...
59,@user i am so jealous of you right now.... #...
60,i celebrate every man that has played it's f...
61,i'm sure they are just as happy.... ðð½...


In [19]:
#find positive and negative emoticons of test tweets

pos_emoticons_found = find_with_pattern(make_emoticon_pattern(positive_emoticons),data3.tweet)
neg_emoticons_found = find_with_pattern(make_emoticon_pattern(negative_emoticons),data3.tweet)

#this will check the quantity of positive and negative emoticons

nb_pos_emoticons = len(pos_emoticons_found[pos_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
nb_neg_emoticons = len(neg_emoticons_found[neg_emoticons_found.map(lambda emoticons : len(emoticons) > 0)])
print("Number of positive emoticons: " + str(nb_pos_emoticons) + " Number of negative emoticons: " + str(nb_neg_emoticons))

Number of positive emoticons: 6 Number of negative emoticons: 2


In [20]:
#remove emoticons from test data

data3.tweet = find_with_pattern(make_emoticon_pattern(positive_emoticons),data3.tweet, True, '')
data3.tweet = find_with_pattern(make_emoticon_pattern(negative_emoticons),data3.tweet, True, '')

In [21]:
#data containing html entities

data1[2279:2286]

Unnamed: 0,positive_tweets
2279,&lt;3 goooood timessss
2280,&quot;i know you hungry so lets go outside ...
2281,&quot;the 10 men a uk female internet entre...
2282,&quot;the truth is hiding in your eyes&quot...
2283,-- *hummin* i say &quot;shut up &amp; put yo...
2284,-- . *sigh* you cant love someone that doesn...
2285,"-- . aiqht goodniqht . tri, neena, malcolm, ..."


In [22]:
#Remove Html entities

def strip_html_tags(text):
    soup = BeautifulSoup(text, "html.parser")
    stripped_text = soup.get_text(separator=" ")
    return stripped_text

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda text: strip_html_tags(text))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda text: strip_html_tags(text))
#test data
data3.tweet = data3.tweet.apply(lambda text: strip_html_tags(text))

In [23]:
#tweets without html entities

data1[2279:2286]

Unnamed: 0,positive_tweets
2279,<3 goooood timessss
2280,"""i know you hungry so lets go outside on th..."
2281,"""the 10 men a uk female internet entreprene..."
2282,"""the truth is hiding in your eyes"" @patita ..."
2283,"-- *hummin* i say ""shut up & put your money ..."
2284,-- . *sigh* you cant love someone that doesn...
2285,"-- . aiqht goodniqht . tri, neena, malcolm, ..."


In [24]:
#tweets containing unicodes

data1[2379:2384]

Unnamed: 0,positive_tweets
2379,a dog riding the bicycle
2380,a problem shared is a buck passed
2381,à¸”à¸¹à¹?à¸šà¹„à¸•à¹‹ hitech
2382,am quite tired can't wait till wwe at burs...
2383,0


In [25]:
#remove unicodes

def remove_unicode(string):
    try:
        string = string.encode('ascii', 'ignore').decode("utf-8")
    except UnicodeDecodeError:
        pass
    return string

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: remove_unicode(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: remove_unicode(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: remove_unicode(tweet))

In [26]:
#tweets without unicodes

data1[2379:2384]

Unnamed: 0,positive_tweets
2379,a dog riding the bicycle
2380,a problem shared is a buck passed
2381,? hitech
2382,am quite tired can't wait till wwe at burs...
2383,0


In [27]:
#tweets containing @....

data1[2537:2547]

Unnamed: 0,positive_tweets
2537,@essentially_me funny fact: @sween has abou...
2538,@fernandofelman am gonna miss you too. if i...
2539,@grahamapalooza @ronibryantmusic @rob_in_gr...
2540,- @hannahradford is taking me to lunch today
2541,@jessdelight: lmao @liveguy
2542,@joeschmitt twitter maintenance on a friday...
2543,"@johnmaeda are you a .exe (do-er), .ppt (ta..."
2544,@krystynchong that's a lovely compliment. t...
2545,@ksymmonds thankyou
2546,- @melbournevixens awesome 4th quarter ladie...


In [28]:
#replace all @ with ||targets||

pattern_usernames = "@\w{1,}";

#positive data
data1.positive_tweets = find_with_pattern(pattern_usernames,data1.positive_tweets, True, '')
#negative data
data2.negative_tweets = find_with_pattern(pattern_usernames,data2.negative_tweets, True, '')
#test data
data3.tweet = find_with_pattern(pattern_usernames,data3.tweet, True, '')

In [29]:
#remove @... from tweets 

data1[2537:2547]

Unnamed: 0,positive_tweets
2537,funny fact: has about 3x his town's popu...
2538,am gonna miss you too. if it's really gon...
2539,
2540,- is taking me to lunch today
2541,: lmao
2542,twitter maintenance on a friday night? th...
2543,"are you a .exe (do-er), .ppt (talk-er), o..."
2544,that's a lovely compliment. thx much!
2545,thankyou
2546,- awesome 4th quarter ladies! great game


In [30]:
#load set of acronyms

acronyms = pd.read_csv('Dataset/acronyms.csv')
acronyms.tail(5)

Unnamed: 0,Acronym,Translation
5459,tomoz,tomorrow
5460,gpytfaht,gladly pay you tuesday for a hamburger today
5461,l8rz,later
5462,sase,self addressed stamped envelope
5463,bwoc,big woman on campus


In [31]:
#tweets containing punctuations

data1[2404:2412]

Unnamed: 0,positive_tweets
2404,anoron.reedcourty.operaunite.com/webserver...
2405,another friday night of rock band
2406,: d a million making a difference. i can'...
2407,good morning all! have a great day!!!
2408,having a luvly day
2409,he is so hot !
2410,hmm what to do!
2411,- iowa no. 2 in happy! yea!


In [32]:
#remove all punctuations from tweets

def remove_punctuations(string):
    string = re.sub(r'[^\w\s]', '', string)
    return string

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: remove_punctuations(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: remove_punctuations(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: remove_punctuations(tweet))

In [33]:
#data removing punctuations

data1[2404:2412]

Unnamed: 0,positive_tweets
2404,anoronreedcourtyoperaunitecomwebservercont...
2405,another friday night of rock band
2406,d a million making a difference i cant h...
2407,good morning all have a great day
2408,having a luvly day
2409,he is so hot
2410,hmm what to do
2411,iowa no 2 in happy yea


In [34]:
#data containing extra spaces

data1[2242:2252]

Unnamed: 0,positive_tweets
2242,omg its already 730
2243,juuuuuuuuuuuuuuuuussssst chillin
2244,handed in my uniform today i miss yo...
2245,hmmmm i wonder how she my number
2246,thanks to all the haters up in my fac...
2247,feeling strangely fine now im gonna go ...
2248,youre the only one who can see this cau...
2249,goodbye exams hello alcohol tonight
2250,uploading pictures on friendster
2251,so i wrote something last week and i g...


In [35]:
#removing multiple space from text

def remove_spaces(string):
    string = re.sub(r"^\s+", "", string)
    return string

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: remove_spaces(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: remove_spaces(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: remove_spaces(tweet))

In [36]:
#data without spaces

data1[2242:2252]

Unnamed: 0,positive_tweets
2242,omg its already 730
2243,juuuuuuuuuuuuuuuuussssst chillin
2244,handed in my uniform today i miss you already
2245,hmmmm i wonder how she my number
2246,thanks to all the haters up in my face all day...
2247,feeling strangely fine now im gonna go listen ...
2248,youre the only one who can see this cause no o...
2249,goodbye exams hello alcohol tonight
2250,uploading pictures on friendster
2251,so i wrote something last week and i got a cal...


In [37]:
#data containing acronyms

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,omg its already 730
2243,juuuuuuuuuuuuuuuuussssst chillin
2244,handed in my uniform today i miss you already
2245,hmmmm i wonder how she my number
2246,thanks to all the haters up in my face all day...
2247,feeling strangely fine now im gonna go listen ...
2248,youre the only one who can see this cause no o...


In [38]:
#replace all acronyms with their translation

# Create a dictionary of acronym which will be used to get translations
acronym_dictionary = dict(zip(acronyms.Acronym, acronyms.Translation))

punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{}~'

# Frequency table for acronyms
acronyms_counter = Counter()

def acronym_to_translation(tweet, acronyms_counter):
    table = str.maketrans(punctuation," " * len(punctuation))
    tweet = str(tweet).translate(table)
    words = tweet.split()
    new_words = []
    for i, word in enumerate(words):
        if word in acronym_dictionary:
            acronyms_counter[word] += 1
            new_words.extend(acronym_dictionary[word].split())
        else:
            new_words.append(word)
    return new_words

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: acronym_to_translation(tweet, acronyms_counter))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: acronym_to_translation(tweet, acronyms_counter))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: acronym_to_translation(tweet, acronyms_counter))

In [39]:
#data with tokenization and replace acronym

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,"[oh, my, god, its, already, 730]"
2243,"[juuuuuuuuuuuuuuuuussssst, relaxing]"
2244,"[handed, in, my, uniform, today, i, miss, you,..."
2245,"[hmmmm, i, wonder, how, she, my, number]"
2246,"[thanks, to, all, the, haters, up, in, my, fac..."
2247,"[feeling, strangely, fine, now, instant, messa..."
2248,"[youre, the, only, one, who, can, see, this, c..."


In [40]:
#replace sequence of repeated characters with two characters

pattern = re.compile(r'(.)\1*')

def reduce_sequence_word(word):
    return ''.join([match.group()[:2] if len(match.group()) > 2 else match.group() for match in pattern.finditer(word)])

def reduce_sequence_tweet(tweet):
    return [reduce_sequence_word(word) for word in tweet]

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: reduce_sequence_tweet(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: reduce_sequence_tweet(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: reduce_sequence_tweet(tweet))

In [41]:
#data with replace all sequence of words with two char's 

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,"[oh, my, god, its, already, 730]"
2243,"[juusst, relaxing]"
2244,"[handed, in, my, uniform, today, i, miss, you,..."
2245,"[hmm, i, wonder, how, she, my, number]"
2246,"[thanks, to, all, the, haters, up, in, my, fac..."
2247,"[feeling, strangely, fine, now, instant, messa..."
2248,"[youre, the, only, one, who, can, see, this, c..."


In [42]:
#removing stop words

def remove_stops(row):
    my_list = row
    meaningful_words = [w for w in my_list if not w in stops]
    return (meaningful_words)

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: remove_stops(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: remove_stops(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: remove_stops(tweet))

In [43]:
#data without stopwords

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,"[oh, god, already, 730]"
2243,"[juusst, relaxing]"
2244,"[handed, uniform, today, miss, already]"
2245,"[hmm, wonder, number]"
2246,"[thanks, haters, face, day, 112102]"
2247,"[feeling, strangely, fine, instant, message, g..."
2248,"[youre, one, see, cause, one, else, following,..."


In [44]:
#doing stemming of words

def stem_list(row):
    my_list = row
    stem_list = [stemming.stem(word) for word in my_list]
    return (stem_list)

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: stem_list(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: stem_list(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: stem_list(tweet))

In [45]:
#data with stemming

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,"[oh, god, alreadi, 730]"
2243,"[juusst, relax]"
2244,"[hand, uniform, today, miss, alreadi]"
2245,"[hmm, wonder, number]"
2246,"[thank, hater, face, day, 112102]"
2247,"[feel, strang, fine, instant, messag, go, go, ..."
2248,"[your, one, see, caus, one, els, follow, your,..."


In [46]:
# remove remaining tokens that are not alphabetic

def remove_nonalpha(row):
    my_list = row
    remove_alpha = [word for word in my_list if word.isalpha()]
    return (remove_alpha)

#positive data
data1.positive_tweets = data1.positive_tweets.apply(lambda tweet: remove_nonalpha(tweet))
#negative data
data2.negative_tweets = data2.negative_tweets.apply(lambda tweet: remove_nonalpha(tweet))
#test data
data3.tweet = data3.tweet.apply(lambda tweet: remove_nonalpha(tweet))

In [47]:
#data without non alphabets values

data1[2242:2249]

Unnamed: 0,positive_tweets
2242,"[oh, god, alreadi]"
2243,"[juusst, relax]"
2244,"[hand, uniform, today, miss, alreadi]"
2245,"[hmm, wonder, number]"
2246,"[thank, hater, face, day]"
2247,"[feel, strang, fine, instant, messag, go, go, ..."
2248,"[your, one, see, caus, one, els, follow, your,..."


In [48]:
#insert all data into lists

data_pos_clean = []
data_neg_clean = []
data_test_clean = []

#positive list of tweets
for sent in data1.positive_tweets:
    data_pos_clean.append(sent)
    
#negative list of tweets
for sent in data2.negative_tweets:
    data_neg_clean.append(sent)

#test list of tweets
for sent in data3.tweet:
    data_test_clean.append(sent)

In [49]:
#test tweets for testing

data_test = data_neg_clean[0:200]

In [50]:
#insert each data in the dictionary with value = True

def get_tweets_for_model(cleaned_tokens_list):
    for tweet_tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tweet_tokens)

#positive data
positive_tokens_for_model = get_tweets_for_model(data_pos_clean)
#negative data
negative_tokens_for_model = get_tweets_for_model(data_neg_clean)
#test data
test_tokens_for_model = get_tweets_for_model(data_test_clean)

In [51]:
#insert tag with 

import random

#insert positive tag with positive tweets
positive_dataset = [(tweet_dict, "Positive")
                     for tweet_dict in positive_tokens_for_model]
#insert negative tag with negative tweets
negative_dataset = [(tweet_dict, "Negative")
                     for tweet_dict in negative_tokens_for_model]

In [52]:
#data of positive dataset 

positive_dataset[2]

({'retweet': True, 'agre': True}, 'Positive')

In [53]:
#combine all tweets with each other

dataset = positive_dataset + negative_dataset

In [54]:
#shuffle them

random.shuffle(dataset)

In [55]:
#lenght of dataset

len(dataset)

32938

In [56]:
#creating training set of tweets

train_data = dataset[:25000]

In [57]:
#creating testing set of tweets

test_data = dataset[25000:32938]

In [65]:
#train model using naive bayes classifier

classifier = NaiveBayesClassifier.train(train_data)

In [59]:
#find its accuracy

print("Accuracy is:", classify.accuracy(classifier, test_data))

Accuracy is: 0.8830939783320736


In [60]:
#most informative features with their occurrence ratio of positive and negative tweets

print(classifier.show_most_informative_features(10))

Most Informative Features
                     blm = True           Positi : Negati =    168.6 : 1.0
                   bigot = True           Positi : Negati =    162.5 : 1.0
                  hispan = True           Positi : Negati =    144.1 : 1.0
              misogynist = True           Positi : Negati =    137.9 : 1.0
                altright = True           Positi : Negati =    131.8 : 1.0
                 libtard = True           Positi : Negati =    121.6 : 1.0
               antisemit = True           Positi : Negati =    119.6 : 1.0
                     jew = True           Positi : Negati =    119.6 : 1.0
                 boricua = True           Positi : Negati =    113.4 : 1.0
                   stomp = True           Positi : Negati =    101.2 : 1.0
None


In [61]:
#check sentiment of tweets using naive bayes trained model

for custom_tokens in data_test:
    print(classifier.classify(dict([token, True] for token in custom_tokens)))

Negative
Positive
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
Negative
N

In [62]:
#lower
#space remove
#urls
#emoticons
#html
#enicode
#@ targets
#punctuations
#acronyms
#sequence
#stem
#stopwords
#nonalpha


# custom_tweet = "i am so happy i love my life"
# custom_tweet = custom_tweet.lower()

# # custom_tweet = remove_spaces(custom_tweet)
# # custom_tweet = custom_tweet

# custom_tweet = remove_urls(custom_tweet)
# custom_tweet

# custom_tweet = strip_html_tags(custom_tweet)
# custom_tweet

# custom_tweet = remove_unicode(custom_tweet)
# custom_tweet

# custom_tweet = token = re.sub("(@[A-Za-z0-9_]+)","", custom_tweet)
# custom_tweet

# custom_tweet = remove_punctuations(custom_tweet)
# custom_tweet

# custom_tweet = remove_spaces(custom_tweet)
# custom_tweet

# custom_tweet = acronym_to_translation(custom_tweet, acronyms_counter)
# custom_tweet

# custom_tweet = reduce_sequence_tweet(custom_tweet)
# custom_tweet

# custom_tweet = remove_stops(custom_tweet)
# custom_tweet

# custom_tweet = stem_list(custom_tweet)
# custom_tweet

# custom_tweet = remove_nonalpha(custom_tweet)
# custom_tweet

In [63]:
# # custom_tokens = custom_tweet
# for custom_tokens in data_pos:
#     print(classifier.classify(dict([token, True] for token in custom_tokens)))