In [47]:
import nltk

In [48]:
from pymongo import MongoClient
# Connect to the MongoDB server at localhost:27017 and select the database
# named "local"; every collection used below is read through `db`.
client = MongoClient('localhost', 27017)
db = client.local

In [49]:
# Read every document of the trainData collection and keep it as a
# (processed_words, category) pair, the shape the feature helpers below unpack.
trainData = db.trainData
trainTweets = [
    (doc['processed_words'], doc['category'])
    for doc in trainData.find()
]

In [50]:
# Load the opinion lexicons: each line of a file becomes one list entry with
# its trailing newline removed.
# NOTE(review): open() is called without an explicit encoding, so the platform
# default applies -- confirm it matches the encoding of the lexicon files.
with open('opinion-lexicon-English/positive-words.txt') as pos_words:
    positive_words = [line.strip('\n') for line in pos_words]
with open('opinion-lexicon-English/negative-words.txt') as neg_words:
    negative_words = [line.strip('\n') for line in neg_words]

In [51]:
def polarity_ratio(tweet_word_list):
    """Count the lexicon hits among the words of one tweet.

    Each word is converted with ``str()`` and looked up in the module-level
    ``positive_words`` and ``negative_words`` collections.

    Returns a dict with two integer entries: ``'num_pos'`` (words found in
    ``positive_words``) and ``'num_neg'`` (words found in ``negative_words``).
    A word present in both collections is counted once in each.
    """
    tokens = [str(raw) for raw in tweet_word_list]
    return {
        'num_pos': sum(1 for token in tokens if token in positive_words),
        'num_neg': sum(1 for token in tokens if token in negative_words),
    }

In [52]:
def validate_polarity(classification, ratio):
    """Tell whether a predicted label agrees with the lexicon word counts.

    Parameters:
        classification: predicted label; ``'positive'``, ``'negative'`` and
            ``'neutral'`` are recognised.
        ratio: dict with integer ``'num_pos'`` and ``'num_neg'`` entries, as
            produced by ``polarity_ratio``.

    Returns:
        ``True`` when the label matches the counts (positive: more positive
        words; negative: more negative words; neutral: a tie), ``False``
        otherwise, including for any unrecognised label.

    Every path returns a real boolean. Previously a recognised label whose
    counts disagreed fell off the end of the function and yielded ``None``.
    """
    # The counts are read inside each branch so that an unrecognised label
    # still returns False without touching ``ratio``.
    if classification == 'positive':
        return ratio['num_pos'] > ratio['num_neg']
    if classification == 'negative':
        return ratio['num_neg'] > ratio['num_pos']
    if classification == 'neutral':
        return ratio['num_pos'] == ratio['num_neg']
    return False

In [53]:
def get_words_in_tweets(tweets):
    """Flatten the word lists of all tweets into a single list.

    ``tweets`` is an iterable of ``(processed_words, category)`` pairs; the
    category is ignored. Words keep their original order and duplicates are
    preserved.
    """
    return [
        word
        for (processed_words, category) in tweets
        for word in processed_words
    ]

In [54]:
def get_word_features(wordList):
    """Return the distinct words of ``wordList``.

    The words are fed to ``nltk.FreqDist`` and the keys of the resulting
    frequency distribution are returned.
    """
    return nltk.FreqDist(wordList).keys()

In [55]:
# Vocabulary of the training set: every distinct word that occurs in any
# training tweet. extract_features() reads this module-level name.
word_features = get_word_features(get_words_in_tweets(trainTweets))

In [56]:
def extract_features(document):
    """Build the feature dict for one tweet.

    ``document`` is an iterable of words. For every word in the module-level
    ``word_features`` the result holds a ``'contains(<word>)'`` key whose
    value is ``True`` when that word occurs in ``document`` and ``False``
    otherwise.
    """
    present = set(document)
    return {
        'contains(%s)' % candidate: (candidate in present)
        for candidate in word_features
    }

In [57]:
# Pair the extract_features() output of each training tweet with its category.
# NOTE(review): per the NLTK docs apply_features computes features lazily on
# access rather than building them all up front -- confirm for the installed version.
training_set = nltk.classify.apply_features(extract_features, trainTweets)

In [58]:
# Train a Naive Bayes classifier on the labelled training set; classify()
# below reads the resulting module-level `classifier`.
classifier = nltk.NaiveBayesClassifier.train(training_set)

In [72]:
#print classifier.show_most_informative_features(10)

In [82]:
# Read every document of the testTweets collection, keeping only the
# pre-processed word list and the original tweet text.
testData = db.testTweets
testTweets = [
    {'processed_words': doc['processed_words'], 'text': doc['text']}
    for doc in testData.find()
]

In [79]:
def classify(tweet):
    """Return the final sentiment label for one tweet.

    ``tweet`` is the tweet's list of processed words. The Naive Bayes
    prediction is kept when ``validate_polarity`` confirms it against the
    lexicon counts from ``polarity_ratio``; otherwise the label is taken from
    whichever count is larger, with a tie giving ``'neutral'``.

    NOTE(review): for the three labels validate_polarity recognises, a
    validated prediction is by construction the same label the count
    comparison below would give, so the returned value always equals the
    lexicon outcome and the classifier never changes it. Confirm whether
    that is intended.
    """
    predicted = classifier.classify(extract_features(tweet))
    counts = polarity_ratio(tweet)

    # The classifier and the lexicon agree: keep the classifier's label.
    if validate_polarity(predicted, counts):
        return predicted

    # They disagree: fall back to whichever polarity has more lexicon hits.
    if counts['num_pos'] > counts['num_neg']:
        return 'positive'
    if counts['num_pos'] < counts['num_neg']:
        return 'negative'
    return 'neutral'

In [85]:
'''
for i in range(800,830):
    print(testTweets[i]['text'])
    print(classify(testTweets[i]['processed_words']))
'''

@coastaldann @SouthwestAir I just stopped booking with them 💁🏻
neutral
@CharlieWSM @SouthwestAir We're flying SWA to Reagan in March, so it is possible. You need to have a talk with Jean… https://t.co/esTBpqI2P5
neutral
Thanks Zach and @SouthwestAir team for a great flight from BWI to Denver! #smoothholidaytravel… https://t.co/aWlsiE4gos
positive
First time flying with you guys . I don't want no huff shit going down and the pilot better be cool with me flying the plane. @SouthwestAir
positive
You haven't met Drill Diaz yet huh!!! https://t.co/hBg04paPjR
neutral
@SouthwestAir I was quick to criticize, but I also want to say thank you for quickly finding an item I left on boar… https://t.co/Apjw2ObiGQ
neutral
@southwestair Been having an issue since the 8th of October. My golf clubs are gone. I’ve been getting the run arou… https://t.co/dFUcra3iRQ
negative
@southwestair Pls find a way to reward the flight crew, Pilot &amp; FO on  1842 MDW &gt; BWI tonight. They took everything… https://t