In [1]:
# Install nltk into the environment backing this kernel: --prefix {sys.prefix}
# targets the running interpreter's prefix, --yes skips the confirmation
# prompt, and -c anaconda selects the anaconda channel.
import sys
!conda install --yes --prefix {sys.prefix} -c anaconda nltk

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: D:\Anaconda3

  added / updated specs:
    - nltk


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.7.12               |           py37_0         3.0 MB  anaconda
    nltk-3.4.4                 |           py37_0         2.1 MB  anaconda
    ------------------------------------------------------------
                                           Total:         5.1 MB

The following packages will be SUPERSEDED by a higher-priority channel:

  conda                                         conda-forge --> anaconda
  nltk                                            pkgs/main --> anaconda



Downloading and Extracting Packages

nltk-3.4.4           | 2.1 MB    |            |   0% 
nltk-3.4.4           | 2.1 MB    |            |   1% 

In [1]:
import nltk
import os
# Uncomment to download the NLTK data packages.
#nltk.download()
# Raw string on purpose: in a plain literal the "\n" in "D:\nltk_data" is a
# newline escape, which silently corrupts the path.
# NOTE(review): nltk is already imported at this point; confirm whether it
# reads NLTK_DATA only at import time, in which case this assignment has to
# move above `import nltk` (or nltk.data.path must be extended directly).
os.environ["NLTK_DATA"] = r"D:\nltk_data"

In [1]:
# this section gets the initial data frame from the sentiment140 file

import pandas as pd
import numpy as np
# load the sentiment 140 dataset to pandas

sentiment140dataset = "testdata.manual.2009.06.14.csv"

# Read the CSV and discard the id/date/topic/account columns in one chained
# step instead of four separate in-place drops.
s140tweets = pd.read_csv(sentiment140dataset).drop(
    columns=["id", "date", "topic", "account"]
)

s140tweets.head()

Unnamed: 0,emotion,post
0,4,@stellargirl I loooooooovvvvvveee my Kindle2. ...
1,4,Reading my kindle2... Love it... Lee childs i...
2,4,"Ok, first assesment of the #kindle2 ...it fuck..."
3,4,@kenburbary You'll love your Kindle2. I've had...
4,4,@mikefish Fair enough. But i have the Kindle2...


In [32]:
# this section gets the data prepared to be put through the classifier

# this gives the (post,emotion) tuple
def translate_dataframe_to_post_format(frame, min_word_length=3):
    """Convert a tweet DataFrame into ``(tokens, emotion)`` pairs.

    Each value of the ``post`` column is split on whitespace; tokens shorter
    than ``min_word_length`` characters are dropped and the remaining ones
    are lower-cased.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide a ``post`` column of strings and an ``emotion`` column.
    min_word_length : int, optional
        Minimum token length to keep. Defaults to 3, the value that used to
        be hard-coded.

    Returns
    -------
    list of tuple
        One ``(tokens, emotion)`` tuple per row, in row order.
    """
    # Walk the two needed columns directly: iterrows() materialises a full
    # Series for every row, which is slow and unnecessary here.
    return [
        (
            [token.lower() for token in text.split() if len(token) >= min_word_length],
            emotion,
        )
        for text, emotion in zip(frame['post'], frame['emotion'])
    ]

def get_words_in_tweets(posts):
    """Flatten the token lists of ``(tokens, sentiment)`` pairs into one list.

    Tokens keep their original order and duplicates are preserved; the
    sentiment half of each pair is ignored.
    """
    return [token for tokens, _sentiment in posts for token in tokens]

def get_word_features(wordlist):
    """Return the key view of an ``nltk.FreqDist`` built from ``wordlist``.

    The keys are the distinct words that occur in ``wordlist``.
    """
    return nltk.FreqDist(wordlist).keys()

# Turn the tweet frame into (tokens, emotion) pairs; the training cell reads
# `posts` again when it builds the training set.
posts = translate_dataframe_to_post_format(s140tweets)

# Flatten every token into one list, then reduce it to the distinct words.
# `word_features` is the module-level vocabulary that extract_features() reads.
words = get_words_in_tweets(posts)
word_features = get_word_features(words)

# Prints the entire vocabulary view.
print(word_features)

dict_keys(['@stellargirl', 'loooooooovvvvvveee', 'kindle2.', 'not', 'that', 'the', 'cool,', 'but', 'fantastic', 'its', 'own', 'right.', 'reading', 'kindle2...', 'love', 'it...', 'lee', 'childs', 'good', 'read.', 'ok,', 'first', 'assesment', '#kindle2', '...it', 'fucking', 'rocks!!!', '@kenburbary', "you'll", 'your', "i've", 'had', 'mine', 'for', 'few', 'months', 'and', 'never', 'looked', 'back.', 'new', 'big', 'one', 'huge!', 'need', 'remorse!', '@mikefish', 'fair', 'enough.', 'have', 'kindle2', 'think', "it's", 'perfect', '@richardebaker', 'no.', 'too', 'big.', "i'm", 'quite', 'happy', 'with', 'fuck', 'this', 'economy.', 'hate', 'aig', 'their', 'non', 'loan', 'given', 'asses.', 'jquery', 'best', 'friend.', 'loves', 'twitter', 'how', 'can', 'you', 'obama?', 'makes', 'jokes', 'about', 'himself.', 'check', 'video', 'out', 'president', 'obama', 'white', 'house', "correspondents'", 'dinner', 'http://bit.ly/imxum', '@karoli', 'firmly', 'believe', 'obama/pelosi', 'zero', 'desire', 'civil.', 

In [38]:
# this section builds the classifier

def extract_features(document, vocabulary=None):
    """Build the ``contains(<word>)`` boolean feature dict for one document.

    Parameters
    ----------
    document : iterable of str
        Tokens of the post being featurised.
    vocabulary : iterable of str, optional
        Words to emit a feature for. Defaults to the module-level
        ``word_features``, so existing single-argument callers (including
        the ``apply_features`` call in this notebook) are unaffected.

    Returns
    -------
    dict
        Maps ``'contains(<word>)'`` to True when the word occurs among the
        lower-cased document tokens, else False.
    """
    if vocabulary is None:
        vocabulary = word_features
    # The vocabulary is built from lower-cased training tokens, so fold the
    # document the same way. Without this, capitalised words in raw text
    # passed at prediction time (e.g. "Love") can never match a feature.
    document_words = {token.lower() for token in document}
    return {'contains(%s)' % word: (word in document_words) for word in vocabulary}

# Build the training set by handing extract_features and the (tokens, emotion)
# pairs in `posts` to nltk's apply_features helper.
training_set = nltk.classify.apply_features(extract_features, posts)

# Fit a Naive Bayes classifier on that training set.
classifier = nltk.NaiveBayesClassifier.train(training_set)

In [39]:
# this section shows some information about the classifier

# show_most_informative_features() prints the table itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
classifier.show_most_informative_features(32)

Most Informative Features
          contains(hate) = True                0 : 4      =     10.6 : 1.0
        contains(jquery) = True                2 : 0      =     10.6 : 1.0
          contains(time) = True                0 : 4      =      9.8 : 1.0
           contains(not) = True                0 : 2      =      9.2 : 1.0
          contains(love) = True                4 : 0      =      8.8 : 1.0
        contains(warner) = True                0 : 2      =      7.7 : 1.0
        contains(lebron) = True                4 : 2      =      6.4 : 1.0
         contains(great) = True                4 : 2      =      6.4 : 1.0
          contains(good) = True                4 : 2      =      5.7 : 1.0
           contains(his) = True                4 : 0      =      5.5 : 1.0
           contains(are) = True                0 : 2      =      5.5 : 1.0
           contains(one) = True                4 : 0      =      4.9 : 1.0
           contains(san) = True                2 : 4      =      4.8 : 1.0

In [45]:
# this section shows some test tweets that convey how fragile the classifier is

# Three near-identical sentences: a neutral one, one with a negative
# adjective, and one with a negation.
tweet = 'Larry is my friend'
tweet2 = 'Larry is my worst friend'
tweet3 = 'Larry is not my friend'

# Classify each sentence in turn and print the predicted label.
for sample in (tweet, tweet2, tweet3):
    print(classifier.classify(extract_features(sample.split())))

4
0
4


In [52]:
reddit_posts_file = "test-posts.xlsx"

reddit_posts = pd.read_excel(reddit_posts_file)

# Collect the 'post' cell of every row into a plain list.
# NOTE(review): this rebinds `posts`, which earlier held the (tokens, emotion)
# training pairs -- re-running the training cell afterwards would see this list.
posts = [row['post'] for _, row in reddit_posts.iterrows()]

# Classify each post and echo the predicted label next to its text.
for post in posts:
    label = classifier.classify(extract_features(post.split()))
    print("emotion: " + str(label) + ", post: " + str(post))

emotion: 4, post: Local multiplayer had people with different tools out performing different tasks. This is great to hear! As it was pretty gimped before.
emotion: 0, post: That darn pole is something I want but didn’t even know I wanted...
emotion: 0, post: We can sell weeds now! Finally they won't be a chore to pick!
emotion: 0, post: This might be a weird desire, but I'd like to be able to keep photos that I've taken in-game displayed somewhere in my lil home or tent
emotion: 4, post: I know there are a ton of comments and mine is probably going to get buried, but did anyone notice when Tom Nook was talking in the beginning there was a cloud’s shadow floating by? It’s the little attention to details like this that make me SO excited for the game. It really is beautiful. Albeit, different from the others. It has its own charm. (secretly hoping Celeste is back with her telescope for constellations like in WW when we saw the character sitting outside at night looking at the sky)
emotio