In [6]:
# Loading the reviews
import pandas as pd

labeled_train_review = pd.read_csv("./data/movie_reviews/labeledTrainData.tsv", header=0, delimiter="\t", quoting=3)
test_reviews = pd.read_csv("./data/movie_reviews/testData.tsv", header=0, delimiter="\t", quoting=3)
unlabeled_train_reviews = pd.read_csv("./data/movie_reviews/unlabeledTrainData.tsv", header=0,
                                      delimiter="\t", quoting=3)

# Verify the number of reviews that were read (100,000 in total)
print "labeled reviews: {}".format(labeled_train_review.shape)
print "test reviews: {}".format(test_reviews.shape)
print "unlabeled reviews: {}".format(unlabeled_train_reviews.shape)

print "\nlabeled reviews columns: {}".format(labeled_train_review.columns.values)


labeled reviews: (25000, 3)
test reviews: (25000, 2)
unlabeled reviews: (50000, 2)

labeled reviews columns: ['id' 'sentiment' 'review']


In [7]:
# Review to clean text
from bs4 import BeautifulSoup
import re
from nltk.corpus import stopwords

def review_to_wordlist(review, remove_stopwords=False):
    # Function to convert a document to a sequence of words,
    # optionally removing stop words.  Returns a list of words.
    #
    # 1. Remove HTML
    review_text = BeautifulSoup(review, 'html5').get_text()
    #  
    # 2. Remove non-letters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    #
    # 3. Convert words to lower case and split them
    words = review_text.lower().split()
    #
    # 4. Optionally remove stop words (false by default)
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        words = [w for w in words if w not in stops]
    #
    # 5. Return a list of words
    return words


In [8]:
# Review to list of sentences

def review_to_sentences(review, sentence_tokenizer, remove_stopwords=False):
    # Function to split a review into parsed sentences. Returns a 
    # list of sentences, where each sentence is a list of words
    #
    # 1. Use the NLTK tokenizer to split the paragraph into sentences
    raw_sentences = sentence_tokenizer.tokenize(review.decode('utf-8').strip())
    #
    # 2. Loop over each sentence
    review_sentences = []
    for raw_sentence in raw_sentences:
        # If a sentence is empty, skip it
        if len(raw_sentence) > 0:
            # Otherwise, call review_to_wordlist to get a list of words
            review_sentences.append(review_to_wordlist(raw_sentence, remove_stopwords))

    # Return the list of sentences (each sentence is a list of words,
    # so this returns a list of lists
    return review_sentences

In [9]:
# Review to list of sentences (word2vec expect to get the sentences separated)

import nltk

# NLTK has a tokenizer allow us to split text by sentences
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

sentences = []

print "Parsing sentences from labeled training set"
for review in labeled_train_review["review"]:
    sentences += review_to_sentences(review, sentence_tokenizer)

print "Parsing sentences from unlabeled set"
for review in unlabeled_train_reviews["review"]:
    sentences += review_to_sentences(review, sentence_tokenizer)


Parsing sentences from training set


Parsing sentences from unlabeled set


  ' that document to Beautiful Soup.' % decoded_markup


  ' that document to Beautiful Soup.' % decoded_markup


  ' that document to Beautiful Soup.' % decoded_markup


  ' Beautiful Soup.' % markup)


  ' that document to Beautiful Soup.' % decoded_markup


  ' that document to Beautiful Soup.' % decoded_markup


KeyboardInterrupt: 

In [None]:
# Word2vec training
from gensim.models import word2vec
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Set values for various parameters
num_features = 300  # Word vector dimensionality                      
min_word_count = 40  # Minimum word count                        
num_workers = 4  # Number of threads to run in parallel
context = 10  # Context window size                                                                                    
downsampling = 1e-3  # Downsample setting for frequent words

model = word2vec.Word2Vec(sentences, workers=num_workers, size=num_features, min_count=min_word_count,
                          window=context, sample=downsampling)

# If you don't plan to train the model any further, calling 
# init_sims will make the model much more memory-efficient.
model.init_sims(replace=True)

# It can be helpful to create a meaningful model name and 
# save the model for later use. You can load it later using Word2Vec.load()
model_name = "{0}features_{1}minwords_{2}context".format(num_features, min_word_count, context)
model.save(model_name)

2018-03-03 11:59:37,611 : INFO : 'pattern' package not found; tag filters are not available for English


2018-03-03 11:59:37,614 : INFO : collecting all words and their counts


2018-03-03 11:59:37,615 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


2018-03-03 11:59:37,668 : INFO : PROGRESS: at sentence #10000, processed 225803 words, keeping 17776 word types


2018-03-03 11:59:37,750 : INFO : PROGRESS: at sentence #20000, processed 451892 words, keeping 24948 word types


Training model...


2018-03-03 11:59:37,819 : INFO : PROGRESS: at sentence #30000, processed 671315 words, keeping 30034 word types


2018-03-03 11:59:37,886 : INFO : PROGRESS: at sentence #40000, processed 897815 words, keeping 34348 word types


2018-03-03 11:59:37,950 : INFO : PROGRESS: at sentence #50000, processed 1116963 words, keeping 37761 word types


2018-03-03 11:59:38,001 : INFO : PROGRESS: at sentence #60000, processed 1338404 words, keeping 40723 word types


2018-03-03 11:59:38,054 : INFO : PROGRESS: at sentence #70000, processed 1561580 words, keeping 43333 word types


2018-03-03 11:59:38,111 : INFO : PROGRESS: at sentence #80000, processed 1780887 words, keeping 45714 word types


2018-03-03 11:59:38,166 : INFO : PROGRESS: at sentence #90000, processed 2004996 words, keeping 48135 word types


2018-03-03 11:59:38,222 : INFO : PROGRESS: at sentence #100000, processed 2226967 words, keeping 50207 word types


2018-03-03 11:59:38,279 : INFO : PROGRESS: at sentence #110000, processed 2446581 words, keeping 52081 word types


2018-03-03 11:59:38,344 : INFO : PROGRESS: at sentence #120000, processed 2668776 words, keeping 54119 word types


2018-03-03 11:59:38,401 : INFO : PROGRESS: at sentence #130000, processed 2894304 words, keeping 55847 word types


2018-03-03 11:59:38,459 : INFO : PROGRESS: at sentence #140000, processed 3107006 words, keeping 57346 word types


2018-03-03 11:59:38,517 : INFO : PROGRESS: at sentence #150000, processed 3332628 words, keeping 59055 word types


2018-03-03 11:59:38,579 : INFO : PROGRESS: at sentence #160000, processed 3555316 words, keeping 60617 word types


2018-03-03 11:59:38,642 : INFO : PROGRESS: at sentence #170000, processed 3778656 words, keeping 62077 word types


2018-03-03 11:59:38,700 : INFO : PROGRESS: at sentence #180000, processed 3999237 words, keeping 63496 word types


2018-03-03 11:59:38,753 : INFO : PROGRESS: at sentence #190000, processed 4224450 words, keeping 64794 word types


2018-03-03 11:59:38,814 : INFO : PROGRESS: at sentence #200000, processed 4448604 words, keeping 66087 word types


2018-03-03 11:59:38,870 : INFO : PROGRESS: at sentence #210000, processed 4669968 words, keeping 67390 word types


2018-03-03 11:59:38,923 : INFO : PROGRESS: at sentence #220000, processed 4894969 words, keeping 68697 word types


2018-03-03 11:59:38,977 : INFO : PROGRESS: at sentence #230000, processed 5117546 words, keeping 69958 word types


2018-03-03 11:59:39,033 : INFO : PROGRESS: at sentence #240000, processed 5345051 words, keeping 71167 word types


2018-03-03 11:59:39,086 : INFO : PROGRESS: at sentence #250000, processed 5559166 words, keeping 72351 word types


2018-03-03 11:59:39,140 : INFO : PROGRESS: at sentence #260000, processed 5779147 words, keeping 73478 word types


2018-03-03 11:59:39,195 : INFO : PROGRESS: at sentence #270000, processed 6000436 words, keeping 74767 word types


2018-03-03 11:59:39,251 : INFO : PROGRESS: at sentence #280000, processed 6226315 words, keeping 76369 word types


2018-03-03 11:59:39,303 : INFO : PROGRESS: at sentence #290000, processed 6449475 words, keeping 77839 word types


2018-03-03 11:59:39,356 : INFO : PROGRESS: at sentence #300000, processed 6674078 words, keeping 79171 word types


2018-03-03 11:59:39,407 : INFO : PROGRESS: at sentence #310000, processed 6899392 words, keeping 80480 word types


2018-03-03 11:59:39,460 : INFO : PROGRESS: at sentence #320000, processed 7124279 words, keeping 81808 word types


2018-03-03 11:59:39,510 : INFO : PROGRESS: at sentence #330000, processed 7346022 words, keeping 83030 word types


2018-03-03 11:59:39,562 : INFO : PROGRESS: at sentence #340000, processed 7575534 words, keeping 84280 word types


2018-03-03 11:59:39,613 : INFO : PROGRESS: at sentence #350000, processed 7798804 words, keeping 85425 word types


2018-03-03 11:59:39,667 : INFO : PROGRESS: at sentence #360000, processed 8019467 words, keeping 86596 word types


2018-03-03 11:59:39,730 : INFO : PROGRESS: at sentence #370000, processed 8246659 words, keeping 87708 word types


2018-03-03 11:59:39,781 : INFO : PROGRESS: at sentence #380000, processed 8471806 words, keeping 88878 word types


2018-03-03 11:59:39,833 : INFO : PROGRESS: at sentence #390000, processed 8701556 words, keeping 89907 word types


2018-03-03 11:59:39,891 : INFO : PROGRESS: at sentence #400000, processed 8924505 words, keeping 90916 word types


2018-03-03 11:59:39,945 : INFO : PROGRESS: at sentence #410000, processed 9145855 words, keeping 91880 word types


2018-03-03 11:59:40,000 : INFO : PROGRESS: at sentence #420000, processed 9366935 words, keeping 92912 word types


2018-03-03 11:59:40,058 : INFO : PROGRESS: at sentence #430000, processed 9594472 words, keeping 93932 word types


2018-03-03 11:59:40,116 : INFO : PROGRESS: at sentence #440000, processed 9821225 words, keeping 94906 word types


2018-03-03 11:59:40,172 : INFO : PROGRESS: at sentence #450000, processed 10044987 words, keeping 96036 word types


2018-03-03 11:59:40,233 : INFO : PROGRESS: at sentence #460000, processed 10277747 words, keeping 97088 word types


2018-03-03 11:59:40,288 : INFO : PROGRESS: at sentence #470000, processed 10505672 words, keeping 97933 word types


2018-03-03 11:59:40,344 : INFO : PROGRESS: at sentence #480000, processed 10726056 words, keeping 98862 word types


2018-03-03 11:59:40,399 : INFO : PROGRESS: at sentence #490000, processed 10952800 words, keeping 99871 word types


2018-03-03 11:59:40,455 : INFO : PROGRESS: at sentence #500000, processed 11174456 words, keeping 100765 word types


2018-03-03 11:59:40,513 : INFO : PROGRESS: at sentence #510000, processed 11399731 words, keeping 101699 word types


2018-03-03 11:59:40,568 : INFO : PROGRESS: at sentence #520000, processed 11623082 words, keeping 102598 word types


2018-03-03 11:59:40,626 : INFO : PROGRESS: at sentence #530000, processed 11847480 words, keeping 103400 word types


2018-03-03 11:59:40,679 : INFO : PROGRESS: at sentence #540000, processed 12072095 words, keeping 104265 word types


2018-03-03 11:59:40,737 : INFO : PROGRESS: at sentence #550000, processed 12297646 words, keeping 105133 word types


2018-03-03 11:59:40,797 : INFO : PROGRESS: at sentence #560000, processed 12518936 words, keeping 105997 word types


2018-03-03 11:59:40,855 : INFO : PROGRESS: at sentence #570000, processed 12748083 words, keeping 106787 word types


2018-03-03 11:59:40,909 : INFO : PROGRESS: at sentence #580000, processed 12969579 words, keeping 107665 word types


2018-03-03 11:59:40,971 : INFO : PROGRESS: at sentence #590000, processed 13195104 words, keeping 108501 word types


2018-03-03 11:59:41,025 : INFO : PROGRESS: at sentence #600000, processed 13417302 words, keeping 109218 word types


2018-03-03 11:59:41,076 : INFO : PROGRESS: at sentence #610000, processed 13638325 words, keeping 110092 word types


2018-03-03 11:59:41,132 : INFO : PROGRESS: at sentence #620000, processed 13864650 words, keeping 110837 word types


2018-03-03 11:59:41,185 : INFO : PROGRESS: at sentence #630000, processed 14088936 words, keeping 111610 word types


2018-03-03 11:59:41,242 : INFO : PROGRESS: at sentence #640000, processed 14309719 words, keeping 112416 word types


2018-03-03 11:59:41,294 : INFO : PROGRESS: at sentence #650000, processed 14535475 words, keeping 113196 word types


2018-03-03 11:59:41,344 : INFO : PROGRESS: at sentence #660000, processed 14758265 words, keeping 113945 word types


2018-03-03 11:59:41,395 : INFO : PROGRESS: at sentence #670000, processed 14981658 words, keeping 114643 word types


2018-03-03 11:59:41,447 : INFO : PROGRESS: at sentence #680000, processed 15206490 words, keeping 115354 word types


2018-03-03 11:59:41,498 : INFO : PROGRESS: at sentence #690000, processed 15428683 words, keeping 116131 word types


2018-03-03 11:59:41,555 : INFO : PROGRESS: at sentence #700000, processed 15657389 words, keeping 116943 word types


2018-03-03 11:59:41,610 : INFO : PROGRESS: at sentence #710000, processed 15880378 words, keeping 117596 word types


2018-03-03 11:59:41,664 : INFO : PROGRESS: at sentence #720000, processed 16105665 words, keeping 118221 word types


2018-03-03 11:59:41,722 : INFO : PROGRESS: at sentence #730000, processed 16332046 words, keeping 118954 word types


2018-03-03 11:59:41,780 : INFO : PROGRESS: at sentence #740000, processed 16553079 words, keeping 119668 word types


2018-03-03 11:59:41,833 : INFO : PROGRESS: at sentence #750000, processed 16771406 words, keeping 120295 word types


2018-03-03 11:59:41,888 : INFO : PROGRESS: at sentence #760000, processed 16990810 words, keeping 120930 word types


2018-03-03 11:59:41,943 : INFO : PROGRESS: at sentence #770000, processed 17217947 words, keeping 121703 word types


2018-03-03 11:59:42,000 : INFO : PROGRESS: at sentence #780000, processed 17448093 words, keeping 122402 word types


2018-03-03 11:59:42,055 : INFO : PROGRESS: at sentence #790000, processed 17675169 words, keeping 123066 word types


2018-03-03 11:59:42,084 : INFO : collected 123504 word types from a corpus of 17798270 raw words and 795538 sentences


2018-03-03 11:59:42,085 : INFO : Loading a fresh vocabulary


2018-03-03 11:59:42,205 : INFO : min_count=40 retains 16490 unique words (13% of original 123504, drops 107014)


2018-03-03 11:59:42,207 : INFO : min_count=40 leaves 17239125 word corpus (96% of original 17798270, drops 559145)


2018-03-03 11:59:42,251 : INFO : deleting the raw counts dictionary of 123504 items


2018-03-03 11:59:42,257 : INFO : sample=0.001 downsamples 48 most-common words


2018-03-03 11:59:42,260 : INFO : downsampling leaves estimated 12749798 word corpus (74.0% of prior 17239125)


2018-03-03 11:59:42,318 : INFO : estimated required memory for 16490 words and 300 dimensions: 47821000 bytes


2018-03-03 11:59:42,319 : INFO : resetting layer weights


2018-03-03 11:59:42,529 : INFO : training model with 4 workers on 16490 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5 window=10


2018-03-03 11:59:43,538 : INFO : EPOCH 1 - PROGRESS: at 7.30% examples, 928223 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:44,543 : INFO : EPOCH 1 - PROGRESS: at 14.89% examples, 940946 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:45,543 : INFO : EPOCH 1 - PROGRESS: at 22.26% examples, 937090 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:46,560 : INFO : EPOCH 1 - PROGRESS: at 29.76% examples, 938383 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:47,573 : INFO : EPOCH 1 - PROGRESS: at 37.28% examples, 938365 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:48,581 : INFO : EPOCH 1 - PROGRESS: at 44.73% examples, 939233 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:49,586 : INFO : EPOCH 1 - PROGRESS: at 51.92% examples, 936509 words/s, in_qsize 7, out_qsize 1


2018-03-03 11:59:50,586 : INFO : EPOCH 1 - PROGRESS: at 59.28% examples, 938161 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:51,591 : INFO : EPOCH 1 - PROGRESS: at 66.39% examples, 934363 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:52,591 : INFO : EPOCH 1 - PROGRESS: at 73.55% examples, 932567 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:53,598 : INFO : EPOCH 1 - PROGRESS: at 80.76% examples, 930396 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:54,603 : INFO : EPOCH 1 - PROGRESS: at 87.74% examples, 927047 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:55,605 : INFO : EPOCH 1 - PROGRESS: at 94.77% examples, 924379 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:56,321 : INFO : worker thread finished; awaiting finish of 3 more threads


2018-03-03 11:59:56,327 : INFO : worker thread finished; awaiting finish of 2 more threads


2018-03-03 11:59:56,335 : INFO : worker thread finished; awaiting finish of 1 more threads


2018-03-03 11:59:56,339 : INFO : worker thread finished; awaiting finish of 0 more threads


2018-03-03 11:59:56,341 : INFO : EPOCH - 1 : training on 17798270 raw words (12751842 effective words) took 13.8s, 923716 effective words/s


2018-03-03 11:59:57,353 : INFO : EPOCH 2 - PROGRESS: at 6.90% examples, 872907 words/s, in_qsize 7, out_qsize 0


2018-03-03 11:59:58,358 : INFO : EPOCH 2 - PROGRESS: at 14.11% examples, 887803 words/s, in_qsize 8, out_qsize 0


2018-03-03 11:59:59,370 : INFO : EPOCH 2 - PROGRESS: at 21.24% examples, 888705 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:00,373 : INFO : EPOCH 2 - PROGRESS: at 28.31% examples, 891070 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:01,382 : INFO : EPOCH 2 - PROGRESS: at 35.50% examples, 892552 words/s, in_qsize 6, out_qsize 0


2018-03-03 12:00:02,384 : INFO : EPOCH 2 - PROGRESS: at 42.52% examples, 893840 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:03,385 : INFO : EPOCH 2 - PROGRESS: at 49.66% examples, 896884 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:04,388 : INFO : EPOCH 2 - PROGRESS: at 56.88% examples, 899643 words/s, in_qsize 8, out_qsize 1


2018-03-03 12:00:05,393 : INFO : EPOCH 2 - PROGRESS: at 63.67% examples, 896072 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:06,397 : INFO : EPOCH 2 - PROGRESS: at 70.82% examples, 897666 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:07,415 : INFO : EPOCH 2 - PROGRESS: at 77.94% examples, 897660 words/s, in_qsize 7, out_qsize 1


2018-03-03 12:00:08,410 : INFO : EPOCH 2 - PROGRESS: at 84.94% examples, 897339 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:09,415 : INFO : EPOCH 2 - PROGRESS: at 91.92% examples, 896810 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:10,421 : INFO : EPOCH 2 - PROGRESS: at 99.05% examples, 897223 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:10,539 : INFO : worker thread finished; awaiting finish of 3 more threads


2018-03-03 12:00:10,547 : INFO : worker thread finished; awaiting finish of 2 more threads


2018-03-03 12:00:10,554 : INFO : worker thread finished; awaiting finish of 1 more threads


2018-03-03 12:00:10,559 : INFO : worker thread finished; awaiting finish of 0 more threads


2018-03-03 12:00:10,560 : INFO : EPOCH - 2 : training on 17798270 raw words (12751602 effective words) took 14.2s, 896998 effective words/s


2018-03-03 12:00:11,568 : INFO : EPOCH 3 - PROGRESS: at 6.68% examples, 846923 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:12,577 : INFO : EPOCH 3 - PROGRESS: at 13.82% examples, 872023 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:13,579 : INFO : EPOCH 3 - PROGRESS: at 20.95% examples, 879834 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:14,582 : INFO : EPOCH 3 - PROGRESS: at 28.08% examples, 886256 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:15,592 : INFO : EPOCH 3 - PROGRESS: at 35.21% examples, 887255 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:16,600 : INFO : EPOCH 3 - PROGRESS: at 42.30% examples, 889681 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:17,607 : INFO : EPOCH 3 - PROGRESS: at 49.55% examples, 894407 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:18,617 : INFO : EPOCH 3 - PROGRESS: at 56.66% examples, 895044 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:19,622 : INFO : EPOCH 3 - PROGRESS: at 63.78% examples, 896787 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:20,628 : INFO : EPOCH 3 - PROGRESS: at 70.71% examples, 895299 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:21,634 : INFO : EPOCH 3 - PROGRESS: at 77.72% examples, 894693 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:22,634 : INFO : EPOCH 3 - PROGRESS: at 84.67% examples, 893996 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:23,652 : INFO : EPOCH 3 - PROGRESS: at 91.76% examples, 893945 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:24,654 : INFO : EPOCH 3 - PROGRESS: at 98.77% examples, 893903 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:24,819 : INFO : worker thread finished; awaiting finish of 3 more threads


2018-03-03 12:00:24,825 : INFO : worker thread finished; awaiting finish of 2 more threads


2018-03-03 12:00:24,831 : INFO : worker thread finished; awaiting finish of 1 more threads


2018-03-03 12:00:24,835 : INFO : worker thread finished; awaiting finish of 0 more threads


2018-03-03 12:00:24,836 : INFO : EPOCH - 3 : training on 17798270 raw words (12751683 effective words) took 14.3s, 893394 effective words/s


2018-03-03 12:00:25,850 : INFO : EPOCH 4 - PROGRESS: at 6.62% examples, 834220 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:26,857 : INFO : EPOCH 4 - PROGRESS: at 13.77% examples, 864890 words/s, in_qsize 6, out_qsize 0


2018-03-03 12:00:27,869 : INFO : EPOCH 4 - PROGRESS: at 21.07% examples, 880673 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:28,869 : INFO : EPOCH 4 - PROGRESS: at 28.03% examples, 881902 words/s, in_qsize 8, out_qsize 1


2018-03-03 12:00:29,873 : INFO : EPOCH 4 - PROGRESS: at 35.21% examples, 886107 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:30,874 : INFO : EPOCH 4 - PROGRESS: at 42.24% examples, 888660 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:31,876 : INFO : EPOCH 4 - PROGRESS: at 49.33% examples, 891239 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:32,878 : INFO : EPOCH 4 - PROGRESS: at 56.49% examples, 893936 words/s, in_qsize 6, out_qsize 0


2018-03-03 12:00:33,881 : INFO : EPOCH 4 - PROGRESS: at 63.39% examples, 892834 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:34,890 : INFO : EPOCH 4 - PROGRESS: at 70.16% examples, 889370 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:35,893 : INFO : EPOCH 4 - PROGRESS: at 77.32% examples, 891557 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:36,916 : INFO : EPOCH 4 - PROGRESS: at 84.34% examples, 890436 words/s, in_qsize 8, out_qsize 2


2018-03-03 12:00:37,913 : INFO : EPOCH 4 - PROGRESS: at 91.32% examples, 890526 words/s, in_qsize 6, out_qsize 0


2018-03-03 12:00:38,920 : INFO : EPOCH 4 - PROGRESS: at 98.34% examples, 890768 words/s, in_qsize 7, out_qsize 1


2018-03-03 12:00:39,142 : INFO : worker thread finished; awaiting finish of 3 more threads


2018-03-03 12:00:39,146 : INFO : worker thread finished; awaiting finish of 2 more threads


2018-03-03 12:00:39,156 : INFO : worker thread finished; awaiting finish of 1 more threads


2018-03-03 12:00:39,162 : INFO : worker thread finished; awaiting finish of 0 more threads


2018-03-03 12:00:39,164 : INFO : EPOCH - 4 : training on 17798270 raw words (12751038 effective words) took 14.3s, 890154 effective words/s


2018-03-03 12:00:40,181 : INFO : EPOCH 5 - PROGRESS: at 6.74% examples, 845890 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:41,185 : INFO : EPOCH 5 - PROGRESS: at 13.88% examples, 871750 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:42,186 : INFO : EPOCH 5 - PROGRESS: at 20.90% examples, 876390 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:43,202 : INFO : EPOCH 5 - PROGRESS: at 28.03% examples, 880595 words/s, in_qsize 8, out_qsize 1


2018-03-03 12:00:44,205 : INFO : EPOCH 5 - PROGRESS: at 35.16% examples, 884057 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:45,207 : INFO : EPOCH 5 - PROGRESS: at 42.08% examples, 884329 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:46,211 : INFO : EPOCH 5 - PROGRESS: at 49.10% examples, 886130 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:47,217 : INFO : EPOCH 5 - PROGRESS: at 56.00% examples, 884719 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:48,227 : INFO : EPOCH 5 - PROGRESS: at 63.10% examples, 887048 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:49,235 : INFO : EPOCH 5 - PROGRESS: at 70.05% examples, 886506 words/s, in_qsize 8, out_qsize 2


2018-03-03 12:00:50,248 : INFO : EPOCH 5 - PROGRESS: at 77.10% examples, 887246 words/s, in_qsize 7, out_qsize 1


2018-03-03 12:00:51,247 : INFO : EPOCH 5 - PROGRESS: at 84.23% examples, 888601 words/s, in_qsize 8, out_qsize 0


2018-03-03 12:00:52,251 : INFO : EPOCH 5 - PROGRESS: at 91.43% examples, 890890 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:53,255 : INFO : EPOCH 5 - PROGRESS: at 98.61% examples, 892442 words/s, in_qsize 7, out_qsize 0


2018-03-03 12:00:53,443 : INFO : worker thread finished; awaiting finish of 3 more threads


2018-03-03 12:00:53,448 : INFO : worker thread finished; awaiting finish of 2 more threads


2018-03-03 12:00:53,459 : INFO : worker thread finished; awaiting finish of 1 more threads


2018-03-03 12:00:53,467 : INFO : worker thread finished; awaiting finish of 0 more threads


2018-03-03 12:00:53,469 : INFO : EPOCH - 5 : training on 17798270 raw words (12750957 effective words) took 14.3s, 891496 effective words/s


2018-03-03 12:00:53,471 : INFO : training on a 88991350 raw words (63757122 effective words) took 70.9s, 898747 effective words/s


2018-03-03 12:00:53,472 : INFO : precomputing L2-norms of word weight vectors


2018-03-03 12:00:53,623 : INFO : saving Word2Vec object under 300features_40minwords_10context, separately None


2018-03-03 12:00:53,624 : INFO : not storing attribute vectors_norm


2018-03-03 12:00:53,626 : INFO : not storing attribute cum_table


2018-03-03 12:00:53,826 : INFO : saved 300features_40minwords_10context


In [1]:
# Here we can test our model

# Sentiment analysis
model.most_similar("good")

NameError: name 'model' is not defined