In [1]:
import pandas as pd
import numpy as np
import ast

In [2]:
# Location of the CSV to load (presumably written by an earlier preprocessing step — confirm).
save_file = "saved.csv"
small_df = pd.read_csv(filepath_or_buffer=save_file)

In [3]:
# Re-wrap the two columns used downstream as pandas Series and assign them back
# onto the frame; the column contents themselves are not modified.
for column in ('stemmed_tokens', 'sentiment'):
    small_df[column] = pd.Series(small_df[column])

In [4]:
# The 'stemmed_tokens' column is read from CSV, so each entry is expected to be the
# string representation of a Python list; parse it back into a real list.
# Entries that are already lists are passed through unchanged, which keeps this cell
# safe to re-run: ast.literal_eval raises ValueError when handed a list instead of a
# string. Any other unexpected value (e.g. NaN) still raises, as before.
small_df['stemmed_tokens'] = small_df['stemmed_tokens'].map(
    lambda tokens: tokens if isinstance(tokens, list) else ast.literal_eval(tokens)
)

# Implementing the Word2Vec Model

In this notebook, an implementation of the model from the original Word2Vec paper is presented.
It produces a vector space from the previously obtained stemmed tokens, so that a Random Forest classifier
can classify tweets based on their lexical content.

To learn more about implementing the Word2Vec model, see the [official tutorial from PyTorch](https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html).
If you'd like to read the original paper, see its [arXiv page](https://arxiv.org/abs/1301.3781).

In [None]:
# Train a skip-gram Word2Vec model on the stemmed tokens and save it to disk.
#
# The class is imported under the name that is actually called below. The previous
# import bound only `word2vec`, so the `Word2Vec(...)` call raised NameError on a
# fresh kernel. The keyword `vector_size` and the `.wv.key_to_index` attribute used
# in later cells are the gensim 4.x API, hence the gensim import.
from gensim.models import Word2Vec
import time

# Hyperparameters (skip-gram model, sg = 1)
size = 400      # passed as vector_size: dimensionality of the word vectors
window = 3      # context window passed to Word2Vec
min_count = 1   # minimum token frequency; 1 keeps every token in the vocabulary
workers = 3     # number of worker threads used for training
sg = 1          # 1 selects skip-gram

model_file = 'word2vec.model'
start_time = time.time()
stemmed_tokens = small_df['stemmed_tokens']

w2v_model = Word2Vec(
    stemmed_tokens,
    vector_size=size,
    min_count=min_count,
    workers=workers,
    window=window,
    sg=sg,
)
print("Time taken to train word2vec model: " + str(time.time() - start_time))
w2v_model.save(model_file)

In [None]:
# Display the keys of the trained model's key-to-index mapping (its vocabulary).
vocabulary = w2v_model.wv.key_to_index
vocabulary.keys()

In [62]:
# Display the keys of the trained model's key-to-index mapping (its vocabulary).
# NOTE(review): this repeats the previous cell and prints every key; consider
# showing only a slice to keep the notebook output short.
w2v_model.wv.key_to_index.keys()

dict_keys(['to', 'unit', 'the', 'you', 'flight', 'for', 'on', 'and', 'jetblu', 'my', 'thank', 'southwestair', 'in', 'it', 'is', 'of', 'co', 'http', 'me', 'your', 'have', 'that', 'can', 'with', 'wa', 'at', 'get', 'be', 'thi', 'from', 'not', 'virginamerica', 'we', 'but', 'ar', 'just', 'no', 'so', 'do', 'help', 'servic', 'will', 'time', 'us', 'fly', 'now', 'delai', 'an', 'custom', 'our', 'usairwai', 'americanair', 'out', 'what', 'bag', 'there', 'thei', 'up', 'all', 'airlin', 'plane', 'if', 'would', 'hour', 'great', 'go', 'how', 'amp', 'cancel', 'when', 'pleas', 'need', 'seat', 'check', 'as', 'been', 'wait', 'book', 'gate', 'make', 'about', 'love', 'dai', 'or', 'am', 'ani', 'like', 'why', 'back', 'got', 'gui', 'still', 'ua', 'ha', 'ticket', 'todai', 'know', 'work', 'had', 'new', 'dm', 'chang', 'good', 'see', 'after', 'travel', 're', 'don', 'wai', 'board', 'miss', 'by', 'more', 'take', 'look', 'again', 'much', 'll', 'call', 'airport', 'did', 'late', 'agent', 'crew', 'try', 'last', 've', 'be