# Dependencies

In [None]:
!pip install binary gensim==3.6.0

# Libraries

In [1]:
import pandas as pd
import numpy as np
import csv
import multiprocessing
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
from time import time  # To time our operations

#import logging  # Setting up the loggings to monitor gensim
#logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

## Pipeline Training

In [None]:
# Train one skip-gram Word2Vec model per input CSV and save it under the same
# path stem. Inputs are enumerated by an integer index `n` (0..8) and a weight
# `w` (0.0..1.0 in steps of 0.1); both appear in the file name.

cores = multiprocessing.cpu_count()   # Number of CPU cores reported by the OS
# Use all cores but one, and never request zero workers: on a single-core
# machine `cores - 1` would be 0 and no training thread would be started.
n_workers = max(1, cores - 1)
t = time()  # Start time of the whole process

# Derive the weights from integer tenths instead of a float-step `np.arange`,
# whose number of elements depends on floating-point rounding of the end point.
# str() of these values is '0.0', '0.1', ..., '1.0', as in the file names.
weights = [tenth / 10.0 for tenth in range(11)]

for n in range(0, 9):
    for w in weights:
        print('Training model (tuple of weight):', w)

        # The input CSV and the saved model share the same path stem.
        path_stem = './austin-sl-tuple-n-itdl-' + str(n) + 'bin-wgt' + str(w) + '-p'
        file_name = path_stem + '.csv'

        print("Loading file:", file_name)
        # Malformed lines are skipped with a warning. `error_bad_lines` was
        # deprecated in pandas 1.3 and removed in 2.0, so try the current
        # keyword first and fall back for older pandas versions.
        try:
            tuples = pd.read_csv(file_name, on_bad_lines='warn')
        except TypeError:
            tuples = pd.read_csv(file_name, error_bad_lines=False)

        # Removing damaged rows (any row with a missing value in any column)
        tuples = tuples.dropna()

        # Using only the types of PoIs to create sentences in Word2Vec
        tuples = tuples[['center_poi', 'context_poi']]

        # Adapting to Word2Vec sentences: each row becomes a two-token sentence
        sentencesTuples = tuples.values.tolist()

        # Creating skip-gram model (gensim 3.x keyword names, e.g. `size`)
        p2v_modeltp = Word2Vec(min_count=1,
                               window=1,
                               sg=1,  # Skip-gram
                               size=70,
                               sample=6e-5,
                               alpha=0.03,
                               min_alpha=0.0007,
                               negative=20,
                               workers=n_workers)

        # Building vocabulary
        p2v_modeltp.build_vocab(sentencesTuples, progress_per=10000)

        # Training the model
        p2v_modeltp.train(sentencesTuples,
                          total_examples=p2v_modeltp.corpus_count,
                          epochs=15,
                          report_delay=1)

        # Saving to a file
        model_name = path_stem + '.model'

        print('Saving file:', model_name)
        p2v_modeltp.save(model_name)

print('Process finished.')
# Report the wall-clock time measured since `t` was taken.
print('Total time (min):', round((time() - t) / 60, 2))

In [None]:
# Inspect the nearest neighbours of the 'Parks' token.
# `p2v_modeltp` is whatever model the training loop assigned last, so this
# only reflects the final (n, w) combination.
parks_neighbours = p2v_modeltp.wv.most_similar(positive=['Parks'])
parks_neighbours