# PREPROCESSING

In [1]:
# Processes a text file according to the skip-gram algorithm.
# Produces a list of index pairs in the following format: [[7,4],[42,44], ...]
# (presumably exposed as `preprocess.idx_pairs`, which the training cell reads -- TODO confirm in preprocessing.py).

from preprocessing import Preprocess
# Built with no arguments. Later cells read `preprocess.vocabulary_size` and
# `preprocess.idx_pairs` from this object and pass it to `Metrics`, so this
# cell must run before any of them.
preprocess = Preprocess()

# TRAINING

In [None]:
# Training runs best after raising the notebook's iopub data rate limit.
# On macOS, start Jupyter from the terminal with: "jupyter notebook --NotebookApp.iopub_data_rate_limit=10000000000"

import torch
from torch.utils.data import Dataset, DataLoader
from tensorboardX import SummaryWriter
from tqdm import tqdm

from dataset import LoadedDataSet
from model import Word2Vec
from utils import AverageMeter

# Hyperparameters for this training run.
dimensionSize = 200  # second constructor argument of Word2Vec
num_epochs = 50
lr = 0.001
batchSize = 80

model = Word2Vec(preprocess.vocabulary_size, dimensionSize)
optimizer = torch.optim.Adam(model.parameters(), lr)
writer = SummaryWriter()
losses = AverageMeter()

# print('total amount of batches {}'.format(len(idx_pairs) / batchSize))
for e in range(num_epochs):
    # Start every epoch with a fresh running average.
    losses.reset()

    dataset = LoadedDataSet(preprocess.idx_pairs)
    loader = DataLoader(dataset, batchSize, shuffle=True)
    pbar = tqdm(loader)

    for i, (data, target) in enumerate(pbar):
        # Global step measured in batches. `i` counts batches, so the epoch
        # offset has to be the number of batches per epoch (len(loader)),
        # not the number of samples (len(dataset)); otherwise the logged
        # step jumps at every epoch boundary.
        n_iter = e * len(loader) + i

        # NOTE(review): if Word2Vec is an nn.Module, `model(data, target)` is the
        # usual call form (it also runs registered hooks) -- confirm before changing.
        loss = model.forward(data, target)

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        # think about adding clipping here
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()

        # NOTE(review): the last batch of an epoch may hold fewer than batchSize
        # samples; presumably the second argument is a sample count used as a
        # weight -- verify against utils.AverageMeter.
        losses.update(loss.item(), batchSize)

        # Log every 100th batch to keep the event file small.
        if i % 100 == 0:
            writer.add_scalar('loss', losses.val, n_iter)
            writer.add_scalar('avg_loss', losses.avg, n_iter)

writer.export_scalars_to_json("./all_scalars.json")
# `writer.close` without parentheses only referenced the method and never ran it;
# call it so the writer is actually closed.
writer.close()

# SAVING TRAINED MODEL

In [None]:
import os
# change it to numpy

# Create the output directory when it is missing. exist_ok=True replaces the
# separate exists()/makedirs() pair, which could race with another process
# creating the directory in between, and keeps the cell safe to re-run.
os.makedirs("SOFTMAX_trained", exist_ok=True)

# Persist whatever model.input_embeddings() returns under a fixed file name;
# the evaluation cell loads it back from this same path.
torch.save(model.input_embeddings(), os.path.join("SOFTMAX_trained", "attempt#1.w2v"))

# LOADING & EVALUATING TRAINED MODEL

In [2]:
# Loads the weights of a saved model, then calculates and prints the following metrics: Spearman rank correlation, MSE, MAE.
# Inspect the logged training data with TensorBoard: "tensorboard --logdir runs"

import torch
import os
from evaluation import Metrics

# Path written by the saving cell.
weights_path = os.path.join("SOFTMAX_trained", "attempt#1.w2v")

# torch.load unpickles the file, so only point it at files this notebook produced.
loadedModel = torch.load(weights_path)

# Needs `preprocess` from the preprocessing cell; left as the final expression
# so the cell still displays the resulting object.
Metrics(loadedModel, preprocess)

ModuleNotFoundError: No module named 'torch'