In [1]:
import torch
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split

In [2]:
torch.cuda.is_available()

True

In [3]:
vocab_size = len(np.load('data100keys.npy', allow_pickle=True))

In [4]:
cos = nn.CosineSimilarity(dim=0)

In [5]:
cos1 = nn.CosineSimilarity(dim=1)

In [6]:
train = pd.read_csv('train100.csv')

In [7]:
def map_to_tensor(word_id):
    word_id = word_id.split(" ")
    res = torch.zeros(vocab_size)
    res[list(map(lambda x: int(x), word_id))] = 1
    return res

In [8]:
class dataset_holder(Dataset):
    def __init__(self,data):
        self.data = data.reset_index(drop=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        phrase = row['phrase']
        target = row['target']
        wrong = row['wrong']

        phrase = map_to_tensor(phrase)
        target = map_to_tensor(target)
        wrong = map_to_tensor(wrong)
        
        return phrase, target, wrong

In [9]:
def custom_loss(output, correct, wrong):
    return cos(output,wrong)*1.5 - cos(output, correct) 

In [10]:
def custom_loss1(output, correct, wrong):
    return cos1(output,wrong) - cos1(output, correct) + 1

In [14]:
dataset = dataset_holder(train)

dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [16]:

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model


Using cuda device


In [18]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(vocab_size, 4096),
            nn.ReLU(),
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.Tanh()
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits


In [34]:
model = NeuralNetwork().to(device)
print(model)

loss_fn = custom_loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=9395, out_features=4096, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4096, out_features=2048, bias=True)
    (3): ReLU()
    (4): Linear(in_features=2048, out_features=1024, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1024, out_features=512, bias=True)
    (7): Tanh()
  )
)


In [35]:
27530744/64

430167.875

In [36]:
import os

In [37]:
def training(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (current, right, wrong) in enumerate(dataloader):
        current, right, wrong = current.to(device), right.to(device), wrong.to(device)

        # Compute prediction error
        pred = model(current)
        pred_right = model(right)
        
        loss = (-cos1(pred, pred_right)).sum()
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        pred = model(current)
        pred_wrong = model(wrong)
        
        #print(pred)
        #print(pred_wrong)
        #print(cos1(pred, pred_wrong))
        loss2 = (1.5*cos1(pred, pred_wrong)).sum()
        
        loss2.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 2000 == 0:
            print(batch, loss.item(), loss2.item())
            torch.save(model.state_dict(), f"model2_{batch}.pth")


In [38]:
epochs = 1
for t in range(epochs):
    #print(f"Epoch {t+1}\n-------------------------------")
    training(dataloader, model, custom_loss1, optimizer)
print("Done!")

0 -63.9776496887207 95.98396301269531
2000 -24.46881866455078 -6.436469554901123
4000 -34.75173568725586 25.02072525024414
6000 -29.781246185302734 32.695308685302734
8000 -39.9375 33.65644836425781
10000 -55.54482650756836 47.48664093017578
12000 -47.824214935302734 47.48436737060547
14000 -43.078121185302734 51.33983612060547
16000 -50.5703125 48.99609375
18000 -51.0625 48.5625
20000 -51.484375 56.3671875
22000 -46.6171875 57.41015625
24000 -51.203121185302734 63.09374237060547
26000 -54.624996185302734 61.72264862060547
28000 -49.781246185302734 68.13275146484375
30000 -53.56617736816406 69.83787536621094
32000 -52.937496185302734 70.41796112060547
34000 -54.546871185302734 72.17071533203125
36000 -54.9765625 70.16600799560547
38000 -54.343746185302734 72.45116424560547
40000 -55.796871185302734 73.73435974121094
42000 -54.820308685302734 75.07030487060547
44000 -56.093746185302734 78.46874237060547
46000 -57.281246185302734 78.86717987060547
48000 -58.624996185302734 75.33983612060

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), "model2.pth")
print("Saved PyTorch Model State to model.pth")