In [1]:
import torch
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split

In [2]:
# Quick check that PyTorch can see a CUDA device before any training is started.
torch.cuda.is_available()

True

In [3]:
# Vocabulary size = number of entries in the saved key array. It fixes the length of
# the multi-hot vectors built by map_to_tensor and the input width of the network.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects, which
# can execute code -- only load 'data100keys.npy' from a trusted source.
vocab_size = len(np.load('data100keys.npy', allow_pickle=True))

In [4]:
# Cosine similarity reduced over dimension 0: yields a scalar for a pair of 1-D vectors.
# On 2-D (batch, features) inputs this reduces across the batch axis, not per sample.
cos = nn.CosineSimilarity(dim=0)

In [5]:
# Cosine similarity reduced over dimension 1: for 2-D (batch, features) inputs this
# gives one similarity value per row.
cos1 = nn.CosineSimilarity(dim=1)

In [6]:
# Training table. dataset_holder reads the 'phrase', 'target' and 'wrong' columns of
# each row and passes them to map_to_tensor, which expects space-separated integer ids.
train = pd.read_csv('train100.csv')


In [7]:
def map_to_tensor(word_id):
    """Encode a whitespace-separated string of vocabulary ids as a multi-hot vector.

    Args:
        word_id: String of integer ids separated by whitespace, e.g. ``"3 17 42"``.
            Repeated, leading or trailing whitespace is tolerated, and an empty
            string is treated as "no ids".

    Returns:
        A float tensor of length ``vocab_size`` (module-level) holding 1 at every
        listed index and 0 everywhere else. Duplicate ids simply set the same slot.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
        IndexError: If an id falls outside the vocabulary range.
    """
    # split() without an argument collapses runs of whitespace and drops empty
    # tokens, so stray spaces no longer produce '' (which int() rejects).
    indices = torch.tensor([int(token) for token in word_id.split()], dtype=torch.long)
    res = torch.zeros(vocab_size)
    res[indices] = 1
    return res

In [8]:
class dataset_holder(Dataset):
    """Map-style dataset that serves (phrase, target, wrong) triples as tensors.

    Each of the three columns of the wrapped frame is converted with
    ``map_to_tensor`` when an item is requested.
    """

    def __init__(self, data):
        # Renumber rows 0..n-1 so the stored frame has a clean positional index.
        self.data = data.reset_index(drop=True)

    def __len__(self):
        """Number of rows (triples) available."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the ``idx``-th row as a (phrase, target, wrong) tuple of tensors."""
        record = self.data.iloc[idx]
        # Pull all three raw fields first, then encode them in the same order.
        raw_fields = [record[column] for column in ('phrase', 'target', 'wrong')]
        return tuple(map_to_tensor(raw) for raw in raw_fields)

In [9]:
def custom_loss(output, correct, wrong):
    """Contrastive cosine loss: similarity to the wrong sample minus similarity to the correct one.

    Similarity is measured along the last (feature) dimension, so the function
    works both for a single 1-D embedding (returns a scalar tensor) and for a
    (batch, features) batch (returns one value per row). Reducing over dimension 0
    would compare values across the batch axis for batched inputs.

    Args:
        output: Embedding(s) of the query.
        correct: Embedding(s) of the matching sample, same shape as ``output``.
        wrong: Embedding(s) of the non-matching sample, same shape as ``output``.

    Returns:
        Tensor of ``cos(output, wrong) - cos(output, correct)``; values lie in
        [-2, 2] and lower is better.
    """
    sim_wrong = nn.functional.cosine_similarity(output, wrong, dim=-1)
    sim_correct = nn.functional.cosine_similarity(output, correct, dim=-1)
    return sim_wrong - sim_correct

In [10]:
def custom_loss1(output, correct, wrong):
    """Per-row contrastive cosine loss with a constant offset of 1.

    Uses the module-level ``cos1`` (cosine similarity over dimension 1), so the
    inputs are expected to be 2-D and the result has one entry per row:
    ``cos1(output, wrong) - cos1(output, correct) + 1``.
    """
    sim_wrong = cos1(output, wrong)
    sim_correct = cos1(output, correct)
    return sim_wrong - sim_correct + 1

In [11]:
# Wrap the training frame so that every item is a (phrase, target, wrong) tensor triple.
dataset = dataset_holder(train)

# Mini-batches of 64 triples, drawn in shuffled order.
# NOTE(review): no random seed is set in this notebook, so the shuffled order (and hence
# which rows land in a given batch index) presumably differs between runs -- confirm.
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [12]:

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model


Using cuda device


In [13]:
class NeuralNetwork(nn.Module):
    """Fully connected encoder mapping a ``vocab_size``-wide vector to a 512-d embedding.

    The stack is Linear -> ReLU repeated through the hidden widths, ending in a
    plain Linear layer with no activation after it.
    """

    def __init__(self):
        super().__init__()
        # Input width followed by the output width of every Linear layer, in order.
        widths = (vocab_size, 8192, 8192, 4096, 2048, 1024, 512)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # The final projection is left linear, so drop the trailing activation.
        layers.pop()
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 512-d output of the stack for input ``x``."""
        return self.linear_relu_stack(x)


In [14]:
# Instantiate the network on the selected device and print its layer layout.
model = NeuralNetwork().to(device)
print(model)

# NOTE(review): loss_fn is bound to custom_loss here, but the training cell further down
# passes custom_loss1 explicitly, so this binding is not what actually gets optimised.
loss_fn = custom_loss
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=9395, out_features=8192, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8192, out_features=8192, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8192, out_features=4096, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4096, out_features=2048, bias=True)
    (7): ReLU()
    (8): Linear(in_features=2048, out_features=1024, bias=True)
    (9): ReLU()
    (10): Linear(in_features=1024, out_features=512, bias=True)
  )
)


In [15]:
# Scratch arithmetic: 27530744 / 64 is about 430168, which lines up with the last batch
# index (430000) printed by the training log for batch_size=64.
# NOTE(review): 27530744 is presumably the number of rows in the training set -- confirm.
27530744/64

430167.875

In [16]:
import os

In [17]:
# Resume point: training() skips every batch whose index is <= start_batch.
# The value corresponds to the checkpoint model1_388000.pth that is loaded before training.
start_batch = 388000

In [18]:
def training(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, resuming after the module-level ``start_batch``.

    Batches with index <= ``start_batch`` are drawn from the loader but not trained
    on; their index is printed every 10000 batches as a progress marker. Every later
    batch is moved to the module-level ``device``, the model embeds the query, the
    correct sample and the wrong sample, and the summed loss is back-propagated.

    Args:
        dataloader: Iterable yielding ``(current, right, wrong)`` tensor batches.
        model: Network applied to each of the three tensors.
        loss_fn: Callable ``(pred, pred_right, pred_wrong) -> tensor``; its result
            is reduced with ``.sum()``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Side effects:
        Every 2000th trained batch prints ``batch, loss`` and writes the model's
        state dict to ``model1_{batch}.pth`` in the working directory.
    """
    model.train()
    # Drop gradients left behind by an earlier (e.g. interrupted) run so they are
    # not accumulated into the first optimizer step of this pass.
    optimizer.zero_grad()
    for batch, (current, right, wrong) in enumerate(dataloader):
        if batch <= start_batch:
            if batch % 10000 == 0:
                print(batch)
            continue

        current, right, wrong = current.to(device), right.to(device), wrong.to(device)

        # Compute prediction error
        pred = model(current)
        pred_right = model(right)
        pred_wrong = model(wrong)

        loss = loss_fn(pred, pred_right, pred_wrong).sum()

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 2000 == 0:
            print(batch, loss.item())
            torch.save(model.state_dict(), f"model1_{batch}.pth")


In [19]:
# Restore the weights saved at the resume point. The file name is derived from
# start_batch so the checkpoint and the number of skipped batches cannot drift apart,
# and map_location lets a checkpoint written on one device load on another
# (for example a CUDA checkpoint on a CPU-only machine).
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
model.load_state_dict(torch.load(f"model1_{start_batch}.pth", map_location=device))

<All keys matched successfully>

In [20]:
# Run the requested number of passes over the data using the per-row loss custom_loss1.
# NOTE(review): training() applies the start_batch skip on every call, so with
# epochs > 1 each pass would again skip the first start_batch batches.
epochs = 1
for t in range(epochs):
    #print(f"Epoch {t+1}\n-------------------------------")
    training(dataloader, model, custom_loss1, optimizer)
print("Done!")

0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000 21.997217178344727
392000 22.00008773803711
394000 14.000761985778809
396000 9.999151229858398
398000 15.999038696289062
400000 21.930986404418945
402000 2.079652786254883
404000 -1.3883275985717773
406000 8.002119064331055
408000 15.167200088500977
410000 -3.9972333908081055
412000 28.000564575195312
414000 -5.9600934982299805
416000 12.066399574279785
418000 8.0001802444458
420000 7.978611946105957
422000 3.993929386138916
424000 12.001989364624023
426000 6.000189781188965
428000 -3.998137950897217
430000 32.000022888183594
Done!


In [21]:
# Persist the final weights. The confirmation message is built from the same path
# that is written, so it cannot report a different file name than the one saved.
final_checkpoint = "model1_done.pth"
torch.save(model.state_dict(), final_checkpoint)
print(f"Saved PyTorch Model State to {final_checkpoint}")

Saved PyTorch Model State to model.pth
