In [1]:
import torch
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split

In [2]:
# Quick check that PyTorch can see a CUDA device from this kernel.
torch.cuda.is_available()

True

In [3]:
# Vocabulary size = number of entries in the saved keys array.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code — only load this file from a trusted source.
vocab_size = len(np.load('data100keys.npy', allow_pickle=True))

In [4]:
# Cosine similarity reduced along dim 0 (suitable for a pair of 1-D vectors).
cos = nn.CosineSimilarity(dim=0)

In [5]:
# Cosine similarity reduced along dim 1: one value per row for 2-D inputs.
cos1 = nn.CosineSimilarity(dim=1)

In [6]:
# Load the full table of triplets (columns: wrong, phrase, target).
# NOTE(review): the 'Unnamed: 0' column in the output looks like a previously
# saved index — consider index_col=0; confirm before changing.
data = pd.read_csv('all_data.csv')
data

Unnamed: 0,wrong,phrase,target
0,4151,1487,133
1,3570,750,4803
2,4873,2770,2552
3,6062,3664,2908
4,4108,312,4253
...,...,...,...
34413425,3379,1737,461
34413426,1427,2150,2150
34413427,8854,3721,366
34413428,5275,133,9006


In [7]:
# Spot-check: a cell is a string of ids, so splitting on spaces yields id tokens.
data.iloc[0]['wrong'].split(" ")

['4151']

In [8]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [9]:
# Preview the training partition (original row labels are still in the index here).
train

Unnamed: 0,wrong,phrase,target
30628697,2929,1619,939
16471403,8085,1730,52
19974707,9270,1983,7225
29207325,3204,229,1645
28431238,2238,4638,1172
...,...,...,...
29210711,2447,506,3610
26301898,6290,1980,29
26735830,3709,1314,1060
13315092,1458,2336,2231


In [10]:
def map_to_tensor(word_id, size=None):
    """Encode a string of vocabulary ids as a multi-hot vector.

    Args:
        word_id: Whitespace-separated integer ids, e.g. ``"12 407"``. A bare
            integer is accepted as well. Repeated or trailing whitespace is
            ignored.
        size: Length of the returned vector. Defaults to the module-level
            ``vocab_size`` when omitted.

    Returns:
        A 1-D float tensor of length ``size`` holding 1.0 at every listed id
        and 0.0 elsewhere. Input with no ids yields an all-zero vector.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
        IndexError: If an id falls outside the vector.
    """
    if size is None:
        size = vocab_size
    # str() tolerates columns pandas parsed as integers; split() with no
    # argument drops empty tokens that split(" ") would pass on to int().
    indices = [int(token) for token in str(word_id).split()]
    res = torch.zeros(size)
    if indices:
        res[indices] = 1
    return res

In [11]:
class dataset_holder(Dataset):
    """Map-style dataset over a table with 'phrase', 'target' and 'wrong' columns.

    Indexing returns the three cells of one row, each converted by
    ``map_to_tensor``, in the order (phrase, target, wrong).
    """

    def __init__(self, data):
        # Discard the incoming index so rows are addressed positionally from 0.
        self.data = data.reset_index(drop=True)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        record = self.data.iloc[idx]
        # Read all three cells first, then encode them in the same order.
        raw_cells = [record[column] for column in ('phrase', 'target', 'wrong')]
        phrase, target, wrong = (map_to_tensor(cell) for cell in raw_cells)
        return phrase, target, wrong

In [12]:
def custom_loss(output, correct, wrong):
    """Return similarity(output, wrong) minus similarity(output, correct).

    Both similarities are computed with the module-level ``cos`` callable.
    The value gets smaller as ``output`` moves toward ``correct`` and away
    from ``wrong``.
    """
    similarity_to_wrong = cos(output, wrong)
    similarity_to_correct = cos(output, correct)
    return similarity_to_wrong - similarity_to_correct

In [13]:
def custom_loss1(output, correct, wrong):
    """Return similarity(output, wrong) - similarity(output, correct) + 1.

    Both similarities are computed with the module-level ``cos1`` callable.
    The constant 1 offsets the raw similarity difference.
    """
    similarity_to_wrong = cos1(output, wrong)
    similarity_to_correct = cos1(output, correct)
    difference = similarity_to_wrong - similarity_to_correct
    return difference + 1

In [14]:
# Wrap the training partition so rows are encoded on access.
dataset = dataset_holder(train)

# Training batches of 64 triplets, reshuffled on every pass.
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [15]:
# Wrap the held-out partition so rows are encoded on access.
test_dataset = dataset_holder(test)

# Build the loader from test_dataset: passing `dataset` here would silently
# evaluate on the training rows.
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [16]:

# Choose the compute backend in order of preference: CUDA, then MPS, then CPU.
# MPS availability is only queried when CUDA is not available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model


Using cuda device


In [17]:
class NeuralNetwork(nn.Module):
    """Fully connected encoder mapping a ``vocab_size``-wide vector to 512 values.

    Six linear layers with ReLU between consecutive layers; the final layer
    has no activation.
    """

    def __init__(self):
        super().__init__()
        widths = [vocab_size, 8192, 8192, 4096, 2048, 1024, 512]
        layers = []
        last_pair = len(widths) - 2
        # Layers are appended in the same order as a hand-written stack, so the
        # Sequential indices (and therefore state_dict keys) are 0..10.
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            layers.append(nn.Linear(fan_in, fan_out))
            if position != last_pair:
                layers.append(nn.ReLU())
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        return self.linear_relu_stack(x)


In [18]:
# Instantiate the network on the selected device and show its layer layout.
model = NeuralNetwork().to(device)
print(model)

# Use the dim=1 loss: batches are 2-D (batch, features), and custom_loss reduces
# along dim 0, which would compare across samples rather than within each one.
loss_fn = custom_loss1
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=9395, out_features=8192, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8192, out_features=8192, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8192, out_features=4096, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4096, out_features=2048, bias=True)
    (7): ReLU()
    (8): Linear(in_features=2048, out_features=1024, bias=True)
    (9): ReLU()
    (10): Linear(in_features=1024, out_features=512, bias=True)
  )
)


In [19]:
# Scratch arithmetic: batches per epoch at batch_size=64.
# NOTE(review): presumably 27530744 is len(train) — TODO confirm.
27530744/64

430167.875

In [20]:
# Write both partitions to disk without the DataFrame index.
for _frame, _path in ((train, 'train100.csv'), (test, 'test100.csv')):
    _frame.to_csv(_path, index=False)

In [21]:
import os

In [22]:
def training(dataloader, model, loss_fn, optimizer, device=None,
             log_every=2000, checkpoint_prefix="model1"):
    """Run one optimisation pass over ``dataloader``.

    Every batch is a ``(current, right, wrong)`` triple of tensors. All three
    are passed through ``model`` and the per-sample values returned by
    ``loss_fn`` are summed into the batch loss.

    Args:
        dataloader: Iterable yielding ``(current, right, wrong)`` tensor triples.
        model: Module being trained; switched to train mode.
        loss_fn: Callable ``(pred, pred_right, pred_wrong) -> tensor``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Where to move each batch. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model).
        log_every: Print the loss every this many batches; a falsy value
            disables logging and checkpointing.
        checkpoint_prefix: Filename prefix for checkpoints written alongside
            each log line as ``f"{checkpoint_prefix}_{batch}.pth"``. ``None``
            disables checkpoint files.

    Returns:
        None. ``model`` and ``optimizer`` are updated in place.
    """
    if device is None:
        # Follow the model's own placement instead of relying on a notebook global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else "cpu"

    model.train()
    for batch, (current, right, wrong) in enumerate(dataloader):
        current, right, wrong = current.to(device), right.to(device), wrong.to(device)

        # Embed the anchor phrase and both candidates with the same network.
        pred = model(current)
        pred_right = model(right)
        pred_wrong = model(wrong)

        loss = loss_fn(pred, pred_right, pred_wrong).sum()

        # Clear gradients BEFORE backward so anything left behind by an
        # interrupted earlier run cannot leak into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and batch % log_every == 0:
            print(batch, loss.item())
            if checkpoint_prefix is not None:
                torch.save(model.state_dict(), f"{checkpoint_prefix}_{batch}.pth")


In [23]:
# Number of full passes over the training loader.
epochs = 1
for t in range(epochs):
    # Training uses the batched (dim=1) loss.
    training(
        dataloader=dataloader,
        model=model,
        loss_fn=custom_loss1,
        optimizer=optimizer,
    )
print("Done!")

0 63.999961853027344
2000 63.99999237060547
4000 39.999717712402344
6000 47.88071823120117
8000 39.99880599975586
10000 39.999900817871094
12000 40.03901290893555
14000 62.000709533691406
16000 23.999067306518555
18000 21.99859619140625


KeyboardInterrupt: 

In [None]:
# Persist the final weights. The message is built from the same path that is
# written, so the two cannot drift apart (it previously named "model.pth").
checkpoint_path = "model1.pth"
torch.save(model.state_dict(), checkpoint_path)
print(f"Saved PyTorch Model State to {checkpoint_path}")