In [5]:
import json
import pickle
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Dict

from dataset import SeqClsDataset
from utils import Vocab, Acc_counter
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from model import SeqClassifier

import torch
from tqdm import trange
import torch.optim as optim

import torch.nn.functional as F

# ---------------------------------------------------------------------------
# Setup cell: locate cached artefacts, load the slot-tagging data, and build
# the model / optimizer / loss used by the cells below.
# ---------------------------------------------------------------------------
cache_dir = Path('./cache/slot/')
data_dir = Path('./data/slot')

# Length argument handed to SeqClsDataset (previously an unnamed literal).
MAX_LEN = 36

# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(security): pickle.load can execute arbitrary code. Only load a
# vocab.pkl that was produced locally by this project's own tooling.
with open(cache_dir / "vocab.pkl", "rb") as f:
    vocab: Vocab = pickle.load(f)

slot_idx_path = cache_dir / "tag2idx.json"
slot2idx: Dict[str, int] = json.loads(slot_idx_path.read_text())

data_paths = {split: data_dir / f"{split}.json" for split in ['train', 'eval']}
# Keep the per-split dict under its own name instead of rebinding `data`
# from a dict to a list inside one cell.
data_by_split = {split: json.loads(path.read_text()) for split, path in data_paths.items()}
# Flatten the splits (train first, then eval) into one list of samples.
# NOTE(review): this merges eval into the training data, so the accuracies
# printed by the training loop are not held-out numbers.
data = [sample for samples in data_by_split.values() for sample in samples]

embeddings = torch.load(cache_dir / "embeddings.pt")

datasets = SeqClsDataset(data, vocab, slot2idx, MAX_LEN)

# NOTE(review): shuffle=False and batch_size=2 are kept exactly as before;
# confirm whether an unshuffled loader is intended for training.
dataloader = DataLoader(
    datasets,
    batch_size=2,
    shuffle=False,
    collate_fn=datasets.collate_fn,
)

model = SeqClassifier(
    embeddings=embeddings,
    hidden_size=128,
    num_layers=2,
    dropout=0.1,
    bidirectional=True,
    num_class=len(slot2idx),
)
# Only parameters that require gradients are handed to Adam.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
criterion = torch.nn.CrossEntropyLoss()
model.to(device)
criterion.to(device)
# Last expression of the cell: switches to training mode and, because
# Module.train() returns the module, displays the model summary.
model.train()

SeqClassifier(
  (embed): Embedding(4117, 300)
  (rnn): GRU(300, 128, num_layers=2, dropout=0.1, bidirectional=True)
  (fc): Linear(in_features=256, out_features=9, bias=True)
)

In [6]:
# Explicit iterator over the dataloader so a later cell can pull one batch
# at a time with next(d).
d=iter(dataloader)

In [7]:
# Pull one batch from the iterator and run a single forward pass.
# The collate function yields a 3-tuple unpacked as (y, x, length);
# presumably y = target tags, x = token ids, length = sequence lengths —
# TODO confirm against SeqClsDataset.collate_fn.
y, x, length = next(d)
# `_y` holds the model output for this batch; later cells inspect it.
_y = model(x,length,device)

In [2]:
# NOTE(review): `f` is never used by this cell. It is kept only so the name
# stays bound for any other cell that might rely on it; remove once confirmed
# unused.
f = torch.nn.LogSoftmax(dim=1)

# Number of full passes over the dataloader.
NUM_EPOCHS = 10

for epoch in range(NUM_EPOCHS):
    # Re-assert training mode at the start of every epoch so this cell does
    # not depend on the setup cell (or the absence of an intervening
    # model.eval()) to have dropout enabled.
    model.train()

    # Fresh accuracy counters per epoch: token-level and sentence-level.
    token_acc = Acc_counter()
    sent_acc = Acc_counter()
    for y, x, length in dataloader:

        _y = model(x, length, device)
        # loss_and_acc returns the loss used for backprop and is passed both
        # counters (presumably updating them in place — see model.py).
        loss = model.loss_and_acc(_y, y, criterion, device, sent_acc, token_acc)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # One line per epoch: "<epoch> <sentence acc>/<token acc>".
    print(epoch, f"{sent_acc.out()}/{token_acc.out()}")

0 40.2717%/80.7844%
1 55.8224%/90.4740%
2 69.2261%/94.2495%
3 75.2305%/95.7008%
4 79.0272%/96.5082%
5 81.7200%/97.0429%
6 83.7215%/97.4389%
7 85.8079%/97.7989%
8 86.9602%/98.0250%
9 88.4279%/98.2479%


In [8]:
# Display the raw model output currently bound to `_y`.
_y

tensor([[[-0.1465, -0.1069,  0.0550, -0.0192, -0.0476, -0.0941,  0.0843,
          -0.0545,  0.0165],
         [-0.1451, -0.1586,  0.0337, -0.0318, -0.1341, -0.1519,  0.0809,
          -0.0828,  0.0274],
         [-0.1577, -0.1565, -0.0292, -0.0361, -0.1400, -0.1843,  0.1272,
          -0.0964,  0.0861],
         [-0.1123, -0.1620, -0.0465, -0.0850, -0.1407, -0.1959,  0.1594,
          -0.1300,  0.1075],
         [-0.0883, -0.0770, -0.1132, -0.1147, -0.0963, -0.2524,  0.1614,
          -0.1053,  0.1185],
         [-0.0492,  0.0111, -0.1587, -0.0696, -0.0522, -0.2203,  0.1496,
          -0.0446,  0.0796],
         [-0.0566,  0.0087, -0.1537, -0.0531, -0.0110, -0.1897,  0.1380,
          -0.0253,  0.0537],
         [-0.0272,  0.0367,  0.0042,  0.0078,  0.0152,  0.0047,  0.0377,
          -0.0107,  0.0432]],

        [[-0.1138, -0.1678,  0.1226, -0.0712, -0.0876, -0.1456,  0.1298,
          -0.1048, -0.0717],
         [-0.1373, -0.1352,  0.0797, -0.1013, -0.1508, -0.1837,  0.1321,
       

In [18]:
# Normalise the model output over axis 2 and show the resulting distribution
# for the first position of the first sequence in the batch.
F.softmax(_y, dim=2)[0, 0]

tensor([0.0991, 0.1031, 0.1212, 0.1126, 0.1094, 0.1044, 0.1248, 0.1087, 0.1166],
       device='cuda:0', grad_fn=<SelectBackward0>)