In [0]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem under /content/drive.
drive.mount(mountpoint='/content/drive')

In [0]:
import os

In [2]:
%cd drive/My Drive/drqa_doc_reader

/content/drive/My Drive/drqa_doc_reader


In [3]:
pwd

'/content/drive/My Drive/drqa_doc_reader'

In [4]:
#!pip install ujson



In [0]:
#!python setup.py --train_url="./data/train-v1.1.json" --dev_url="./data/dev-v1.1.json" --glove_url="./data/glove.840B.300d.zip" --include_test_examples=False

In [20]:
import json
import numpy as np
import torch
from model import StanfAR
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import time

'''
steps - 
1. load data
2. preprocess
3. train
4. tensorboard / evaluation on dev
5. saving/checkpointing/loading model
6. predict function
7. web app
8. packaging, code quality testing, etc.
'''
#%%

'\nsteps - \n1. load data\n2. preprocess\n3. train\n4. tensorboard / evaluation on dev\n5. saving/checkpointing/loading model\n6. predict function\n7. web app\n8. packaging, code quality testing, etc.\n'

In [0]:
def load_json_file(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 so that non-ASCII content decodes
    the same way on every platform, instead of depending on the locale's
    default text encoding.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as file:
        return json.load(file)


def load_npz_file(path):
    """Open the NumPy archive at ``path`` and return whatever ``np.load`` yields."""
    archive = np.load(path)
    return archive


def load_files(path):
    """Load the preprocessed artefacts found under ``path``.

    Reads ``word2idx.json`` and ``word_emb.json`` through ``load_json_file``,
    then ``train.npz`` and ``dev.npz`` through ``load_npz_file``, and finally
    derives the reverse lookup of ``word2idx``.

    Args:
        path: Directory prefix; file names are appended after a ``/``.

    Returns:
        A 5-tuple ``(word2idx, idx2word, word_emb, train_data, dev_data)``.
    """
    vocab = load_json_file(path + "/word2idx.json")
    embeddings = load_json_file(path + "/word_emb.json")

    train_split = load_npz_file(path + "/train.npz")
    dev_split = load_npz_file(path + "/dev.npz")

    # Invert the token -> index mapping into index -> token.
    reverse_vocab = dict((index, token) for token, index in vocab.items())

    return vocab, reverse_vocab, embeddings, train_split, dev_split




In [0]:
#%% loading
# Fetch both vocabulary maps, the embedding table and the two data splits from ./data.
(
    word2idx,
    idx2word,
    word_emb,
    train_data,
    dev_data,
) = load_files(path='data')



In [0]:
# Prefer the GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [0]:
#%% preprocessing
# Question and context index arrays become LongTensors on the target device.
train_q, train_c = (
    torch.LongTensor(train_data[field]).to(device)
    for field in ('ques_idxs', 'context_idxs')
)

# The two label arrays keep whatever dtype the archive stored them with.
labels1, labels2 = (
    torch.as_tensor(train_data[field]).to(device)
    for field in ('y1s', 'y2s')
)

# The embedding table loaded from JSON is rebound to a tensor on the same device.
word_emb = torch.as_tensor(word_emb).to(device)




In [0]:
#%%
class Dataset(data.Dataset):
    """Map-style dataset over four parallel, index-aligned tensors.

    Each item is the tuple ``(query, ctx, y1, y2)`` taken at the same row of
    the four underlying tensors.

    Args:
        queries: Tensor indexed per example for the question side. Defaults to
            the notebook-level ``train_q`` when omitted.
        contexts: Tensor indexed per example for the context side. Defaults to
            the notebook-level ``train_c`` when omitted.
        starts: First label tensor. Defaults to the notebook-level ``labels1``.
        ends: Second label tensor. Defaults to the notebook-level ``labels2``.

    Passing the tensors explicitly lets the same class wrap other splits
    (e.g. the dev data); calling ``Dataset()`` with no arguments keeps the
    original behaviour of wrapping the training tensors.
    """

    def __init__(self, queries=None, contexts=None, starts=None, ends=None):
        super().__init__()
        # Fall back to the module-level training tensors so the historical
        # zero-argument construction keeps working unchanged.
        if queries is None:
            queries = train_q
        if contexts is None:
            contexts = train_c
        if starts is None:
            starts = labels1
        if ends is None:
            ends = labels2
        self.data = (queries, contexts, starts, ends)

    def __len__(self):
        """Return the number of examples, taken from the first tensor."""
        return len(self.data[0])

    def __getitem__(self, idx):
        """Return ``(query, ctx, y1, y2)`` for the example at position ``idx``."""
        query = self.data[0][idx]
        ctx = self.data[1][idx]
        y1 = self.data[2][idx]
        y2 = self.data[3][idx]

        return query, ctx, y1, y2




In [0]:
#%%
# Shuffle so every epoch sees the examples in a fresh order, and drop the
# trailing partial batch: the training loop only consumes full batches of 32.
df = torch.utils.data.DataLoader(
    Dataset(), batch_size=32, shuffle=True, drop_last=True
)


#%% training loop
torch.set_grad_enabled(True)

# NOTE(review): the second argument looks like it mirrors the batch size of 32 —
# confirm against StanfAR's signature in model.py.
network = StanfAR(word_emb, 32).to(device)

optimizer = optim.Adamax(network.parameters(), lr=0.01)

# Running totals updated by the training cell.
total_loss = 0
total_correct = 0

i = 0
num_epochs = 500



In [0]:
#%%

for j in range(num_epochs):
    i = 0  # stays 0 if the loader yields nothing
    acc = []  # per-batch accuracy on the first output, as plain Python floats
    tic = time.time()
    for i, batch in enumerate(df, start=1):  # Get Batch
        query, context, y1, y2 = batch
        # Only full batches of 32 are trained on; a short batch ends the epoch.
        if query.shape[0] != 32:
            break

        preds = network(query, context)  # Pass Batch

        # Sum of the cross-entropy losses of the two outputs against their labels.
        loss = F.cross_entropy(preds[0], y1) + F.cross_entropy(preds[1], y2)

        optimizer.zero_grad()
        loss.backward()  # Calculate Gradients
        optimizer.step()  # Update Weights

        total_loss += loss.item()

        # .item() moves the count off the (possibly CUDA) device; a list of
        # device tensors cannot be averaged by NumPy.
        correct = (preds[0].argmax(dim=1) == y1).sum().item()
        total_correct += correct
        # Store a fraction so the printed figure is an accuracy, not a raw count.
        acc.append(correct / query.shape[0])

    toc = time.time()
    recent = acc[-100:]
    # Avoid NumPy's empty-slice warning when no full batch was processed.
    accuracy_est = float(np.mean(recent)) if recent else float("nan")
    print(f"epoch: {j+1}\naccuracy_est_batch: {accuracy_est}\ntime taken: {toc-tic}")




In [0]:
#%%
# Report the running totals kept by the training cells.
summary_fields = ("epoch:", 0, "total_correct:", total_correct, "loss:", total_loss)
print(*summary_fields)



#%%
# Smoke test: run one batch through a freshly initialised model.
# The model is moved to `device` because the embedding table and the batches
# already live there.
model = StanfAR(word_emb, 32).to(device)

# Take the sample from the existing loader `df`; each batch is
# (query, context, y1, y2), and only the first two are model inputs.
sample_batch = next(iter(df))
sample_data_q, sample_data_c = sample_batch[0], sample_batch[1]

out = model(sample_data_q, sample_data_c)
