In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import os

In [1]:
%cd drive/My Drive/drqa_doc_reader

/content/drive/My Drive/drqa_doc_reader


In [2]:
pwd

'/content/drive/My Drive/drqa_doc_reader'

In [0]:
#!pip install ujson

In [0]:
#!python setup.py --train_url="./data/train-v1.1.json" --dev_url=".data/dev-v1.1.json" --glove_url=".data/glove.840B.300d.zip" --include_test_examples=False

In [5]:
import json
import numpy as np
import torch
from model import StanfAR
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import time

'''
steps - 
1. load data
2. preprocess
3. train
4. tensorboard / evaluation on dev
5. saving/checkpointing/loading model
6. predict function
7. web app
8. packaging, code quality testing, etc.
'''
#%%

'\nsteps - \n1. load data\n2. preprocess\n3. train\n4. tensorboard / evaluation on dev\n5. saving/checkpointing/loading model\n6. predict function\n7. web app\n8. packaging, code quality testing, etc.\n'

In [0]:
def load_json_file(path):
    with open(path) as file:
        out = json.load(file)
    return out


def load_npz_file(path):
    return np.load(path)


def load_files(path):
    word2idx = load_json_file(path + "/word2idx.json")
    word_emb = load_json_file(path + "/word_emb.json")

    train_data = load_npz_file(path + "/train.npz")
    dev_data = load_npz_file(path + "/dev.npz")

    idx2word = {i:j for j,i in word2idx.items()}

    return word2idx, idx2word, word_emb, train_data, dev_data




In [0]:
#%% loading
word2idx, idx2word, word_emb, train_data, dev_data = load_files(path='data')



In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [0]:
#%% preprocessing
train_q = torch.LongTensor(train_data['ques_idxs']).to(device)
train_c = torch.LongTensor(train_data['context_idxs']).to(device)

labels1 = torch.as_tensor(train_data['y1s']).to(device)
labels2 = torch.as_tensor(train_data['y2s']).to(device)

word_emb = torch.as_tensor(word_emb).to(device)




In [0]:
dev_q = torch.LongTensor(dev_data['ques_idxs']).to(device)
dev_c = torch.LongTensor(dev_data['context_idxs']).to(device)

labels1_dev = torch.as_tensor(dev_data['y1s']).to(device)
labels2_dev = torch.as_tensor(dev_data['y2s']).to(device)



In [0]:
class Dataset(data.Dataset):
    def __init__(self):
        super().__init__()
        self.data = (train_q, train_c, labels1, labels2, dev_q, dev_c, labels1_dev, labels2_dev)

    def __len__(self):
        return len(self.data[0])

    def __getitem__(self, idx):
        query = self.data[0][idx]
        ctx = self.data[1][idx]
        y1 = self.data[2][idx]
        y2 = self.data[3][idx]
        
        try:
            dev_query = self.data[4][idx]
            dev_ctx = self.data[5][idx]
            dev_l1 = self.data[6][idx]
            dev_l2 = self.data[7][idx]
        except:
            return query, ctx, y1, y2
          
        return query, ctx, y1, y2, dev_query, dev_ctx, dev_l1, dev_l2


In [0]:
#%%
df = torch.utils.data.DataLoader(Dataset(), batch_size=32)


#%% training loop
torch.set_grad_enabled(True)

network = StanfAR(word_emb, 32).to(device)

optimizer = optim.Adamax(network.parameters(), lr=0.01)

total_loss = 0
total_correct = 0

i = 0
num_epochs = 500



In [0]:
for j in range(num_epochs):
    test_acc1 = []
    test_acc2 = []
    acc1 = []
    acc2 = []
    i = 0
    tic_b = time.time()
    for batch in df:  # Get Batch
        i += 1
        try:
            query, context, y1, y2, dev_q, dev_ctx, dev_y1, dev_y2 = batch
        except:
            query, context, y1, y2 = batch

        if query.shape[0] != 32:
            break

        if i == 100:
            toc_b = time.time()
            print(f"Time for 100 batches: {toc_b-tic_b}")

        preds = network(query, context)  # Pass Batch

        loss = (F.cross_entropy(preds[0], y1))+(F.cross_entropy(preds[1], y2))

        optimizer.zero_grad()
        loss.backward()  # Calculate Gradients
        optimizer.step()  # Update Weights

        total_loss += loss.item()

        acc1.append((preds[0].argmax(dim=1) == y1).sum().item())
        acc2.append((preds[0].argmax(dim=1) == y1).sum().item())

        if i<=10480:
            with torch.no_grad():
                test_preds1, test_preds2 = network(dev_q, dev_ctx)
                accuracy1 = (test_preds1.argmax(dim=1)==dev_y1).sum().item()
                accuracy2 = (test_preds2.argmax(dim=1)==dev_y2).sum().item()
                test_acc1.append(accuracy1)
                test_acc2.append(accuracy2)
  
    print(f"Epoch: {j}\ntrain_accuracy1: {np.mean(acc1[-100:])}\ntrain_accuracy2: {np.mean(acc2[-100:])}\ntest_accuracy1: {np.mean(test_acc1[-100:])}\ntest_accuracy2: {np.mean(test_acc2[-100:])}\n")


Time for 100 batches: 21.847774267196655
Epoch: 0
train_accuracy1: 10.2
train_accuracy2: 10.2
test_accuracy1: 1.31
test_accuracy2: 0.8

Time for 100 batches: 23.82910990715027
Epoch: 1
train_accuracy1: 13.29
train_accuracy2: 13.29
test_accuracy1: 1.04
test_accuracy2: 0.6

Time for 100 batches: 23.965417623519897
Epoch: 2
train_accuracy1: 14.84
train_accuracy2: 14.84
test_accuracy1: 1.09
test_accuracy2: 0.71



[tensor(0, device='cuda:0')]

In [0]:
#%%
print(
    "epoch:", 0,
    "total_correct:", total_correct,
    "loss:", total_loss
)



#%%
model = StanfAR(word_emb, 32)
sample_data_q, sample_data_c  = next(iter(train_loader_q)), next(iter(train_loader_c))

out = model(sample_data_q, sample_data_c)
