In [0]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm() 

Collecting gputil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7410 sha256=231c4c3f7a3795b789f2baab0fe467203122d6f4056f88f1190dcf2949768945
  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Gen RAM Free: 11.7 GB  | Proc size: 2.8 GB
GPU RAM Free: 10639MB | Used: 802MB | Util   7% | Total 11441MB


In [0]:
!kill -9 -1

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
%cd drive/My Drive/drqa_doc_reader

/content/drive/My Drive/drqa_doc_reader


In [0]:
pwd

'/content/drive/My Drive/drqa_doc_reader'

In [0]:
#!pip install ujson

In [0]:
#!python setup.py --train_url="./data/train-v1.1.json" --dev_url=".data/dev-v1.1.json" --glove_url=".data/glove.840B.300d.zip" --include_test_examples=False

In [0]:
import json
import numpy as np
import torch
from model import StanfAR
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import time
import spacy

'''
steps - 
1. load data
2. preprocess
3. train
4. tensorboard / evaluation on dev
5. saving/checkpointing/loading model
6. predict function
7. web app
8. packaging, code quality testing, etc.
'''
#%%

'\nsteps - \n1. load data\n2. preprocess\n3. train\n4. tensorboard / evaluation on dev\n5. saving/checkpointing/loading model\n6. predict function\n7. web app\n8. packaging, code quality testing, etc.\n'

In [0]:
def load_json_file(path):
    with open(path) as file:
        out = json.load(file)
    return out


def load_npz_file(path):
    return np.load(path)


def load_files(path):
    word2idx = load_json_file(path + "/word2idx.json")
    word_emb = load_json_file(path + "/word_emb.json")

    train_data = load_npz_file(path + "/train.npz")
    dev_data = load_npz_file(path + "/dev.npz")

    idx2word = {i:j for j,i in word2idx.items()}

    return word2idx, idx2word, word_emb, train_data, dev_data




In [0]:
#%% loading
word2idx, idx2word, word_emb, train_data, dev_data = load_files(path='data')



In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [0]:
#%% preprocessing
train_q = torch.LongTensor(train_data['ques_idxs']).to(device)
train_c = torch.LongTensor(train_data['context_idxs']).to(device)

labels1 = torch.as_tensor(train_data['y1s']).to(device)
labels2 = torch.as_tensor(train_data['y2s']).to(device)

word_emb = torch.as_tensor(word_emb).to(device)




In [0]:
dev_q = torch.LongTensor(dev_data['ques_idxs']).to(device)
dev_c = torch.LongTensor(dev_data['context_idxs']).to(device)

labels1_dev = torch.as_tensor(dev_data['y1s']).to(device)
labels2_dev = torch.as_tensor(dev_data['y2s']).to(device)



In [0]:
class Dataset(data.Dataset):
    def __init__(self):
        super().__init__()
        self.data = (train_q, train_c, labels1, labels2, dev_q, dev_c, labels1_dev, labels2_dev)

    def __len__(self):
        return len(self.data[0])

    def __getitem__(self, idx):
        query = self.data[0][idx]
        ctx = self.data[1][idx]
        y1 = self.data[2][idx]
        y2 = self.data[3][idx]
        
        try:
            dev_query = self.data[4][idx]
            dev_ctx = self.data[5][idx]
            dev_l1 = self.data[6][idx]
            dev_l2 = self.data[7][idx]
        except:
            return query, ctx, y1, y2
          
        return query, ctx, y1, y2, dev_query, dev_ctx, dev_l1, dev_l2


In [0]:
#%%
df = torch.utils.data.DataLoader(Dataset(), batch_size=32)


#%% training loop
torch.set_grad_enabled(True)

network = StanfAR(word_emb, 32).to(device)

optimizer = optim.Adam(network.parameters(), lr=0.001)

total_loss = 0
total_correct = 0

i = 0
num_epochs = 500



In [0]:
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize
# import psutil
# import humanize
# import os
# import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm() 

In [28]:
i

1865

In [0]:
max_acc = 0
for j in range(num_epochs):
    test_acc1 = []
    test_acc2 = []
    acc1 = []
    acc2 = []
    i = 0
    tic_b = time.time()
    test_done = False
    for batch in df:  # Get Batch
        i += 1
        if j==0 and i<1865:
            continue
        try:
            query, context, y1, y2, dev_q, dev_ctx, dev_y1, dev_y2 = batch
        except:
            query, context, y1, y2 = batch
            test_done = True

        if query.shape[0] != 32:
            break

        if i == 100:
            toc_b = time.time()
            print(f"Time for 100 batches: {toc_b-tic_b}")

        preds = network(query, context)  # Pass Batch

        loss = (F.cross_entropy(preds[0], y1))+(F.cross_entropy(preds[1], y2))

        optimizer.zero_grad()
        loss.backward()  # Calculate Gradients
        optimizer.step()  # Update Weights

        total_loss += loss.item()

        acc1.append((preds[0].argmax(dim=1) == y1).sum().item())
        acc2.append((preds[1].argmax(dim=1) == y1).sum().item())
        
        torch.save(network.state_dict(), "doc_reader_state.pt")

        if not test_done:
            with torch.no_grad():
                test_preds1, test_preds2 = network(dev_q, dev_ctx)
                accuracy1 = (test_preds1.argmax(dim=1)==dev_y1).sum().item()
                accuracy2 = (test_preds2.argmax(dim=1)==dev_y2).sum().item()
                test_acc1.append(accuracy1)
                test_acc2.append(accuracy2)
  
    print(f"Epoch: {j}\ntrain_accuracy1: {np.mean(acc1[-100:])}\ntrain_accuracy2: {np.mean(acc2[-100:])}\ntest_accuracy1: {np.mean(test_acc1[-100:])}\ntest_accuracy2: {np.mean(test_acc2[-100:])}\n")
    
    if np.mean(test_acc1[-100:]) + np.mean(test_acc2[-100:]) > max_acc:
        max_acc = np.mean(test_acc1[-100:]) + np.mean(test_acc2[-100:])
        torch.save(network.state_dict(), f"doc_reader_state_{round(max_acc/2, 2)}.pth")
        print("model_saved")


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch: 0
train_accuracy1: 20.2
train_accuracy2: 7.68
test_accuracy1: nan
test_accuracy2: nan

Time for 100 batches: 118.24767732620239
Epoch: 1
train_accuracy1: 20.34
train_accuracy2: 7.91
test_accuracy1: 15.29
test_accuracy2: 17.12

model_saved
Time for 100 batches: 124.48526072502136


In [0]:
a = (np.mean(test_acc1[-100:]) + np.mean(test_acc2[-100:]))/2

In [0]:
torch.save(network.state_dict, f"new_model{a}.pth")

In [19]:
i

1865

In [0]:
# need to change model to allow for custom batch sizes and retrain. For now, can pass a dev batch and store it in dataframe to analyse model performance


def batch2sent(batch, colname):
    return pd.DataFrame(map(lambda x: ' '.join([idx2word[i] for i in x]), batch.tolist()), columns=[colname])
  
def ans2sent(y, colname):
    return pd.DataFrame(map(lambda x: idx2word[x], y.tolist()), columns = [colname])

def concat_cols(df1, df2):
    return pd.concat([df1, df2], axis=1)
  
def concat_rows(df1, df2):
    return pd.concat([df1, df2], axis=0)


In [0]:

i=0
output_df = pd.DataFrame()
for batch in df:
    if i==10:
        break
    else:
        query, context, y1, y2, dev_q, dev_ctx, dev_y1, dev_y2 = batch
        with torch.no_grad():
            test_preds1, test_preds2 = network(dev_q, dev_ctx)
        
        query_df = batch2sent(dev_q, "query")
        ctx_df = batch2sent(dev_ctx, "context")
        actual_start = ans2sent(dev_y1, dev_ctx "actual_start")
        actual_end = ans2sent(dev_y2, "actual_end")
        pred_start = ans2sent(test_preds1.argmax(dim=1), "pred_start")
        pred_end = ans2sent(test_preds2.argmax(dim=1), "pred_end")
        start_same = pd.Series(test_preds1.argmax(dim=1).tolist())==pd.Series(dev_y1.tolist())
        end_same = pd.Series(test_preds2.argmax(dim=1).tolist())==pd.Series(dev_y2.tolist())
        
        out = pd.concat([query_df, ctx_df, actual_start, actual_end, pred_start, pred_end, start_same, end_same], axis=1)
        
        output_df = pd.concat([output_df, out], axis=0)
         
        
          

In [0]:
output_df.to_excel("predictions2.xlsx")