# Get Data
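Preprocess the QUASAR-T question, context and noun-phrase files into JSON-lines examples, then build the torchtext fields, vocabularies and batch iterators used for training and evaluation.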

In [0]:
import json
import os
import nltk
import torch

from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe



In [18]:
# Download NLTK's 'punkt' package (presumably required by nltk.word_tokenize, which is used below — TODO confirm).
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [19]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used later in this notebook start with 'drive/My Drive/'.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
def word_tokenize(tokens):
    """Split text into word tokens and rewrite quote markers as a plain double quote.

    :param tokens: raw text handed to ``nltk.word_tokenize``
    :return: list of token strings in which every ``''`` and every pair of
             backticks has been replaced by ``"``
    """
    normalized = []
    for token in nltk.word_tokenize(tokens):
        # First the closing-quote form, then the opening-quote form.
        token = token.replace("''", '"')
        normalized.append(token.replace("``", '"'))
    return normalized

In [0]:
class QUASAR():
    """Data pipeline for the QUASAR-T files stored on the mounted drive.

    Constructing an instance
      1. converts the raw question / context / noun-phrase files into
         JSON-lines example files (only when those files are missing),
      2. loads the examples into torchtext datasets (from a cached ``.pt``
         file when available),
      3. builds the character and word vocabularies (word vectors: GloVe 6B),
      4. creates bucketed batch iterators.

    Attributes set by ``__init__``: ``RAW``, ``CHAR_NESTING``, ``CHAR``,
    ``WORD``, ``LABEL`` (fields), ``train`` / ``dev`` (datasets) and
    ``train_iter`` / ``dev_iter`` (iterators).
    """

    def __init__(self):
        word_dim = 100
        train_batch_size = 10
        dev_batch_size = 10
        path = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/'
        dataset_path = path + '/torchtext/'
        train_examples_path = dataset_path + 'train_examples.pt'
        dev_examples_path = dataset_path + 'dev_examples.pt'
        # The training split is read from dev.jsonl and the validation split from test.jsonl.
        train_file = "dev.json"
        dev_file = "test.json"

        print("preprocessing data files...")
        if not os.path.exists('{}/{}l'.format(path, train_file)):
            self.preprocess_file(path, "dev")
        if not os.path.exists('{}/{}l'.format(path, dev_file)):
            self.preprocess_file(path, "test")

        self.RAW = data.RawField()
        # explicit declaration for torchtext compatibility
        self.RAW.is_target = False
        self.CHAR_NESTING = data.Field(batch_first=True, tokenize=list, lower=True)
        self.CHAR = data.NestedField(self.CHAR_NESTING, tokenize=word_tokenize)
        self.WORD = data.Field(batch_first=True, tokenize=word_tokenize, lower=True, include_lengths=True)
        self.LABEL = data.Field(sequential=False, unk_token=None, use_vocab=False)

        # JSON key -> (attribute name, field); context and question each feed a word and a char field.
        dict_fields = {'id': ('id', self.RAW),
                       's_idx': ('s_idx', self.LABEL),
                       'e_idx': ('e_idx', self.LABEL),
                       'context': [('c_word', self.WORD), ('c_char', self.CHAR)],
                       'question': [('q_word', self.WORD), ('q_char', self.CHAR)]}

        list_fields = [('id', self.RAW), ('s_idx', self.LABEL), ('e_idx', self.LABEL),
                       ('c_word', self.WORD), ('c_char', self.CHAR),
                       ('q_word', self.WORD), ('q_char', self.CHAR)]

        # Use the cache only when BOTH example files exist; checking just the
        # directory would crash in torch.load after an interrupted first run.
        if os.path.exists(train_examples_path) and os.path.exists(dev_examples_path):
            print("loading splits...")
            # torch.load unpickles its input: only load caches written by this notebook.
            train_examples = torch.load(train_examples_path)
            dev_examples = torch.load(dev_examples_path)

            self.train = data.Dataset(examples=train_examples, fields=list_fields)
            self.dev = data.Dataset(examples=dev_examples, fields=list_fields)
            print(len(self.train))
        else:
            print("building splits...")
            self.train, self.dev = data.TabularDataset.splits(
                path=path,
                train='{}l'.format(train_file),
                validation='{}l'.format(dev_file),
                format='json',
                fields=dict_fields)

            # exist_ok: the directory may already exist from an incomplete earlier run.
            os.makedirs(dataset_path, exist_ok=True)
            torch.save(self.train.examples, train_examples_path)
            torch.save(self.dev.examples, dev_examples_path)

        print("building vocab...")
        self.CHAR.build_vocab(self.train, self.dev)
        self.WORD.build_vocab(self.train, self.dev, vectors=GloVe(name='6B', dim=word_dim))

        print("building iterators...")
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.train_iter = data.BucketIterator(
            self.train,
            batch_size=train_batch_size,
            device=device,
            repeat=True,
            shuffle=True,
            sort_key=lambda x: len(x.c_word)
        )
        print(len(self.train_iter))
        self.dev_iter = data.BucketIterator(
            self.dev,
            batch_size=dev_batch_size,
            device=device,
            repeat=False,
            sort_key=lambda x: len(x.c_word)
        )
        print(len(self.dev_iter))

    def preprocess_file(self, path, type):
        """Convert one raw split into a JSON-lines file of training examples.

        Reads ``<type>_contexts.json`` and ``<type>_nps.json`` from
        ``contexts/short/`` and ``<type>_questions.json`` from ``questions/``
        (all below ``path``) line by line in lockstep, collects examples via
        ``process_nps`` and writes them to ``<path><type>.jsonl``.

        :param path: dataset root directory (expected to end with a separator)
        :param type: split name used as file-name prefix, e.g. ``"dev"`` or ``"test"``
        """
        dump = []
        context_path = "contexts/short/"
        question_path = "questions/"
        context_file = type + "_contexts.json"
        question_file = type + "_questions.json"
        nps_file = type + "_nps.json"
        print(question_file)
        # Explicit UTF-8 so reading does not depend on the platform's default locale.
        with open(path + context_path + context_file, encoding='utf-8') as context_f, \
                open(path + context_path + nps_file, encoding='utf-8') as nps_f, \
                open(path + question_path + question_file, encoding='utf-8') as question_f:
            context_line = context_f.readline()
            question_line = question_f.readline()
            nps_line = nps_f.readline()
            # The context file drives the loop; the other two files must have at least as many lines.
            while context_line:
                context_json_line = json.loads(context_line)
                question_json_line = json.loads(question_line)
                nps_json_line = json.loads(nps_line)
                self.process_nps(question_json_line['uid'],
                                 nps_json_line['nps'],
                                 question_json_line['answer'],
                                 context_json_line['contexts'],
                                 question_json_line['question'],
                                 dump)
                context_line = context_f.readline()
                question_line = question_f.readline()
                nps_line = nps_f.readline()

        with open('{}l'.format(path + type + ".json"), 'w', encoding='utf-8') as f:
            for line in dump:
                json.dump(line, f)
                print('', file=f)

        print('preprocess done')

    def process_nps(self, id, nps_list, answer, contexts, question, dump):
        """Append one example to ``dump`` for each of the first noun phrases equal to the answer.

        At most four matching noun phrases are considered; a match with a
        negative context index or offset is counted but produces no example.

        :param id: question identifier stored in every produced example
        :param nps_list: entries indexable as ``[text, context_index, offset]``
        :param answer: answer string the noun-phrase text is compared with
        :param contexts: sequence whose items carry the context text at index 1
        :param question: question text
        :param dump: list that receives the example dicts (mutated in place)
        """
        full_context = self.merge_contexts(contexts)
        matches = 0
        for nps in nps_list:
            if nps[0] != answer:
                continue
            matches += 1
            if matches > 4:
                break
            if nps[1] < 0 or nps[2] < 0:
                continue
            # Tokens of all contexts that precede the one containing the noun phrase.
            preceding_tokens = word_tokenize(self.merge_contexts(contexts[:nps[1]]))
            # NOTE(review): the "+ 1" is kept from the original; whether nps[2]
            # is a zero-based token offset is not visible here — confirm.
            start_index = len(preceding_tokens) + nps[2] + 1
            end_index = start_index + len(word_tokenize(answer)) - 1
            dump.append(dict([('id', id),
                              ('context', full_context),
                              ('question', question),
                              ('answer', answer),
                              ('s_idx', start_index),
                              ('e_idx', end_index)]))

    def merge_contexts(self, contexts):
        """Concatenate the text (index 1) of every context, each prefixed by one space.

        :param contexts: sequence of items whose element at index 1 is a string
        :return: the merged string; empty when ``contexts`` is empty
        """
        return "".join(" " + context[1] for context in contexts)
  


In [0]:
# data_q = QUASAR()

# Model

In [0]:
import torch
import torch.nn as nn


class LSTM(nn.Module):
    """``nn.LSTM`` wrapper that applies input dropout and handles variable-length batches.

    ``forward`` always treats its input as batch-first: it sorts the batch by
    length, packs it, runs the recurrent layer and restores the original order.

    :param input_size: feature size of each input step
    :param hidden_size: hidden size per direction
    :param batch_first: forwarded to ``nn.LSTM``
    :param num_layers: number of stacked recurrent layers
    :param bidirectional: whether to run a reverse direction as well
    :param dropout: dropout probability applied to the input of ``forward``
    """

    def __init__(self, input_size, hidden_size, batch_first=False, num_layers=1, bidirectional=False, dropout=0.2):
        super(LSTM, self).__init__()

        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=batch_first)
        self.reset_params()
        self.dropout = nn.Dropout(p=dropout)

    def reset_params(self):
        """Re-initialise the LSTM: orthogonal hidden weights, Kaiming-normal input
        weights, zero biases, and ones in the second quarter of each hidden bias."""
        hidden_size = self.rnn.hidden_size
        suffixes = ['', '_reverse'] if self.rnn.bidirectional else ['']
        for i in range(self.rnn.num_layers):
            for suffix in suffixes:
                nn.init.orthogonal_(getattr(self.rnn, f'weight_hh_l{i}{suffix}'))
                nn.init.kaiming_normal_(getattr(self.rnn, f'weight_ih_l{i}{suffix}'))
                bias_hh = getattr(self.rnn, f'bias_hh_l{i}{suffix}')
                nn.init.constant_(bias_hh, val=0)
                nn.init.constant_(getattr(self.rnn, f'bias_ih_l{i}{suffix}'), val=0)
                # Same elements as chunk(4)[1]; written under no_grad because an
                # in-place edit on a view of a grad-requiring parameter is rejected
                # by autograd in recent PyTorch releases.
                with torch.no_grad():
                    bias_hh[hidden_size:2 * hidden_size].fill_(1)

    def forward(self, x):
        """
        :param x: tuple ``(inputs, lengths)`` with inputs (batch, seq_len, input_size)
                  and lengths a 1-D tensor of size batch
        :return: outputs (batch, max_len, num_directions * hidden_size) and the final
                 hidden states (batch, num_layers * num_directions * hidden_size),
                 both in the original batch order
        """
        x, x_len = x
        x = self.dropout(x)

        # pack_padded_sequence wants sequences ordered by decreasing length.
        x_len_sorted, x_idx = torch.sort(x_len, descending=True)
        x_sorted = x.index_select(dim=0, index=x_idx)
        _, x_ori_idx = torch.sort(x_idx)

        # Lengths are moved to the CPU: pack_padded_sequence requires a CPU
        # tensor there even when the data lives on the GPU.
        x_packed = nn.utils.rnn.pack_padded_sequence(x_sorted, x_len_sorted.cpu(), batch_first=True)
        x_packed, (h, c) = self.rnn(x_packed)

        x = nn.utils.rnn.pad_packed_sequence(x_packed, batch_first=True)[0]
        x = x.index_select(dim=0, index=x_ori_idx)
        # No bare squeeze(): it would drop the batch dimension for a batch of
        # one and make the index_select below operate on the feature axis.
        h = h.permute(1, 0, 2).contiguous().view(-1, h.size(0) * h.size(2))
        h = h.index_select(dim=0, index=x_ori_idx)

        return x, h


class Linear(nn.Module):
    """Fully connected layer with Kaiming-normal weights, zero bias and optional input dropout.

    :param in_features: size of each input vector
    :param out_features: size of each output vector
    :param dropout: dropout probability; a dropout sub-module is only created when it is > 0
    """

    def __init__(self, in_features, out_features, dropout=0.0):
        super().__init__()

        self.linear = nn.Linear(in_features=in_features, out_features=out_features)
        if dropout > 0:
            self.dropout = nn.Dropout(p=dropout)
        self.reset_params()

    def reset_params(self):
        """Zero the bias and draw the weights from a Kaiming-normal distribution."""
        nn.init.constant_(self.linear.bias, 0)
        nn.init.kaiming_normal_(self.linear.weight)

    def forward(self, x):
        """Apply dropout (when configured) followed by the affine map."""
        dropout = getattr(self, 'dropout', None)
        if dropout is not None:
            x = dropout(x)
        return self.linear(x)

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F



class BiDAF(nn.Module):
    """BiDAF reading-comprehension model producing start / end logits over context positions.

    Layers (in order): character CNN embedding, frozen pretrained word
    embedding, two-layer highway network, contextual LSTM, attention flow,
    two modeling LSTMs and the output layer.

    :param char_vocab_size: number of entries in the character vocabulary
    :param char_channel_size: number of output channels of the character CNN
    :param char_dim: character embedding size
    :param char_channel_width: kernel width (in characters) of the character CNN
    :param word_dim: word embedding size; ``char_channel_size + word_dim`` must equal ``2 * hidden_size``
    :param hidden_size: hidden size per LSTM direction
    :param dropout: dropout probability
    :param pretrained: tensor of pretrained word vectors (kept frozen)
    """

    def __init__(self,char_vocab_size, char_channel_size, char_dim, char_channel_width, word_dim, hidden_size, dropout, pretrained):
        super(BiDAF, self).__init__()

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(char_vocab_size, char_dim, padding_idx=1)
        self.char_dim = char_dim
        self.char_channel_size = char_channel_size
        # Kept so forward() can pad words that are shorter than the convolution kernel.
        self.char_channel_width = char_channel_width
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Sequential(
            nn.Conv2d(1, char_channel_size, (char_dim, char_channel_width)),
            nn.ReLU()
            )

        # 2. Word Embedding Layer
        # initialize word embedding with the pretrained vectors
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert hidden_size * 2 == (char_channel_size + word_dim)
        for i in range(2):
            setattr(self, 'highway_linear{}'.format(i),
                    nn.Sequential(Linear(hidden_size * 2, hidden_size * 2),
                                  nn.ReLU()))
            setattr(self, 'highway_gate{}'.format(i),
                    nn.Sequential(Linear(hidden_size * 2, hidden_size * 2),
                                  nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=hidden_size * 2,
                                 hidden_size=hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(hidden_size * 2, 1)
        self.att_weight_q = Linear(hidden_size * 2, 1)
        self.att_weight_cq = Linear(hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=hidden_size * 8,
                                   hidden_size=hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=dropout)

        self.modeling_LSTM2 = LSTM(input_size=hidden_size * 2,
                                   hidden_size=hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(hidden_size * 8, 1)
        self.p1_weight_m = Linear(hidden_size * 2, 1)
        self.p2_weight_g = Linear(hidden_size * 8, 1)
        self.p2_weight_m = Linear(hidden_size * 2, 1)

        self.output_LSTM = LSTM(input_size=hidden_size * 2,
                                hidden_size=hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=dropout)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, batch):
        """
        :param batch: object exposing ``c_char`` / ``q_char`` (batch, seq_len, word_len)
                      and ``c_word`` / ``q_word`` as ``(token_ids, lengths)`` tuples
        :return: p1, p2 — unnormalised start and end scores, each (batch, c_len)
        """
        # TODO: More memory-efficient architecture
        def char_emb_layer(x):
            """
            :param x: (batch, seq_len, word_len)
            :return: (batch, seq_len, char_channel_size)
            """
            batch_size = x.size(0)
            # The convolution needs at least `char_channel_width` characters per
            # word; pad shorter batches with the embedding's padding index.
            missing = self.char_channel_width - x.size(2)
            if missing > 0:
                x = F.pad(x, (0, missing), value=self.char_emb.padding_idx)
            # (batch, seq_len, word_len, char_dim)
            x = self.dropout(self.char_emb(x))
            # (batch, seq_len, char_dim, word_len)
            x = x.transpose(2, 3)
            # (batch * seq_len, 1, char_dim, word_len)
            x = x.view(-1, self.char_dim, x.size(3)).unsqueeze(1)
            # (batch * seq_len, char_channel_size, 1, conv_len) -> (batch * seq_len, char_channel_size, conv_len)
            # Squeeze only the height axis: a bare squeeze() would also drop
            # conv_len == 1 or a leading dimension of size 1.
            x = self.char_conv(x).squeeze(2)
            # (batch * seq_len, char_channel_size, 1) -> (batch * seq_len, char_channel_size)
            x = F.max_pool1d(x, x.size(2)).squeeze(2)
            # (batch, seq_len, char_channel_size)
            x = x.view(batch_size, -1, self.char_channel_size)

            return x

        def highway_network(x1, x2):
            """
            :param x1: (batch, seq_len, char_channel_size)
            :param x2: (batch, seq_len, word_dim)
            :return: (batch, seq_len, hidden_size * 2)
            """
            # (batch, seq_len, char_channel_size + word_dim)
            x = torch.cat([x1, x2], dim=-1)
            for i in range(2):
                h = getattr(self, 'highway_linear{}'.format(i))(x)
                g = getattr(self, 'highway_gate{}'.format(i))(x)
                x = g * h + (1 - g) * x
            # (batch, seq_len, hidden_size * 2)
            return x

        def att_flow_layer(c, q):
            """
            :param c: (batch, c_len, hidden_size * 2)
            :param q: (batch, q_len, hidden_size * 2)
            :return: (batch, c_len, hidden_size * 8)
            """
            c_len = c.size(1)
            q_len = q.size(1)

            # The element-wise c*q term is built one question position at a
            # time instead of materialising a (batch, c_len, q_len, hidden_size * 2) tensor.
            cq = []
            for i in range(q_len):
                # (batch, 1, hidden_size * 2)
                qi = q.select(1, i).unsqueeze(1)
                # (batch, c_len, 1) -> (batch, c_len)
                ci = self.att_weight_cq(c * qi).squeeze(-1)
                cq.append(ci)
            # (batch, c_len, q_len)
            cq = torch.stack(cq, dim=-1)

            # (batch, c_len, q_len)
            s = self.att_weight_c(c).expand(-1, -1, q_len) + \
                self.att_weight_q(q).permute(0, 2, 1).expand(-1, c_len, -1) + \
                cq

            # (batch, c_len, q_len)
            a = F.softmax(s, dim=2)
            # (batch, c_len, q_len) * (batch, q_len, hidden_size * 2) -> (batch, c_len, hidden_size * 2)
            c2q_att = torch.bmm(a, q)
            # (batch, 1, c_len)
            b = F.softmax(torch.max(s, dim=2)[0], dim=1).unsqueeze(1)
            # (batch, 1, c_len) * (batch, c_len, hidden_size * 2) -> (batch, hidden_size * 2)
            # squeeze(1) keeps the batch axis even when the batch holds one example.
            q2c_att = torch.bmm(b, c).squeeze(1)
            # (batch, c_len, hidden_size * 2) (tiled)
            q2c_att = q2c_att.unsqueeze(1).expand(-1, c_len, -1)

            # (batch, c_len, hidden_size * 8)
            x = torch.cat([c, c2q_att, c * c2q_att, c * q2c_att], dim=-1)
            return x

        def output_layer(g, m, l):
            """
            :param g: (batch, c_len, hidden_size * 8)
            :param m: (batch, c_len ,hidden_size * 2)
            :param l: (batch) context lengths
            :return: p1: (batch, c_len), p2: (batch, c_len)
            """
            # (batch, c_len)
            p1 = (self.p1_weight_g(g) + self.p1_weight_m(m)).squeeze(-1)
            # (batch, c_len, hidden_size * 2)
            m2 = self.output_LSTM((m, l))[0]
            # (batch, c_len)
            p2 = (self.p2_weight_g(g) + self.p2_weight_m(m2)).squeeze(-1)

            return p1, p2

        # 1. Character Embedding Layer
        c_char = char_emb_layer(batch.c_char)
        q_char = char_emb_layer(batch.q_char)
        # 2. Word Embedding Layer
        c_word = self.word_emb(batch.c_word[0])
        q_word = self.word_emb(batch.q_word[0])
        c_lens = batch.c_word[1]
        q_lens = batch.q_word[1]

        # Highway network
        c = highway_network(c_char, c_word)
        q = highway_network(q_char, q_word)
        # 3. Contextual Embedding Layer
        c = self.context_LSTM((c, c_lens))[0]
        q = self.context_LSTM((q, q_lens))[0]
        # 4. Attention Flow Layer
        g = att_flow_layer(c, q)
        # 5. Modeling Layer
        m = self.modeling_LSTM2((self.modeling_LSTM1((g, c_lens))[0], c_lens))[0]
        # 6. Output Layer
        p1, p2 = output_layer(g, m, c_lens)

        # (batch, c_len), (batch, c_len)
        return p1, p2

# Evaluate

In [0]:
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys


def normalize_answer(s):
    """Lower-case the text, then strip punctuation, the articles a/an/the and surplus whitespace."""
    lowered = s.lower()
    # Delete every character listed in string.punctuation.
    without_punctuation = lowered.translate(str.maketrans('', '', string.punctuation))
    # Whole-word articles become a blank so the neighbouring words stay separated.
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
    # Collapse whitespace runs and trim both ends.
    return ' '.join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-level F1 between the normalized prediction and the normalized ground truth.

    :return: 0 when the two share no token, otherwise the harmonic mean of
             token precision and recall
    """
    predicted = normalize_answer(prediction).split()
    expected = normalize_answer(ground_truth).split()
    # Multiset intersection: a token counts as often as it occurs in both lists.
    overlap = sum((Counter(predicted) & Counter(expected)).values())
    if overlap == 0:
        return 0
    precision = 1.0 * overlap / len(predicted)
    recall = 1.0 * overlap / len(expected)
    return (2 * precision * recall) / (precision + recall)


def exact_match_score(prediction, ground_truth):
    """Return True when prediction and ground truth are identical after normalization."""
    normalized_prediction = normalize_answer(prediction)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_truth


def evaluate(question_file, predictions, max_questions=300):
    """Score predictions against the answers stored in a JSON-lines question file.

    :param question_file: path of a file with one JSON object per line, each
                          providing the keys ``uid`` and ``answer``
    :param predictions: mapping from question uid to the predicted answer string
    :param max_questions: number of questions read from the start of the file;
                          ``None`` reads the whole file (default 300, the limit
                          that used to be hard-coded)
    :return: dict with ``exact_match`` and ``f1`` as percentages over the
             questions read; both are 0.0 when no question was read
    """
    f1 = exact_match = total = 0
    print(question_file)
    with open(question_file, encoding='utf-8') as question_f:
        for question_line in question_f:
            if max_questions is not None and total >= max_questions:
                break
            # Tolerate blank lines (e.g. a trailing newline) instead of failing in json.loads.
            if not question_line.strip():
                continue
            question_json_line = json.loads(question_line)
            total += 1
            uid = question_json_line['uid']
            # Unanswered questions stay in the denominator and therefore score 0.
            if uid not in predictions:
                message = 'Unanswered question ' + uid + \
                          ' will receive score 0.'
                print(message, file=sys.stderr)
                continue
            ground_truth = question_json_line['answer']
            prediction = predictions[uid]
            exact_match += exact_match_score(prediction, ground_truth)
            f1 += f1_score(prediction, ground_truth)

    # An empty file would otherwise end in a ZeroDivisionError.
    if total == 0:
        return {'exact_match': 0.0, 'f1': 0.0}

    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total

    return {'exact_match': exact_match, 'f1': f1}

def main(question_file, prediction_file):
    """Load the JSON predictions stored at ``prediction_file`` and score them with ``evaluate``.

    :param question_file: path handed on to ``evaluate``
    :param prediction_file: path of a JSON file holding the predictions
    :return: whatever ``evaluate`` returns for these predictions
    """
    with open(prediction_file) as handle:
        loaded_predictions = json.load(handle)

    return evaluate(question_file, loaded_predictions)

# Train

In [26]:
# tensorboardX provides the SummaryWriter that the next cell imports.
!pip install tensorboardX



In [0]:
import copy, json, os

import torch
from torch import nn, optim
from tensorboardX import SummaryWriter
from time import gmtime, strftime


def train(char_vocab_size, char_channel_size, char_dim, char_channel_width, word_dim, hidden_size, dropout, print_freq, learning_rate, model_time, epoch, questions_file, prediction_file, path, data):
    """Train a BiDAF model and return the snapshot with the best dev F1.

    :param char_vocab_size, char_channel_size, char_dim, char_channel_width,
           word_dim, hidden_size, dropout: forwarded to the ``BiDAF`` constructor
    :param print_freq: evaluate on the dev set and log every ``print_freq`` batches
    :param learning_rate: learning rate of the Adadelta optimizer
    :param model_time: run identifier appended to the log directory name
    :param epoch: training stops once the iterator reports this epoch number
    :param questions_file: question file handed to ``test`` for scoring
    :param prediction_file: file ``test`` writes its predictions to
    :param path: base directory; logs go to ``<path>runs/<model_time>``
    :param data: object exposing ``WORD.vocab.vectors``, ``train_iter`` and ``dev_iter``
    :return: deep copy of the model taken at the best dev F1, or the trained
             model itself when no dev evaluation ever ran
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BiDAF(char_vocab_size, char_channel_size, char_dim, char_channel_width, word_dim, hidden_size, dropout, data.WORD.vocab.vectors).to(device)

    # Frozen parameters (e.g. the pretrained word embedding) are not optimised.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir=path+'runs/' + model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    # Stays None until the first dev evaluation; handled before returning.
    best_model = None

    iterator = data.train_iter
    try:
        for i, batch in enumerate(iterator):
            # The training iterator repeats forever; its epoch counter ends the loop.
            present_epoch = int(iterator.epoch)
            if present_epoch == epoch:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
            last_epoch = present_epoch

            p1, p2 = model(batch)

            optimizer.zero_grad()
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            if (i + 1) % print_freq == 0:
                dev_loss, dev_exact, dev_f1 = test(model, questions_file, prediction_file, data)
                c = (i + 1) // print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                      f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                # The running loss covers one logging window; test() left the model in eval mode.
                loss = 0
                model.train()
    finally:
        # Flush and close the log writer even when training is interrupted.
        writer.close()

    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    # Fewer than print_freq batches were processed: no snapshot exists, so
    # return the trained model instead of failing on an unbound name.
    if best_model is None:
        best_model = model

    return best_model

    
def test(model, questions_file, prediction_file, data):
    """Run the model over the dev iterator, write its answers to disk and score them.

    For every example the span (start <= end) with the highest sum of start
    and end log-probabilities is chosen and decoded through the word vocabulary.

    :param model: model called as ``model(batch)`` and returning start / end scores (batch, c_len)
    :param questions_file: question file handed to ``evaluate``
    :param prediction_file: path the ``{id: answer}`` JSON is written to
    :param data: object exposing ``dev_iter`` and ``WORD.vocab.itos``
    :return: (summed dev loss, exact-match percentage, F1 percentage)
    """
    criterion = nn.CrossEntropyLoss()
    loss = 0
    answers = dict()
    model.eval()

    with torch.no_grad():
        for batch in iter(data.dev_iter):
            p1, p2 = model(batch)
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()

            # (batch, c_len, c_len)
            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            # -inf strictly below the diagonal forbids spans whose start lies
            # after their end; built on the device the scores already live on.
            mask = (torch.ones(c_len, c_len) * float('-inf')).to(p1.device).tril(-1).unsqueeze(0).expand(batch_size, -1, -1)
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            # Best start for every end, then the best end overall.
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            # squeeze(1) keeps a 1-D result for a batch of one; a bare squeeze()
            # would yield a 0-dim tensor that cannot be indexed below.
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze(1)

            for i in range(batch_size):
                id = batch.id[i]
                answer = batch.c_word[0][i][s_idx[i]:e_idx[i]+1]
                answer = ' '.join([data.WORD.vocab.itos[idx] for idx in answer])
                # Later examples with the same id overwrite earlier ones.
                answers[id] = answer

    with open(prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)

    results = evaluate(questions_file, answers)
    return loss, results['exact_match'], results['f1']


# Init

In [0]:
# --- Model hyper-parameters -------------------------------------------------
CHAR_DIM = 8
CHAR_CHANNEL_WIDTH = 5
CHANNEL_SIZE = 100
CONTEXT_THRESHOLD = 400
WORD_DIM = 100
HIDDEN_SIZE = 100
DROPOUT = 0

# --- Training schedule --------------------------------------------------------
PRINT_FREQ = 50
LEARNING_RATE = 0.5
# Wall-clock start time (UTC), used to name the log directory and the saved model.
MODEL_TIME = strftime('%H:%M:%S', gmtime())
EPOCH = 12

# --- Locations on the mounted drive -------------------------------------------
PREDICTION_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/prediction_file.out'
QUESTIONS_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/questions/test_questions.json'
PATH = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/'
DEV_QUESTIONS_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/questions/'
DEV_CONTEXT_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/'
TEST_QUESTIONS_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/questions/'
TEST_CONTEXT_FILE = 'drive/My Drive/IIIT lectures/NLP Applications/Project/QUASAR/quasar-t/'

In [29]:
# Build the QUASAR data pipeline: constructing QUASAR() preprocesses/loads the
# splits and builds the vocabularies and iterators.
print('Loading Data')
data_q=QUASAR()
print('data loaded')
# Vocabulary sizes; CHAR_VOCAB_SIZE is passed to train() in the last cell.
CHAR_VOCAB_SIZE=len(data_q.CHAR.vocab)
print(CHAR_VOCAB_SIZE)
WORD_VOCAB_SIZE=len(data_q.WORD.vocab)
print(WORD_VOCAB_SIZE)

Loading Data
preprocessing data files...
loading splits...
6346
building vocab...
building iterators...
635
625
data loaded
69
215254


# Start

In [0]:
# Train the model, then persist the weights of the best snapshot under <PATH>saved_models/.
print('start training')
best_model = train(CHAR_VOCAB_SIZE, CHANNEL_SIZE,CHAR_DIM, CHAR_CHANNEL_WIDTH, WORD_DIM, HIDDEN_SIZE, DROPOUT, PRINT_FREQ, LEARNING_RATE, MODEL_TIME, EPOCH, QUESTIONS_FILE, PREDICTION_FILE, PATH, data_q)
# exist_ok replaces the separate exists() check, so re-running the cell is
# safe and there is no gap between checking and creating the directory.
os.makedirs(PATH+'saved_models', exist_ok=True)
torch.save(best_model.state_dict(), f'{PATH}saved_models/QUASAR_{MODEL_TIME}.pt')
print('training finished!')