In [21]:
!pip install pytorch_pretrained_bert



In [22]:
import os
import re
import sys
import math
import logging
import pdb
import json
import random
from time import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import collections
import argparse
from glob import glob
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch import optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from pytorch_pretrained_bert.optimization import BertAdam
from tensorboardX import SummaryWriter
from gensim import models
from collections import OrderedDict
from transformers import BertModel, BertTokenizer, RobertaModel, RobertaTokenizer, AdamW

from sympy import Eq, solve
from sympy.parsing.sympy_parser import parse_expr
import sympy as sp

import warnings
warnings.filterwarnings("ignore")

# from attrdict import AttrDict
import unicodedata
try:
	import cPickle as pickle
except ImportError:
	import pickle


## Components

In [31]:
##################################################
# Attention.py #
##################################################

# Luong attention layer
class Attn(nn.Module):
	def __init__(self, method, hidden_size):
		super(Attn, self).__init__()
		self.method = method
		if self.method not in ['dot', 'general', 'concat']:
			raise ValueError(self.method, "is not an appropriate attention method.")
		self.hidden_size = hidden_size
		if self.method == 'general':
			self.attn = nn.Linear(self.hidden_size, hidden_size)
		elif self.method == 'concat':
			self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
			self.v = nn.Parameter(torch.FloatTensor(1, hidden_size))

	def dot_score(self, hidden, encoder_outputs):
		return torch.sum(hidden * encoder_outputs, dim=2)

	def general_score(self, hidden, encoder_outputs):
		energy = self.attn(encoder_outputs)
		return torch.sum(hidden * energy, dim=2)

	def concat_score(self, hidden, encoder_outputs):
		energy = self.attn(torch.cat((hidden.expand(encoder_outputs.size(0), -1, -1), encoder_outputs), 2)).tanh()
		return torch.sum(self.v * energy, dim=2)

	def forward(self, hidden, encoder_outputs):
		# Calculate the attention weights (energies) based on the given method
		if self.method == 'general':
			attn_energies = self.general_score(hidden, encoder_outputs)
		elif self.method == 'concat':
			attn_energies = self.concat_score(hidden, encoder_outputs)
		elif self.method == 'dot':
			attn_energies = self.dot_score(hidden, encoder_outputs)

		# Transpose max_length and batch_size dimensions
		attn_energies = attn_energies.t()

		# Return the softmax normalized probability scores (with added dimension)
		return F.softmax(attn_energies, dim=1).unsqueeze(1)

class LuongAttnDecoderRNN(nn.Module):
	def __init__(self, attn_model, embedding, cell_type, hidden_size, output_size, nlayers=1, dropout=0.1):
		super(LuongAttnDecoderRNN, self).__init__()

		# Keep for reference
		self.attn_model 	= attn_model
		self.hidden_size 	= hidden_size
		self.output_size 	= output_size
		self.nlayers 		= nlayers
		self.dropout 		= dropout
		self.cell_type 		= cell_type

		# Define layers
		self.embedding = embedding
		self.embedding_size  = self.embedding.embedding_dim
		self.embedding_dropout = nn.Dropout(self.dropout)
		if self.cell_type == 'gru':
			self.rnn = nn.GRU(self.embedding_size, self.hidden_size, self.nlayers, dropout=(0 if self.nlayers == 1 else self.dropout))
		else:
			self.rnn = nn.LSTM(self.embedding_size, self.hidden_size, self.nlayers, dropout=(0 if self.nlayers == 1 else self.dropout))
		self.concat = nn.Linear(self.hidden_size * 2, self.hidden_size)
		self.out = nn.Linear(self.hidden_size, self.output_size)

		self.attn = Attn(self.attn_model, self.hidden_size)

	def forward(self, input_step, last_hidden, encoder_outputs):
		# Note: we run this one step (word) at a time
		# Get embedding of current input word
		embedded = self.embedding(input_step)
		embedded = self.embedding_dropout(embedded)

		try:
			embedded = embedded.view(1, input_step.size(0), self.embedding_size)
		except:
			embedded = embedded.view(1, 1, self.embedding_size)

		rnn_output, hidden = self.rnn(embedded, last_hidden)
		# Calculate attention weights from the current GRU output
		attn_weights = self.attn(rnn_output, encoder_outputs)
		# Multiply attention weights to encoder outputs to get new "weighted sum" context vector
		context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
		# Concatenate weighted context vector and GRU output using Luong eq. 5
		rnn_output = rnn_output.squeeze(0)
		context = context.squeeze(1)
		concat_input = torch.cat((rnn_output, context), 1)
		concat_output = F.relu(self.concat(concat_input))
		representation = concat_output
		# Predict next word using Luong eq. 6
		output = self.out(concat_output)
		output = F.log_softmax(output, dim=1)
		# Return output and final hidden state
		return output, hidden, attn_weights, representation
    
    
##################################################
# Contextual embeddings.py #
##################################################

class BertEncoder(nn.Module):
	def __init__(self, bert_model = 'bert-base-uncased',device = 'cuda:0 ', freeze_bert = False):
		super(BertEncoder, self).__init__()
		self.bert_layer = BertModel.from_pretrained(bert_model)
		self.bert_tokenizer = BertTokenizer.from_pretrained(bert_model)
		self.device = device
		
		if freeze_bert:
			for p in self.bert_layer.parameters():
				p.requires_grad = False
		
	def bertify_input(self, sentences):
		'''
		Preprocess the input sentences using bert tokenizer and converts them to a torch tensor containing token ids

		'''
		#Tokenize the input sentences for feeding into BERT
		all_tokens  = [['[CLS]'] + self.bert_tokenizer.tokenize(sentence) + ['[SEP]'] for sentence in sentences]
		
		#Pad all the sentences to a maximum length
		input_lengths = [len(tokens) for tokens in all_tokens]
		max_length    = max(input_lengths)
		padded_tokens = [tokens + ['[PAD]' for _ in range(max_length - len(tokens))] for tokens in all_tokens]

		#Convert tokens to token ids
		token_ids = torch.tensor([self.bert_tokenizer.convert_tokens_to_ids(tokens) for tokens in padded_tokens]).to(self.device)

		#Obtain attention masks
		pad_token = self.bert_tokenizer.convert_tokens_to_ids('[PAD]')
		attn_masks = (token_ids != pad_token).long()

		return token_ids, attn_masks, input_lengths

	# def forward(self, sentences):
	# 	'''
	# 	Feed the batch of sentences to a BERT encoder to obtain contextualized representations of each token
	# 	'''
	# 	#Preprocess sentences
	# 	token_ids, attn_masks, input_lengths = self.bertify_input(sentences)

	# 	#Feed through bert
	# 	cont_reps, _ = self.bert_layer(token_ids, attention_mask = attn_masks)

	# 	return cont_reps, input_lengths

	def forward(self, sentences):
		'''
		Feed the batch of sentences to a BERT encoder to obtain contextualized representations of each token
		'''
		#Preprocess sentences
		token_ids, attn_masks, input_lengths = self.bertify_input(sentences)

		#Feed through bert
		# cont_reps, _ = self.bert_layer(token_ids, attention_mask = attn_masks)
		output = self.bert_layer(token_ids, attention_mask = attn_masks)
		cont_reps = output.last_hidden_state

		return cont_reps, input_lengths

class RobertaEncoder(nn.Module):
	def __init__(self, roberta_model = 'roberta-base', device = 'cuda:0 ', freeze_roberta = False):
		super(RobertaEncoder, self).__init__()
		self.roberta_layer = RobertaModel.from_pretrained(roberta_model)
		self.roberta_tokenizer = RobertaTokenizer.from_pretrained(roberta_model)
		self.device = device
		
		if freeze_roberta:
			for p in self.roberta_layer.parameters():
				p.requires_grad = False
		
	def robertify_input(self, sentences):
		'''
		Preprocess the input sentences using roberta tokenizer and converts them to a torch tensor containing token ids

		'''
		# Tokenize the input sentences for feeding into RoBERTa
		all_tokens  = [['<s>'] + self.roberta_tokenizer.tokenize(sentence) + ['</s>'] for sentence in sentences]
		
		# Pad all the sentences to a maximum length
		input_lengths = [len(tokens) for tokens in all_tokens]
		max_length    = max(input_lengths)
		padded_tokens = [tokens + ['<pad>' for _ in range(max_length - len(tokens))] for tokens in all_tokens]

		# Convert tokens to token ids
		token_ids = torch.tensor([self.roberta_tokenizer.convert_tokens_to_ids(tokens) for tokens in padded_tokens]).to(self.device)

		# Obtain attention masks
		pad_token = self.roberta_tokenizer.convert_tokens_to_ids('<pad>')
		attn_masks = (token_ids != pad_token).long()

		return token_ids, attn_masks, input_lengths

	def forward(self, sentences):
		'''
		Feed the batch of sentences to a RoBERTa encoder to obtain contextualized representations of each token
		'''
		# Preprocess sentences
		token_ids, attn_masks, input_lengths = self.robertify_input(sentences)

		# Feed through RoBERTa
		output = self.roberta_layer(token_ids, attention_mask = attn_masks)
        
		cont_reps = output.last_hidden_state

		return cont_reps, input_lengths
    
##################################################
# Decoder.py #
##################################################

class DecoderRNN(nn.Module):
	'''
	To DO
	Encoder helps in building the sentence encoding module for a batched version
	of data that is sent in [T x B] having corresponding input lengths in [1 x B]

	Args:
			hidden_size: Hidden size of the RNN cell
			embedding: Embeddings matrix [vocab_size, embedding_dim]
			cell_type: Type of RNN cell to be used : LSTM, GRU
			nlayers: Number of layers of LSTM (default = 1)
			dropout: Dropout Rate (default = 0.1)
			bidirectional: Bidirectional model to be formed (default: False)
	'''
	def __init__(self, embedding, cell_type, hidden_size, output_size, nlayers=1, dropout=0.2):
		super(DecoderRNN, self).__init__()
		self.hidden_size        = hidden_size
		self.cell_type          = cell_type
		self.embedding          = embedding
		self.embedding_size     = self.embedding.embedding_dim
		self.embedding_dropout = nn.Dropout(dropout)
		self.nlayers            = nlayers
		self.output_size        = output_size

		if self.cell_type == 'lstm':
			self.rnn = nn.LSTM(self.embedding_size, self.hidden_size, num_layers=self.nlayers, dropout=(0 if nlayers == 1 else dropout))
		else:
			self.rnn = nn.GRU(self.embedding_size, self.hidden_size, num_layers=self.nlayers, dropout=(0 if nlayers == 1 else dropout))

		self.out     = nn.Linear(self.hidden_size, self.output_size)


	def forward(self, input_step, last_hidden):
		'''
		To Do
			Args:
				input_seqs (tensor) : input tensor | size : [Seq_len X Batch_size]
				input_lengths (list/tensor) : length of each input sentence | size : [Batch_size] 
				device (gpu) : Used for sorting the sentences and putting it to device

			Returns:
				output (tensor) : Last State representations of RNN [Seq_len X Batch_size X hidden_size]
				hidden (tuple)	: Hidden states and (cell states) of recurrent networks
		'''
		output              = self.embedding(input_step)
		output              = self.embedding_dropout(output)
		output              = output.view(1, input_step.size(0), self.embedding_size)
		output              = F.relu(output)
		output, last_hidden = self.rnn(output, last_hidden)
		output              = output.squeeze(0)
		output              = self.out(output)
		output              = F.log_softmax(output, dim=1)

		return output, last_hidden

##################################################
# Encoder.py #
##################################################

class Encoder(nn.Module):
	'''
	Encoder helps in building the sentence encoding module for a batched version
	of data that is sent in [T x B] having corresponding input lengths in [1 x B]

	Args:
			hidden_size: Hidden size of the RNN cell
			embedding: Embeddings matrix [vocab_size, embedding_dim]
			cell_type: Type of RNN cell to be used : LSTM, GRU
			nlayers: Number of layers of LSTM (default = 1)
			dropout: Dropout Rate (default = 0.1)
			bidirectional: Bidirectional model to be formed (default: False)
	'''

	def __init__(self, hidden_size=512,embedding_size = 768, cell_type='lstm', nlayers=1, dropout=0.1, bidirectional=True):
		super(Encoder, self).__init__()
		self.hidden_size = hidden_size
		self.nlayers = nlayers
		self.dropout = dropout
		self.cell_type = cell_type
		self.embedding_size = embedding_size
		# self.embedding_size = self.embedding.embedding_dim
		self.bidirectional = bidirectional

		if self.cell_type == 'lstm':
			self.rnn = nn.LSTM(self.embedding_size, self.hidden_size,
							   num_layers=self.nlayers,
							   dropout=(0 if self.nlayers == 1 else dropout),
							   bidirectional=bidirectional)
		elif self.cell_type == 'gru':
			self.rnn = nn.GRU(self.embedding_size, self.hidden_size,
							  num_layers=self.nlayers,
							  dropout=(0 if self.nlayers == 1 else dropout),
							  bidirectional=bidirectional)
		else:
			self.rnn = nn.RNN(self.embedding_size, self.hidden_size,
							  num_layers=self.nlayers,
							  nonlinearity='tanh',							# ['relu', 'tanh']
							  dropout=(0 if self.nlayers == 1 else dropout),
							  bidirectional=bidirectional)

	def forward(self, sorted_seqs, sorted_len, orig_idx, device=None, hidden=None):
		'''
			Args:
				input_seqs (tensor) : input tensor | size : [Seq_len X Batch_size]
				input_lengths (list/tensor) : length of each input sentence | size : [Batch_size] 
				device (gpu) : Used for sorting the sentences and putting it to device

			Returns:
				output (tensor) : Last State representations of RNN [Seq_len X Batch_size X hidden_size]
				hidden (tuple)	: Hidden states and (cell states) of recurrent networks
		'''

		# sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seqs, input_lengths, device)
		# pdb.set_trace()

		#embedded = self.embedding(sorted_seqs)  ### NO MORE IDS
		packed = torch.nn.utils.rnn.pack_padded_sequence(
			sorted_seqs, sorted_len)
		outputs, hidden = self.rnn(packed, hidden)
		outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(
			outputs)  # unpack (back to padded)

		outputs = outputs.index_select(1, orig_idx)

		if self.bidirectional:
			outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:] # Sum bidirectional outputs

		return outputs, hidden


## args.py

In [32]:
def build_parser():
	# Data loading parameters
	parser = argparse.ArgumentParser(description='Run Single sequence model')

	# Mode specifications
	parser.add_argument('-mode', type=str, default='train', choices=['train', 'test', 'conf'], help='Modes: train, test, conf')
	parser.add_argument('-debug', dest='debug', action='store_true', help='Operate in debug mode')
	parser.add_argument('-no-debug', dest='debug', action='store_false', help='Operate in normal mode')
	parser.set_defaults(debug=False)

	# Run Config
	parser.add_argument('-run_name', type=str, default='debug', help='run name for logs')
	parser.add_argument('-dataset', type=str, default='mawps', help='Dataset')
	parser.add_argument('-display_freq', type=int, default= 10000, help='number of batches after which to display samples')
	parser.add_argument('-outputs', dest='outputs', action='store_true', help='Show full validation outputs')
	parser.add_argument('-no-outputs', dest='outputs', action='store_false', help='Do not show full validation outputs')
	parser.set_defaults(outputs=True)
	parser.add_argument('-results', dest='results', action='store_true', help='Store results')
	parser.add_argument('-no-results', dest='results', action='store_false', help='Do not store results')
	parser.set_defaults(results=True)

	# Meta Attributes
	parser.add_argument('-vocab_size', type=int, default=30000, help='Vocabulary size to consider')
	parser.add_argument('-histogram', dest='histogram', action='store_true', help='Operate in debug mode')
	parser.add_argument('-no-histogram', dest='histogram', action='store_false', help='Operate in normal mode')
	parser.set_defaults(histogram=True)
	parser.add_argument('-save_writer', dest='save_writer',action='store_true', help='To write tensorboard')
	parser.add_argument('-no-save_writer', dest='save_writer', action='store_false', help='Dont write tensorboard')
	parser.set_defaults(save_writer=False)

	# Device Configuration
	parser.add_argument('-gpu', type=int, default=2, help='Specify the gpu to use')
	parser.add_argument('-early_stopping', type=int, default=50, help='Early Stopping after n epoch')
	parser.add_argument('-seed', type=int, default=6174, help='Default seed to set')
	parser.add_argument('-logging', type=int, default=1, help='Set to 0 if you do not require logging')
	parser.add_argument('-ckpt', type=str, default='model', help='Checkpoint file name')
	parser.add_argument('-save_model', dest='save_model',action='store_true', help='To save the model')
	parser.add_argument('-no-save_model', dest='save_model', action='store_false', help='Dont save the model')
	parser.set_defaults(save_model=False)
	# parser.add_argument('-log_fmt', type=str, default='%(asctime)s | %(levelname)s | %(name)s | %(message)s', help='Specify format of the logger')

	# LSTM parameters
	parser.add_argument('-emb2_size', type=int, default=16, help='Embedding dimensions of inputs')
	parser.add_argument('-cell_type', type=str, default='lstm', help='RNN cell for encoder and decoder, default: lstm')

	parser.add_argument('-use_attn', dest='use_attn',action='store_true', help='To use attention mechanism?')
	parser.add_argument('-no-attn', dest='use_attn', action='store_false', help='Not to use attention mechanism?')
	parser.set_defaults(use_attn=True)

	parser.add_argument('-attn_type', type=str, default='general', help='Attention mechanism: (general, concat), default: general')
	parser.add_argument('-hidden_size', type=int, default=256, help='Number of hidden units in each layer')
	parser.add_argument('-depth', type=int, default=1, help='Number of layers in each encoder and decoder')
	parser.add_argument('-dropout', type=float, default=0.1, help= 'Dropout probability for input/output/state units (0.0: no dropout)')
	parser.add_argument('-max_length', type=int, default=100, help='Specify max decode steps: Max length string to output')
	parser.add_argument('-init_range', type=float, default=0.08, help='Initialization range for seq2seq model')
	parser.add_argument('-bidirectional', dest='bidirectional', action='store_true', help='Bidirectionality in LSTMs')
	parser.add_argument('-no-bidirectional', dest='bidirectional', action='store_false', help='Bidirectionality in LSTMs')
	parser.set_defaults(bidirectional=True)
	parser.add_argument('-lr', type=float, default=0.0005, help='Learning rate')
	# parser.add_argument('-bert_lr', type=float, default=5e-5, help='Larning rate to train BERT embeddings')
	parser.add_argument('-warmup', type=float, default=0.1, help='Proportion of training to perform linear learning rate warmup for')
	parser.add_argument('-max_grad_norm', type=float, default=0.25, help='Clip gradients to this norm')
	parser.add_argument('-batch_size', type=int, default=8, help='Batch size')
	parser.add_argument('-epochs', type=int, default=50, help='Maximum # of training epochs')
	parser.add_argument('-opt', type=str, default='adam', choices=['adam', 'adadelta', 'sgd', 'asgd'], help='Optimizer for training')
	parser.add_argument('-separate_opt', dest='separate_opt', action='store_true', help='Separate Optimizers for Embedding and model - AdamW for emb and Adam for model')
	parser.add_argument('-no-separate_opt', dest='separate_opt', action='store_false', help='Common optimizer for Embedding and model')
	parser.set_defaults(separate_opt=False)
	parser.add_argument('-teacher_forcing_ratio', type=float, default=0.9, help='Teacher forcing ratio')

	# Embeddings
	parser.add_argument('-embedding', type=str, default='roberta', choices=['bert', 'roberta', 'word2vec', 'random'], help='Embeddings')
	# parser.add_argument('-use_word2vec', dest='use_word2vec', action='store_true', help='use word2vec')
	# parser.add_argument('-no-use_word2vec', dest='use_word2vec', action='store_false', help='Do not use word2vec')
	# parser.set_defaults(use_word2vec=False)
	# parser.add_argument('-word2vec_bin', type=str, default='/datadrive/satwik/global_data/glove.840B.300d.txt', help='Binary file of word2vec')
	parser.add_argument('-word2vec_bin', type=str, default='/datadrive/global_files/GoogleNews-vectors-negative300.bin', help='Binary file of word2vec')
	# parser.add_argument('-train_word2vec', dest='train_word2vec', action='store_true', help='train word2vec')
	# parser.add_argument('-no-train_word2vec', dest='train_word2vec', action='store_false', help='Do not train word2vec')
	# parser.set_defaults(train_word2vec=True)
	parser.add_argument('-emb1_size', type=int, default=768, help='Embedding dimensions of inputs')
	parser.add_argument('-emb_name', type=str, default='roberta-base', choices=['bert-base-uncased', 'roberta-base'], help='Which pre-trained model')
	# parser.add_argument('-bert_size', type=int, default = 768, help = 'Size of BERT\'s last layer representations')
	parser.add_argument('-emb_lr', type=float, default=1e-5, help='Larning rate to train embeddings')
	parser.add_argument('-freeze_emb', dest='freeze_emb', action='store_true', help='Freeze embedding weights')
	parser.add_argument('-no-freeze_emb', dest='freeze_emb', action='store_false', help='Train embedding weights')
	parser.set_defaults(freeze_emb=False)

	parser.add_argument('-grade_disp', dest='grade_disp', action='store_true', help='Display grade information in validation outputs')
	parser.add_argument('-no-grade_disp', dest='grade_disp', action='store_false', help='Don\'t display grade information')
	parser.set_defaults(grade_disp=False)
	parser.add_argument('-type_disp', dest='type_disp', action='store_true', help='Display Type information in validation outputs')
	parser.add_argument('-no-type_disp', dest='type_disp', action='store_false', help='Don\'t display Type information')
	parser.set_defaults(type_disp=False)
	parser.add_argument('-challenge_disp', dest='challenge_disp', action='store_true', help='Display information in validation outputs')
	parser.add_argument('-no-challenge_disp', dest='challenge_disp', action='store_false', help='Don\'t display information')
	parser.set_defaults(challenge_disp=False)
	parser.add_argument('-nums_disp', dest='nums_disp', action='store_true', help='Display number of numbers information in validation outputs')
	parser.add_argument('-no-nums_disp', dest='nums_disp', action='store_false', help='Don\'t display number of numbers information')
	parser.set_defaults(nums_disp=True)
	parser.add_argument('-more_nums', dest='more_nums', action='store_true', help='More numbers in Voc2')
	parser.add_argument('-no-more_nums', dest='more_nums', action='store_false', help='Usual numbers in Voc2')
	parser.set_defaults(more_nums=False)
	parser.add_argument('-mawps_vocab', dest='mawps_vocab', action='store_true', help='Custom Numbers in Voc2')
	parser.add_argument('-no-mawps_vocab', dest='mawps_vocab', action='store_false', help='No Custom Numbers in Voc2')
	parser.set_defaults(mawps_vocab=False)

	parser.add_argument('-show_train_acc', dest='show_train_acc', action='store_true', help='Calculate the train accuracy')
	parser.add_argument('-no-show_train_acc', dest='show_train_acc', action='store_false', help='Don\'t calculate the train accuracy')
	parser.set_defaults(show_train_acc=True)

	parser.add_argument('-full_cv', dest='full_cv', action='store_true', help='5-fold CV')
	parser.add_argument('-no-full_cv', dest='full_cv', action='store_false', help='No 5-fold CV')
	parser.set_defaults(full_cv=False)

	#Conf parameters
	parser.add_argument('-conf', type = str, default = 'posterior', choices = ["posterior", "similarity"], help = 'Confidence estimation criteria to use, ["posterior", "similarity"]')
	parser.add_argument('-sim_criteria', type = str, default = 'bleu', choices = ['bert_score', 'bleu_score'], help = 'Only applicable if similarity based criteria is selected for confidence.')
	parser.add_argument('-adv', action = 'store_true', help = 'If dealing with out of distribution examples')
	
	return parser

def parse_arguments(arg_dict=None):
    parser = build_parser()
    if arg_dict:
        # Override default values with provided dictionary values
        args = parser.parse_args([])
        for key, value in arg_dict.items():
            setattr(args, key, value)
        return args
    else:
        return parser.parse_args()  # If no dictionary is provided, use default command line arguments

## Utilities Folder

In [33]:
##################################################
# Bleu.py #
##################################################
"""Python implementation of BLEU and smooth-BLEU.
This module provides a Python implementation of BLEU and smooth-BLEU.
Smooth BLEU is computed following the method outlined in the paper:
Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic
evaluation metrics for machine translation. COLING 2004.
"""

def _get_ngrams(segment, max_order):
    ngram_counts = collections.Counter()
    for order in range(1, max_order + 1):
        for i in range(0, len(segment) - order + 1):
            ngram = tuple(segment[i:i+order])
            ngram_counts[ngram] += 1
    return ngram_counts


def compute_bleu(reference_corpus, translation_corpus, max_order=4,
                 smooth=False):
 
    matches_by_order = [0] * max_order
    possible_matches_by_order = [0] * max_order
    reference_length = 0
    translation_length = 0
    for (references, translation) in zip(reference_corpus,
                                       translation_corpus):
        reference_length += min(len(r) for r in references)
        translation_length += len(translation)

        merged_ref_ngram_counts = collections.Counter()
        for reference in references:
            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
        translation_ngram_counts = _get_ngrams(translation, max_order)
        overlap = translation_ngram_counts & merged_ref_ngram_counts
        for ngram in overlap:
            matches_by_order[len(ngram)-1] += overlap[ngram]
        for order in range(1, max_order+1):
            possible_matches = len(translation) - order + 1
            if possible_matches > 0:
                possible_matches_by_order[order-1] += possible_matches

    precisions = [0] * max_order
    for i in range(0, max_order):
        if smooth:
            precisions[i] = ((matches_by_order[i] + 1.) /
                           (possible_matches_by_order[i] + 1.))
        else:
            if possible_matches_by_order[i] > 0:
                precisions[i] = (float(matches_by_order[i]) /
                             possible_matches_by_order[i])
            else:
                precisions[i] = 0.0

    if min(precisions) > 0:
        p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0

    ratio = float(translation_length) / reference_length

    if ratio > 1.0:
        bp = 1.
    else:
        if ratio > 1E-1:
            bp = math.exp(1 - 1. / ratio)
        else:
            bp = 1E-2

    bleu = geo_mean * bp

    return (bleu, precisions, bp, ratio, translation_length, reference_length)

##################################################
# eq_preprocessing.py #
##################################################


OPS = ['+', '-', '*', '/']

class Node():
    def __init__(self, val):
        self.val    = val
        self.left   = None
        self.right  = None


def preorder(node, prefix = ''):
    if node is None:
        return prefix
    val = node.val
    prefix += val +' '
    prefix = preorder(node.left, prefix)
    prefix = preorder(node.right, prefix)
    return prefix

def expr2tree(string):
    tokens = string.split()
    if len(tokens) == 1:
        return Node(tokens[0])
    i = 0
    while i < len(tokens):
        if tokens[i] in OPS:
            break
        i += 1

    node = Node(tokens[i])
    node.left  = expr2tree(' '.join(tokens[:i]))
    node.right = expr2tree(' '.join(tokens[i+1:])) 
    return node

def infix2prefix(equation):
    tree_root = expr2tree(equation)
    prefix = preorder(tree_root, '')
    return prefix

# if __name__ == "__main__":
#     parser = argparse.ArgumentParser()
#     parser.add_argument('-eqn', required=True, type = str)
#     args = parser.parse_args()

#     print(infix2prefix(args.eqn))

In [34]:
##################################################
# Evaluate.py #
##################################################

"""
EXAMPLE:
prefix1 = '* n0 + + n1 n2 n3'
list_num1 = [13, 9, 10, 3] n0->13, n1->9, n2->10, n3->3
print(ans_evaluator(prefix1, list_num1))
answer: 286
"""

def format_eq(eq):
	fin_eq = ""
	ls = ['0','1','2','3','4','5','6','7','8','9','.']
	temp_num = ""
	flag = 0
	for i in eq:
		if flag > 0:
			fin_eq = fin_eq + i
			flag = flag-1
		elif i == 'n':
			flag = 6
			if fin_eq == "":
				fin_eq = fin_eq + i
			else:
				fin_eq = fin_eq + ' ' + i
		elif i in ls:
			temp_num = temp_num + i
		elif i == ' ':
			if temp_num == "":
				continue
			else:
				if fin_eq == "":
					fin_eq = fin_eq + temp_num
				else:
					fin_eq = fin_eq + ' ' + temp_num
			temp_num = ""
		else:
			if fin_eq == "":
				if temp_num == "":
					fin_eq = fin_eq + i
				else:
					fin_eq = fin_eq + temp_num + ' ' + i
			else:
				if temp_num == "":
					fin_eq = fin_eq + ' ' + i
				else:
					fin_eq = fin_eq + ' ' + temp_num + ' ' + i
			temp_num = ""
	if temp_num != "":
		fin_eq = fin_eq + ' ' + temp_num
	return fin_eq

def prefix_to_infix(prefix):
	operators = ['+', '-', '*', '/']
	stack = []
	elements = format_eq(prefix).split()
	for i in range(len(elements)-1, -1, -1):
		if elements[i] in operators and len(stack)>1:
			op1 = stack.pop(-1)
			op2 = stack.pop(-1)
			fin_operand = '(' + ' ' + op1 + ' ' + elements[i] + ' ' + op2 + ' ' + ')'
			stack.append(fin_operand)
		else:
			stack.append(elements[i])
	try:
		return stack[0]
	except:
		return ""

def stack_to_string(stack):
	op = ""
	for i in stack:
		if op == "":
			op = op + i
		else:
			op = op + ' ' + i
	return op

def back_align(eq, list_num):
	elements = eq.split()
	for i in range(len(elements)):
		if elements[i][0] == 'n':
			index = int(elements[i][6])
			try:
				number = str(list_num[index])
			except:
				return '-1000.112'
			elements[i] = number
	return stack_to_string(elements)    

def ans_evaluator(eq, list_num):
	#pdb.set_trace()
	infix = prefix_to_infix(eq)
	aligned = back_align(infix, list_num)
	try:
		final_ans = parse_expr(aligned, evaluate = True)
	except:
		final_ans = -1000.112
	return final_ans

def cal_score(outputs, nums, ans, eqns):
	corr = 0
	tot = 0
	disp_corr = []
	for i in range(len(outputs)):
		op = stack_to_string(outputs[i])
		if 'NONE' in op:
			if op == eqns[i]:
				corr+=1
				tot+=1
				disp_corr.append(1)
			else:
				tot+=1
				disp_corr.append(0)
		else:
			num = nums[i].split()
			num = [float(nu) for nu in num]
			answer = ans[i].item()

			pred = ans_evaluator(op, num)

			if abs(pred - answer) <= 0.1:
				corr+=1
				tot+=1
				disp_corr.append(1)
			else:
				tot+=1
				disp_corr.append(0)

	return corr, tot, disp_corr

def get_infix_eq(outputs, nums):
	eqs = []
	for i in range(len(outputs)):
		op = stack_to_string(outputs[i])
		num = nums[i].split()
		num = [float(nu) for nu in num]

		infix = prefix_to_infix(op)
		eqs.append(infix)

	return eqs


##################################################
# Helper.py #
##################################################

def gpu_init_pytorch(gpu_num):
	'''
		Initialize GPU
	'''
	torch.cuda.set_device(int(gpu_num))
	device = torch.device("cuda:{}".format(
		gpu_num) if torch.cuda.is_available() else "cpu")
	return device

def create_save_directories(path):
	if not os.path.exists(path):
		os.makedirs(path)

def save_checkpoint(state, epoch, logger, model_path, ckpt):
	'''
		Saves the model state along with epoch number. The name format is important for 
		the load functions. Don't mess with it.

		Args:
			model state
			epoch number
			logger variable
			directory to save models
			checkpoint name
	'''
	ckpt_path = os.path.join(model_path, '{}.pt'.format(ckpt))
	logger.info('Saving Checkpoint at : {}'.format(ckpt_path))
	torch.save(state, ckpt_path)

def get_latest_checkpoint(model_path, logger):
	'''
		Looks for the checkpoint with highest epoch number in the directory "model_path" 

		Args:
			model_path: including the run_name
			logger variable: to log messages
		Returns:
			checkpoint: path to the latest checkpoint 
	'''

	ckpts = glob('{}/*.pt'.format(model_path))
	ckpts = sorted(ckpts)

	if len(ckpts) == 0:
		logger.warning('No Checkpoints Found')

		return None
	else:
		#pdb.set_trace()
		#latest_epoch = max([int(x.split('_')[-1].split('.')[0]) for x in ckpts])
		#ckpts = sorted(ckpts, key= lambda x: int(x.split('_')[-1].split('.')[0]) , reverse=True )
		ckpt_path = ckpts[0]
		#logger.info('Checkpoint found with epoch number : {}'.format(latest_epoch))
		logger.debug('Checkpoint found at : {}'.format(ckpt_path))

		return ckpt_path

def load_checkpoint(config, model, mode, ckpt_path, logger, device):
	checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
	model.load_state_dict(checkpoint['model_state_dict'])
	model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
	if config.separate_opt:
		model.emb_optimizer.load_state_dict(checkpoint['emb_optimizer_state_dict'])
	start_epoch = checkpoint['epoch']
	min_train_loss  =checkpoint['min_train_loss']
	min_val_loss = checkpoint['min_val_loss']
	voc1 = checkpoint['voc1']
	voc2 = checkpoint['voc2']
	max_train_acc = checkpoint['max_train_acc']
	max_val_acc = checkpoint['max_val_acc']
	max_val_bleu = checkpoint['max_val_bleu']
	best_epoch = checkpoint['best_epoch']

	model.to(device)

	if mode == 'train':
		model.train()
	else:
		model.eval()

	logger.info('Successfully Loaded Checkpoint from {}, with epoch number: {} for {}'.format(ckpt_path, start_epoch, mode))

	return start_epoch, min_train_loss, min_val_loss, max_train_acc, max_val_acc, max_val_bleu, best_epoch, voc1, voc2

class Voc1:
	def __init__(self):
		self.trimmed = False
		self.frequented = False
		self.w2id = {'<s>': 0, '</s>': 1, 'unk': 2}
		self.id2w = {0: '<s>', 1: '</s>', 2: 'unk'}
		self.w2c = {}
		self.nwords = 3

	def add_word(self, word):
		if word not in self.w2id:
			self.w2id[word] = self.nwords
			self.id2w[self.nwords] = word
			self.w2c[word] = 1
			self.nwords += 1
		else:
			self.w2c[word] += 1

	def add_sent(self, sent):
		for word in sent.split():
			self.add_word(word)

	def most_frequent(self, topk):
		# if self.frequented == True:
		# 	return
		# self.frequented = True

		keep_words = []
		count = 3
		sort_by_value = sorted(
			self.w2c.items(), key=lambda kv: kv[1], reverse=True)
		for word, freq in sort_by_value:
			keep_words += [word]*freq
			count += 1
			if count == topk:
				break

		self.w2id = {'<s>': 0, '</s>': 1, 'unk': 2}
		self.id2w = {0: '<s>', 1: '</s>', 2: 'unk'}
		self.w2c = {}
		self.nwords = 3

		for word in keep_words:
			self.add_word(word)

	def trim(self, mincount):
		if self.trimmed == True:
			return
		self.trimmed = True

		keep_words = []
		for k, v in self.w2c.items():
			if v >= mincount:
				keep_words += [k]*v

		self.w2id = {'<s>': 0, '</s>': 1, 'unk': 2}
		self.id2w = {0: '<s>', 1: '</s>', 2: 'unk'}
		self.w2c = {}
		self.nwords = 3
		for word in keep_words:
			self.addWord(word)

	def get_id(self, idx):
		return self.w2id[idx]

	def get_word(self, idx):
		return self.id2w[idx]

	def create_vocab_dict(self, args, train_dataloader):
		for data in train_dataloader:
			for sent in data['ques']:
				self.add_sent(sent)

		self.most_frequent(args.vocab_size)
		assert len(self.w2id) == self.nwords
		assert len(self.id2w) == self.nwords

	def add_to_vocab_dict(self, args, dataloader):
		for data in dataloader:
			for sent in data['ques']:
				self.add_sent(sent)

		self.most_frequent(args.vocab_size)
		assert len(self.w2id) == self.nwords
		assert len(self.id2w) == self.nwords

class Voc2:
	def __init__(self, config):
		self.frequented = False
		if config.more_nums:
			self.w2id = {'<s>': 0, '</s>': 1, '+': 2, '-': 3, '*': 4, '/': 5, 'number0': 6, 'number1': 7, 'number2': 8, 'number3': 9, 'number4': 10, 'number5': 11, 'number6': 12, 'number7': 13, 'number8': 14, 'number9': 15, 'number10': 16, 'number11': 17}
			self.id2w = {0: '<s>', 1: '</s>', 2: '+', 3: '-', 4: '*', 5: '/', 6: 'number0', 7: 'number1', 8: 'number2', 9: 'number3', 10: 'number4', 11: 'number5', 12: 'number6', 13: 'number7', 14: 'number8', 15: 'number9', 16: 'number10', 17: 'number11'}
			self.w2c = {'+': 0, '-': 0, '*': 0, '/': 0, 'number0': 0, 'number1': 0, 'number2': 0, 'number3': 0, 'number4': 0, 'number5': 0, 'number6': 0, 'number7': 0, 'number8': 0, 'number9': 0, 'number10': 0, 'number11': 0}
			self.nwords = 18
		elif config.mawps_vocab:
			# '0.25', '8.0', '0.05', '60.0', '7.0', '5.0', '2.0', '4.0', '1.0', '12.0', '100.0', '25.0', '0.1', '3.0', '0.01', '0.5', '10.0'
			self.w2id = {'<s>': 0, '</s>': 1, '+': 2, '-': 3, '*': 4, '/': 5, 'number0': 6, 'number1': 7, 'number2': 8, 'number3': 9, 'number4': 10, '0.25': 11, '8.0': 12, '0.05': 13, '60.0': 14, '7.0': 15, '5.0': 16, '2.0': 17, '4.0': 18, '1.0': 19, '12.0': 20, '100.0': 21, '25.0': 22, '0.1': 23, '3.0': 24, '0.01': 25, '0.5': 26, '10.0': 27}
			self.id2w = {0: '<s>', 1: '</s>', 2: '+', 3: '-', 4: '*', 5: '/', 6: 'number0', 7: 'number1', 8: 'number2', 9: 'number3', 10: 'number4', 11: '0.25', 12: '8.0', 13: '0.05', 14: '60.0', 15: '7.0', 16: '5.0', 17: '2.0', 18: '4.0', 19: '1.0', 20: '12.0', 21: '100.0', 22: '25.0', 23: '0.1', 24: '3.0', 25: '0.01', 26: '0.5', 27: '10.0'}
			self.w2c = {'+': 0, '-': 0, '*': 0, '/': 0, 'number0': 0, 'number1': 0, 'number2': 0, 'number3': 0, 'number4': 0, '0.25': 0, '8.0': 0, '0.05': 0, '60.0': 0, '7.0': 0, '5.0': 0, '2.0': 0, '4.0': 0, '1.0': 0, '12.0': 0, '100.0': 0, '25.0': 0, '0.1': 0, '3.0': 0, '0.01': 0, '0.5': 0, '10.0': 0}
			self.nwords = 28
		else:
			self.w2id = {'<s>': 0, '</s>': 1, '+': 2, '-': 3, '*': 4, '/': 5, 'number0': 6, 'number1': 7, 'number2': 8, 'number3': 9, 'number4': 10}
			self.id2w = {0: '<s>', 1: '</s>', 2: '+', 3: '-', 4: '*', 5: '/', 6: 'number0', 7: 'number1', 8: 'number2', 9: 'number3', 10: 'number4'}
			self.w2c = {'+': 0, '-': 0, '*': 0, '/': 0, 'number0': 0, 'number1': 0, 'number2': 0, 'number3': 0, 'number4': 0}
			self.nwords = 11

	def add_word(self, word):
		if word not in self.w2id: # IT SHOULD NEVER GO HERE!!
			self.w2id[word] = self.nwords
			self.id2w[self.nwords] = word
			self.w2c[word] = 1
			self.nwords += 1
		else:
			self.w2c[word] += 1

	def add_sent(self, sent):
		for word in sent.split():
			self.add_word(word)

	def get_id(self, idx):
		return self.w2id[idx]

	def get_word(self, idx):
		return self.id2w[idx]

	def create_vocab_dict(self, args, train_dataloader):
		for data in train_dataloader:
			for sent in data['eqn']:
				self.add_sent(sent)

		assert len(self.w2id) == self.nwords
		assert len(self.id2w) == self.nwords

	def add_to_vocab_dict(self, args, dataloader):
		for data in dataloader:
			for sent in data['eqn']:
				self.add_sent(sent)

		assert len(self.w2id) == self.nwords
		assert len(self.id2w) == self.nwords

def bleu_scorer(ref, hyp, script='default'):
    refsend = []
    for i in range(len(ref)):
        refsi = []
        for j in range(len(ref[i])):
            refsi.append(ref[i][j].split())
        refsend.append(refsi)

    gensend = []
    for i in range(len(hyp)):
        gensend.append(hyp[i].split())

    if script == 'nltk':
        metrics = corpus_bleu(refsend, gensend)
        return [metrics]

    metrics = compute_bleu(refsend, gensend)
    return metrics
    
    
##################################################
# Logger.py #
##################################################

'''Logging Modules'''

def get_logger(name, log_file_path='./logs/temp.log', logging_level=logging.INFO, log_format='%(asctime)s | %(levelname)s | %(filename)s: %(lineno)s : %(funcName)s() ::\t %(message)s'):
	logger = logging.getLogger(name)
	logger.setLevel(logging_level)
	formatter = logging.Formatter(log_format)

	file_handler = logging.FileHandler(log_file_path, mode='w')
	file_handler.setLevel(logging_level)
	file_handler.setFormatter(formatter)

	stream_handler = logging.StreamHandler()
	stream_handler.setLevel(logging_level)
	stream_handler.setFormatter(formatter)

	logger.addHandler(file_handler)
	logger.addHandler(stream_handler)

	return logger

def print_log(logger, dict):
	string = ''
	for key, value in dict.items():
		string += '\n {}: {}\t'.format(key.replace('_', ' '), value)
	logger.info(string)

def store_results(config, max_val_bleu, max_val_acc, min_val_loss, max_train_acc, min_train_loss, best_epoch):
	try:
		with open(config.result_path) as f:
			res_data =json.load(f)
	except:
		res_data = {}
	try:
		min_train_loss = min_train_loss.item()
	except:
		pass
	try:
		min_val_loss = min_val_loss.item()
	except:
		pass
	try:
		data= {'run name' : str(config.run_name)
		, 'max val acc': str(max_val_acc)
		, 'max train acc': str(max_train_acc)
		, 'max val bleu' : str(max_val_bleu)
		, 'min val loss' : str(min_val_loss)
		, 'min train loss': str(min_train_loss)
		, 'best epoch': str(best_epoch)
		, 'epochs' : config.epochs
		, 'dataset' : config.dataset
		, 'embedding': config.embedding
		, 'embedding_size': config.emb1_size
		, 'embedding_lr': config.emb_lr
		, 'freeze_emb': config.freeze_emb
		, 'cell_type' : config.cell_type
		, 'bidirectional' : config.bidirectional
		, 'hidden_size' : config.hidden_size
		, 'depth' : config.depth
		, 'lr' : config.lr
		, 'batch_size' : config.batch_size
		, 'dropout' : config.dropout
		, 'separate optimizers' : config.separate_opt
		, 'opt' : config.opt
		}
		res_data[str(config.run_name)] = data

		with open(config.result_path, 'w', encoding='utf-8') as f:
			json.dump(res_data, f, ensure_ascii= False, indent= 4)
	except:
		pdb.set_trace()

def store_val_results(config, acc_score, folds_scores):
	try:
		with open(config.val_result_path) as f:
			res_data = json.load(f)
	except:
		res_data = {}

	try:
		data= {'run_name' : str(config.run_name)
		, '5-fold avg acc score' : str(acc_score)
		, 'Fold0 acc' : folds_scores[0]
		, 'Fold1 acc' : folds_scores[1]
		, 'Fold2 acc' : folds_scores[2]
		, 'Fold3 acc' : folds_scores[3]
		, 'Fold4 acc' : folds_scores[4]
		, 'epochs' : config.epochs
		, 'embedding': config.embedding
		, 'embedding_size': config.emb1_size
		, 'embedding_lr': config.emb_lr
		, 'freeze_emb': config.freeze_emb
		, 'cell_type' : config.cell_type
		, 'bidirectional' : config.bidirectional
		, 'hidden_size' : config.hidden_size
		, 'depth' : config.depth
		, 'lr' : config.lr
		, 'batch_size' : config.batch_size
		, 'dropout' : config.dropout
		, 'separate optimizers' : config.separate_opt
		, 'opt' : config.opt
		}
		res_data[str(config.run_name)] = data

		with open(config.val_result_path, 'w', encoding='utf-8') as f:
			json.dump(res_data, f, ensure_ascii= False, indent= 4)
	except:
		pdb.set_trace()
        
##################################################
# sentence_processing.py #
##################################################
def sent_to_idx(voc, sent, max_length):
	idx_vec = []
	for w in sent.split(' '):
		try:
			idx = voc.get_id(w)
			idx_vec.append(idx)
		except:
			idx_vec.append(voc.get_id('unk'))
	# idx_vec.append(voc.get_id('</s>'))
	if len(idx_vec) < max_length-1:
		idx_vec.append(voc.get_id('</s>'))
	return idx_vec


def sents_to_idx(voc, sents, max_length):
	all_indexes = []
	for sent in sents:
		all_indexes.append(sent_to_idx(voc, sent, max_length))
	return all_indexes


def sent_to_tensor(voc, sentence, device, max_length):
	indexes = sent_to_idx(voc, sentence, max_length)
	return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def batch_to_tensor(voc, sents, device, max_length):
	batch_sent = []
	# batch_label = []
	for sent in sents:
		sent_id = sent_to_tensor(voc, sent, device, max_length)
		batch_sent.append(sent_id)

	return batch_sent


def idx_to_sent(voc, tensor, no_eos=False):
	sent_word_list = []
	for idx in tensor:
		word = voc.get_word(idx.item())
		if no_eos:
			if word != '</s>':
				sent_word_list.append(word)
			# else:
			# 	break
		else:
			sent_word_list.append(word)
	return sent_word_list


def idx_to_sents(voc, tensors, no_eos=False):
	tensors = tensors.transpose(0, 1)
	batch_word_list = []
	for tensor in tensors:
		batch_word_list.append(idx_to_sent(voc, tensor, no_eos))

	return batch_word_list


def pad_seq(seq, max_length, voc):
	seq += [voc.get_id('</s>') for i in range(max_length - len(seq))]
	return seq

# def process_single(sent, label, voc, device):

def sort_by_len(seqs, input_len, device=None, dim=1):
	orig_idx = list(range(seqs.size(dim)))
	# pdb.set_trace()

	# Index by which sorting needs to be done
	sorted_idx = sorted(orig_idx, key=lambda k: input_len[k], reverse=True)
	sorted_idx= torch.LongTensor(sorted_idx)
	if device:
		sorted_idx = sorted_idx.to(device)

	sorted_seqs = seqs.index_select(1, sorted_idx)
	sorted_lens=  [input_len[i] for i in sorted_idx]

	# For restoring original order
	orig_idx = sorted(orig_idx, key=lambda k: sorted_idx[k])
	orig_idx = torch.LongTensor(orig_idx)
	if device:
		orig_idx = orig_idx.to(device)
	return sorted_seqs, sorted_lens, orig_idx


def restore_order(seqs, input_len, orig_idx):
	orig_seqs= [seqs[i] for i in orig_idx]
	orig_lens= [input_len[i] for i in orig_idx]
	return orig_seqs, orig_lens


def process_batch(sent1s, sent2s, voc1, voc2, device):
	input_len1 = [len(s) for s in sent1s]
	input_len2 = [len(s) for s in sent2s]
	max_length_1 = max(input_len1)
	max_length_2 = max(input_len2)

	sent1s_padded = [pad_seq(s, max_length_1, voc1) for s in sent1s]
	sent2s_padded = [pad_seq(s, max_length_2, voc2) for s in sent2s]

	# Convert to [Max_len X Batch]
	sent1_var = Variable(torch.LongTensor(sent1s_padded)).transpose(0, 1)
	sent2_var = Variable(torch.LongTensor(sent2s_padded)).transpose(0, 1)

	sent1_var = sent1_var.to(device)
	sent2_var = sent2_var.to(device)

	return sent1_var, sent2_var, input_len1, input_len2

## Confidence_estimation

In [35]:
def posterior_based_conf(test_ques, model):

    decoded_words, decoded_log_probs = model.greedy_decode(test_ques, return_probs = True)
    posteriors = [np.exp(sum(log_probs)) for log_probs in decoded_log_probs]
    return decoded_words, posteriors

def similarity_based_conf(test_ques, train_ques,model, sim_criteria = 'bert_score'):
    '''
    Takes a batch of test question and evaluates their closest similarities between questions in training set.
    Inputs:
        test_ques: A list of strings containing a batch of test questions. Length: Batch Size
        train_ques: A list containing **ALL** the questions present in training data. Length: |Training Data|
        model: bert_seq2exp model
        sim_criteria: Criteria used to evaluate similarity between test questions and training questions

    Returns a numpy array containing closest similarity of each test input in the batch size. Shape: [Batch Size,]
    '''

    decoded_words = model.greedy_decode(test_ques)
    if sim_criteria == 'bert_score':
        similarities = bert_sim(test_ques, train_ques, model) #[Batch Size x |Training Data|]

    elif sim_criteria == 'bleu_score':
        similarities = bleu_sim(test_ques, train_ques)
    else:
        raise ValueError("Other similarity methods not implemented yet!")

    max_sims = np.max(similarities, axis = 1)
    return decoded_words, max_sims



def bert_sim(queries, keys, model):
    '''
    Inputs
        - queries: a batch of sentences whose similarity is to be measured with other sentences. Length: L_Q
        - keys: those other sentences. Length: L_K
        - model: bert_seq2exp model

    Outputs: A numpy array containing similarites between each test sentence with all training examples. Shape: [L_Q, L_K]
    '''

    #Feed queries and keys to bert and obtain contextualized representation, using embeddings of [CLS]
    #  (TODO: try pooling instead of [CLS])
    with torch.no_grad():
        queries_rep     = model.bert(queries)[0][:,0].detach().cpu().numpy()
        keys_rep        = torch.cat([model.bert(keys[i:min(i+16, len(keys)),])[0][:,0] for i in range(0, len(keys), 16)], dim = 0)
        keys_rep        = keys_rep.detach().cpu().numpy()

    sims = np.dot(queries_rep / np.linalg.norm(queries_rep, axis = -1, keepdims = True),
                 (keys_rep / np.linalg.norm(keys_rep, axis = -1, keepdims = True)).T)
    return sims


def bleu_sim(queries, keys):
    '''
    Inputs:
        - queries: a batch of sentences whose similarity is to be measured with other sentences. Length: L_Q
        - keys: those other sentences. Length: L_K

    Outputs: A numpy array containing bleu scores between each test sentence with all training examples. Shape: [L_Q, L_K]
    '''
    bleus = [[] for i in range(len(queries))]
    for i in range(len(queries)):
        for j in range(len(keys)):
            refs = [[keys[j].split()]]
            hyps = [queries[i].split()]
            bleu = compute_bleu(refs, hyps)[0]
            bleus[i].append(bleu)

    sims = np.array(bleus)
    return sims


## Dataloader.py

In [36]:
class TextDataset(Dataset):
	'''
		Expecting csv files with columns ['sent1', 'sent2']

		Args:
						data_path: Root folder Containing all the data
						dataset: Specific Folder==> data_path/dataset/	(Should contain train.csv and dev.csv)
						max_length: Self Explanatory
						is_debug: Load a subset of data for faster testing
						is_train: 

	'''

	def __init__(self, data_path='./kaggle/input/svamp-data/data/', dataset='mawps', datatype='train', max_length=30, is_debug=False, is_train=False, grade_info=False, type_info=False, challenge_info=False):
		if datatype=='train':
			file_path = os.path.join(data_path, dataset, 'train.csv')
		elif datatype=='dev':
			file_path = os.path.join(data_path, dataset, 'dev.csv')
		else:
			file_path = os.path.join(data_path, dataset, 'dev.csv')

		if grade_info:
			self.grade_info = True
		else:
			self.grade_info = False

		if type_info:
			self.type_info = True
		else:
			self.type_info = False

		if challenge_info:
			self.challenge_info = True
		else:
			self.challenge_info = False

		file_df= pd.read_csv(file_path)

		self.ques= file_df['Question'].values
		self.eqn= file_df['Equation'].values
		self.nums= file_df['Numbers'].values
		self.ans= file_df['Answer'].values

		if grade_info:
			self.grade = file_df['Grade'].values

		if type_info:
			self.type = file_df['Type'].values

		if challenge_info:
			self.type = file_df['Type'].values
			self.var_type = file_df['Variation Type'].values
			self.annotator = file_df['Annotator'].values
			self.alternate = file_df['Alternate'].values

		if is_debug:
			self.ques= self.ques[:5000:500]
			self.eqn= self.eqn[:5000:500]

		self.max_length= max_length

		if grade_info and type_info:
			all_sents = zip(self.ques, self.eqn, self.nums, self.ans, self.grade, self.type)
		elif grade_info and not type_info:
			all_sents = zip(self.ques, self.eqn, self.nums, self.ans, self.grade)
		elif type_info and not grade_info:
			all_sents = zip(self.ques, self.eqn, self.nums, self.ans, self.type)
		elif challenge_info:
			all_sents = zip(self.ques, self.eqn, self.nums, self.ans, self.type, self.var_type, self.annotator, self.alternate)
		else:
			all_sents = zip(self.ques, self.eqn, self.nums, self.ans)

		if is_train:
			all_sents = sorted(all_sents, key = lambda x : len(x[0].split()))

		if grade_info and type_info:
			self.ques, self.eqn, self.nums, self.ans, self.grade, self.type = zip(*all_sents)
		elif grade_info and not type_info:
			self.ques, self.eqn, self.nums, self.ans, self.grade = zip(*all_sents)
		elif type_info and not grade_info:
			self.ques, self.eqn, self.nums, self.ans, self.type = zip(*all_sents)
		elif challenge_info:
			self.ques, self.eqn, self.nums, self.ans, self.type, self.var_type, self.annotator, self.alternate = zip(*all_sents)
		else:
			self.ques, self.eqn, self.nums, self.ans = zip(*all_sents)

	def __len__(self):
		return len(self.ques)

	def __getitem__(self, idx):
		ques = self.process_string(str(self.ques[idx]))
		eqn = self.process_string(str(self.eqn[idx]))
		nums = self.nums[idx]
		ans = self.ans[idx]

		if self.grade_info and self.type_info:
			grade = self.grade[idx]
			type1 = self.type[idx]
			return {'ques': self.curb_to_length(ques), 'eqn': self.curb_to_length(eqn), 'nums': nums, 'ans': ans, 'grade': grade, 'type': type1}
		elif self.grade_info and not self.type_info:
			grade = self.grade[idx]
			return {'ques': self.curb_to_length(ques), 'eqn': self.curb_to_length(eqn), 'nums': nums, 'ans': ans, 'grade': grade}
		elif self.type_info and not self.grade_info:
			type1 = self.type[idx]
			return {'ques': self.curb_to_length(ques), 'eqn': self.curb_to_length(eqn), 'nums': nums, 'ans': ans, 'type': type1}
		elif self.challenge_info:
			type1 = self.type[idx]
			var_type = self.var_type[idx]
			annotator = self.annotator[idx]
			alternate = self.alternate[idx]
			return {'ques': self.curb_to_length(ques), 'eqn': self.curb_to_length(eqn), 'nums': nums, 'ans': ans, 'type': type1, 
					'var_type': var_type, 'annotator': annotator, 'alternate': alternate}
	
		return {'ques': self.curb_to_length(ques), 'eqn': self.curb_to_length(eqn), 'nums': nums, 'ans': ans}

	def curb_to_length(self, string):
		return ' '.join(string.strip().split()[:self.max_length])

	def process_string(self, string):
		#string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
		string = re.sub(r"\'s", " 's", string)
		string = re.sub(r"\'ve", " 've", string)
		string = re.sub(r"n\'t", " n't", string)
		string = re.sub(r"\'re", " 're", string)
		string = re.sub(r"\'d", " 'd", string)
		string = re.sub(r"\'ll", " 'll", string)
		#string = re.sub(r",", " , ", string)
		#string = re.sub(r"!", " ! ", string)
		#string = re.sub(r"\(", " ( ", string)
		#string = re.sub(r"\)", " ) ", string)
		#string = re.sub(r"\?", " ? ", string)
		#string = re.sub(r"\s{2,}", " ", string)
		return string


## Model.py

In [37]:
class Seq2SeqModel(nn.Module):
	def __init__(self, config, voc1, voc2, device, logger, num_iters, EOS_tag='</s>', SOS_tag='<s>'):
		super(Seq2SeqModel, self).__init__()

		self.config = config
		self.device = device
		self.voc1 = voc1
		self.voc2 = voc2
		self.EOS_tag = EOS_tag
		self.SOS_tag = SOS_tag
		self.EOS_token = voc2.get_id(EOS_tag)
		self.SOS_token = voc2.get_id(SOS_tag)
		self.logger = logger
		self.num_iters = num_iters

		self.embedding2 = nn.Embedding(self.voc2.nwords, self.config.emb2_size)
		nn.init.uniform_(self.embedding2.weight, -1 * self.config.init_range, self.config.init_range)

		if self.config.embedding == 'bert':
			self.embedding1 = BertEncoder(self.config.emb_name, self.device, self.config.freeze_emb)
		elif self.config.embedding == 'roberta':
			self.embedding1 = RobertaEncoder(self.config.emb_name, self.device, self.config.freeze_emb)
		elif self.config.embedding == 'word2vec':
			self.config.emb1_size = 300
			self.embedding1 = nn.Embedding.from_pretrained(torch.FloatTensor(self._form_embeddings(self.config.word2vec_bin)), freeze = self.config.freeze_emb)
		else:
			self.embedding1  = nn.Embedding(self.voc1.nwords, self.config.emb1_size)
			nn.init.uniform_(self.embedding1.weight, -1 * self.config.init_range, self.config.init_range)

		self.logger.debug('Building Encoders...')
		self.encoder = Encoder(
			self.config.hidden_size,
			self.config.emb1_size,
			self.config.cell_type,
			self.config.depth,
			self.config.dropout,
			self.config.bidirectional
		)

		self.logger.debug('Encoders Built...')

		if self.config.use_attn:
			self.decoder    = LuongAttnDecoderRNN(self.config.attn_type,
												  self.embedding2,
												  self.config.cell_type,
												  self.config.hidden_size,
												  self.voc2.nwords,
												  self.config.depth,
												  self.config.dropout).to(device)
		else:
			self.decoder    = DecoderRNN(self.embedding2,
										 self.config.cell_type,
										 self.config.hidden_size,
										 self.voc2.nwords,
										 self.config.depth,
										 self.config.dropout).to(device)

		self.logger.debug('Decoder RNN Built...')

		self.logger.debug('Initalizing Optimizer and Criterion...')
		self._initialize_optimizer()

		# nn.CrossEntropyLoss() does both F.log_softmax() and nn.NLLLoss() 
		self.criterion = nn.NLLLoss() 

		self.logger.info('All Model Components Initialized...')

	def _form_embeddings(self, file_path):
		weights_all = models.KeyedVectors.load_word2vec_format(file_path, limit=200000, binary=True)
		weight_req  = torch.randn(self.voc1.nwords, self.config.emb1_size)
		for key, value in self.voc1.id2w.items():
			if value in weights_all:
				weight_req[key] = torch.FloatTensor(weights_all[value])

		return weight_req	

	def _initialize_optimizer(self):
		self.params =   list(self.embedding1.parameters()) + \
						list(self.encoder.parameters()) + \
						list(self.decoder.parameters())

		if self.config.separate_opt:
			self.emb_optimizer = AdamW(self.embedding1.parameters(), lr = self.config.emb_lr, correct_bias = True)
			self.optimizer = optim.Adam(
				[{"params": self.encoder.parameters()},
				{"params": self.decoder.parameters()}],
				lr = self.config.lr,
			)
		else:
			if self.config.opt == 'adam':
				self.optimizer = optim.Adam(
					[{"params": self.embedding1.parameters(), "lr": self.config.emb_lr},
					{"params": self.encoder.parameters()},
					{"params": self.decoder.parameters()}],
					lr = self.config.lr
				)
			elif self.config.opt == 'adadelta':
				self.optimizer = optim.Adadelta(
					[{"params": self.embedding1.parameters(), "lr": self.config.emb_lr},
					{"params": self.encoder.parameters()},
					{"params": self.decoder.parameters()}],
					lr = self.config.lr
				)
			elif self.config.opt == 'asgd':
				self.optimizer = optim.ASGD(
					[{"params": self.embedding1.parameters(), "lr": self.config.emb_lr},
					{"params": self.encoder.parameters()},
					{"params": self.decoder.parameters()}],
					lr = self.config.lr
				)
			else:
				self.optimizer = optim.SGD(
					[{"params": self.embedding1.parameters(), "lr": self.config.emb_lr},
					{"params": self.encoder.parameters()},
					{"params": self.decoder.parameters()}],
					lr = self.config.lr
				)

	def forward(self, input_seq1, input_seq2, input_len1, input_len2):
		'''
			Args:
				input_seq1 (tensor): values are word indexes | size : [max_len x batch_size]
				input_len1 (tensor): Length of each sequence in input_len1 | size : [batch_size]
				input_seq2 (tensor): values are word indexes | size : [max_len x batch_size]
				input_len2 (tensor): Length of each sequence in input_len2 | size : [batch_size]
			Returns:
				out (tensor) : Probabilities of each output label for each point | size : [batch_size x num_labels]
		'''

	def trainer(self, ques, input_seq1, input_seq2, input_len1, input_len2, config, device=None ,logger=None):
		'''
			Args:
				ques (list): input examples as is (i.e. not indexed) | size : [batch_size]
			Returns:
				
		'''
		self.optimizer.zero_grad()
		if self.config.separate_opt:
			self.emb_optimizer.zero_grad()

		if self.config.embedding == 'bert' or self.config.embedding == 'roberta':
			input_seq1, input_len1 = self.embedding1(ques)
			input_seq1 = input_seq1.transpose(0,1)
			# input_seq1: Tensor [max_len x BS x emb1_size]
			# input_len1: List [BS]
			sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
			# sorted_seqs: Tensor [max_len x BS x emb1_size]
			# input_len1: List [BS]
			# orig_idx: Tensor [BS]
		else:
			sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
			sorted_seqs = self.embedding1(sorted_seqs)

		encoder_outputs, encoder_hidden = self.encoder(sorted_seqs, sorted_len, orig_idx, self.device)
		
		self.loss =0

		decoder_input = torch.tensor([self.SOS_token for i in range(input_seq1.size(1))], device = self.device)

		if config.cell_type == 'lstm':
			decoder_hidden = (encoder_hidden[0][:self.decoder.nlayers], encoder_hidden[1][:self.decoder.nlayers])
		else:
			decoder_hidden = encoder_hidden[:self.decoder.nlayers]

		use_teacher_forcing = True if random.random() < self.config.teacher_forcing_ratio else False
		target_len = max(input_len2)

		if use_teacher_forcing:
			for step in range(target_len):
				if self.config.use_attn:
					decoder_output, decoder_hidden, decoder_attention, _ = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
				else:
					decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
				self.loss += self.criterion(decoder_output, input_seq2[step])
				decoder_input = input_seq2[step]
		else:
			for step in range(target_len):
				if self.config.use_attn:
					decoder_output, decoder_hidden, decoder_attention, _ = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
				else:
					decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
				
				topv, topi = decoder_output.topk(1)
				self.loss += self.criterion(decoder_output, input_seq2[step])
				decoder_input = topi.squeeze().detach() 

		self.loss.backward()
		if self.config.max_grad_norm > 0:
			torch.nn.utils.clip_grad_norm_(self.params, self.config.max_grad_norm)
		self.optimizer.step()
		if self.config.separate_opt:
			self.emb_optimizer.step()

		return self.loss.item()/target_len

	def greedy_decode(self, ques, input_seq1=None, input_seq2=None, input_len1=None, input_len2=None, validation=False, return_probs = False):
		with torch.no_grad():
			if self.config.embedding == 'bert' or self.config.embedding == 'roberta':
				input_seq1, input_len1 = self.embedding1(ques)
				input_seq1 = input_seq1.transpose(0,1)
				sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
			else:
				sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
				sorted_seqs = self.embedding1(sorted_seqs)

			encoder_outputs, encoder_hidden = self.encoder(sorted_seqs, sorted_len, orig_idx, self.device)

			loss = 0.0
			decoder_input = torch.tensor([self.SOS_token for i in range(input_seq1.size(1))], device=self.device)

			if self.config.cell_type == 'lstm':
				decoder_hidden = (encoder_hidden[0][:self.decoder.nlayers], encoder_hidden[1][:self.decoder.nlayers])
			else:
				decoder_hidden = encoder_hidden[:self.decoder.nlayers]

			decoded_words = [[] for i in range(input_seq1.size(1))]
			decoded_probs = [[] for i in range(input_seq1.size(1))]
			decoder_attentions = []

			if validation:
				target_len = max(input_len2)
			else:
				target_len = self.config.max_length

			for step in range(target_len):
				if self.config.use_attn:
					decoder_output, decoder_hidden, decoder_attention, _ = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
					decoder_attentions.append(decoder_attention)
				else:
					decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)

				if validation:
					loss += self.criterion(decoder_output, input_seq2[step])
				topv, topi = decoder_output.topk(1)
				for i in range(input_seq1.size(1)):
					if topi[i].item() == self.EOS_token:
						continue
					decoded_words[i].append(self.voc2.get_word(topi[i].item()))
					decoded_probs[i].append(topv[i].item())
				decoder_input = topi.squeeze().detach()

			if validation:
				if self.config.use_attn:
					return loss/target_len, decoded_words, decoder_attentions[:step + 1]
				else:
					return loss/target_len, decoded_words, None
			else:
				if return_probs:
					return decoded_words, decoded_probs

				return decoded_words

	def obtain_hidden(self, config, ques, input_seq1=None, input_seq2=None, input_len1=None, input_len2=None):
		with torch.no_grad():
			if self.config.embedding == 'bert' or self.config.embedding == 'roberta':
				input_seq1, input_len1 = self.embedding1(ques)
				input_seq1 = input_seq1.transpose(0,1)
				sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
			else:
				sorted_seqs, sorted_len, orig_idx = sort_by_len(input_seq1, input_len1, self.device)
				sorted_seqs = self.embedding1(sorted_seqs)

			encoder_outputs, encoder_hidden = self.encoder(sorted_seqs, sorted_len, orig_idx, self.device)

			loss =0.0
			decoder_input = torch.tensor([self.SOS_token for i in range(input_seq1.size(1))], device=self.device)

			if self.config.cell_type == 'lstm':
				decoder_hidden = (encoder_hidden[0][:self.decoder.nlayers], encoder_hidden[1][:self.decoder.nlayers])
			else:
				decoder_hidden = encoder_hidden[:self.decoder.nlayers]

			decoded_words = [[] for i in range(input_seq1.size(1))]
			decoder_attentions = []

			hiddens = []

			target_len = max(input_len2)

			for step in range(target_len):
				if self.config.use_attn:
					decoder_output, decoder_hidden, decoder_attention, hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
					decoder_attentions.append(decoder_attention)
				else:
					decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)

				topv, topi = decoder_output.topk(1)
				for i in range(input_seq1.size(1)):
					if topi[i].item() == self.EOS_token:
						continue
					decoded_words[i].append(self.voc2.get_word(topi[i].item()))
					hiddens.append([self.voc2.get_word(topi[i].item()), hidden[i]])
				decoder_input = topi.squeeze().detach()

			return hiddens, decoded_words

def build_model(config, voc1, voc2, device, logger, num_iters):
	'''
		Add Docstring
	'''
	model = Seq2SeqModel(config, voc1, voc2, device, logger, num_iters)
	model = model.to(device)

	return model

def train_model(model, train_dataloader, val_dataloader, voc1, voc2, device, config, logger, epoch_offset= 0, min_val_loss=float('inf'), max_val_bleu=0.0, max_val_acc = 0.0, min_train_loss=float('inf'), max_train_acc = 0.0, best_epoch = 0, writer= None):
	'''
		Add Docstring
	'''

	if config.histogram and config.save_writer and writer:
		for name, param in model.named_parameters():
			writer.add_histogram(name, param, epoch_offset)
	
	estop_count=0
	
	for epoch in range(1, config.epochs + 1):
		od = OrderedDict()
		od['Epoch'] = epoch + epoch_offset
		print_log(logger, od)

		batch_num = 1
		train_loss_epoch = 0.0
		train_acc_epoch = 0.0
		train_acc_epoch_cnt = 0.0
		train_acc_epoch_tot = 0.0
		val_loss_epoch = 0.0

		start_time= time()
		total_batches = len(train_dataloader)

		for data in train_dataloader:
			ques = data['ques']

			sent1s = sents_to_idx(voc1, data['ques'], config.max_length)
			sent2s = sents_to_idx(voc2, data['eqn'], config.max_length)
			sent1_var, sent2_var, input_len1, input_len2  = process_batch(sent1s, sent2s, voc1, voc2, device)

			nums = data['nums']
			ans = data['ans']

			model.train()

			loss = model.trainer(ques, sent1_var, sent2_var, input_len1, input_len2, config, device, logger)
			train_loss_epoch += loss

			if config.show_train_acc:
				model.eval()

				_, decoder_output, _ = model.greedy_decode(ques, sent1_var, sent2_var, input_len1, input_len2, validation=True)
				temp_acc_cnt, temp_acc_tot, _ = cal_score(decoder_output, nums, ans, data['eqn'])
				train_acc_epoch_cnt += temp_acc_cnt
				train_acc_epoch_tot += temp_acc_tot

			batch_num+=1
			print("Completed {} / {}...".format(batch_num, total_batches), end = '\r', flush = True)

		train_loss_epoch = train_loss_epoch/len(train_dataloader)
		if config.show_train_acc:
			train_acc_epoch = train_acc_epoch_cnt/train_acc_epoch_tot
		else:
			train_acc_epoch = 0.0

		time_taken = (time() - start_time)/60.0

		if config.save_writer and writer:
			writer.add_scalar('loss/train_loss', train_loss_epoch, epoch + epoch_offset)

		logger.debug('Training for epoch {} completed...\nTime Taken: {}'.format(epoch, time_taken))
		logger.debug('Starting Validation')

		val_bleu_epoch, val_loss_epoch, val_acc_epoch = run_validation(config=config, model=model, dataloader=val_dataloader, voc1=voc1, voc2=voc2, device=device, logger=logger, epoch_num = epoch)

		if train_loss_epoch < min_train_loss:
			min_train_loss = train_loss_epoch

		if train_acc_epoch > max_train_acc:
			max_train_acc = train_acc_epoch

		if val_bleu_epoch[0] > max_val_bleu:
			max_val_bleu = val_bleu_epoch[0]

		if val_loss_epoch < min_val_loss:
			min_val_loss = val_loss_epoch

		if val_acc_epoch > max_val_acc:
			max_val_acc = val_acc_epoch
			best_epoch = epoch + epoch_offset

			if config.separate_opt:
				state = {
					'epoch' : epoch + epoch_offset,
					'best_epoch': best_epoch,
					'model_state_dict': model.state_dict(),
					'voc1': model.voc1,
					'voc2': model.voc2,
					'optimizer_state_dict': model.optimizer.state_dict(),
					'emb_optimizer_state_dict': model.emb_optimizer.state_dict(),
					'train_loss_epoch' : train_loss_epoch,
					'min_train_loss' : min_train_loss,
					'train_acc_epoch' : train_acc_epoch,
					'max_train_acc' : max_train_acc,
					'val_loss_epoch' : val_loss_epoch,
					'min_val_loss' : min_val_loss,
					'val_acc_epoch' : val_acc_epoch,
					'max_val_acc' : max_val_acc,
					'val_bleu_epoch': val_bleu_epoch[0],
					'max_val_bleu': max_val_bleu
				}
			else:
				state = {
					'epoch' : epoch + epoch_offset,
					'best_epoch': best_epoch,
					'model_state_dict': model.state_dict(),
					'voc1': model.voc1,
					'voc2': model.voc2,
					'optimizer_state_dict': model.optimizer.state_dict(),
					'train_loss_epoch' : train_loss_epoch,
					'min_train_loss' : min_train_loss,
					'train_acc_epoch' : train_acc_epoch,
					'max_train_acc' : max_train_acc,
					'val_loss_epoch' : val_loss_epoch,
					'min_val_loss' : min_val_loss,
					'val_acc_epoch' : val_acc_epoch,
					'max_val_acc' : max_val_acc,
					'val_bleu_epoch': val_bleu_epoch[0],
					'max_val_bleu': max_val_bleu
				}
			logger.debug('Validation Bleu: {}'.format(val_bleu_epoch[0]))

			if config.save_model:
				save_checkpoint(state, epoch + epoch_offset, logger, config.model_path, config.ckpt)
			estop_count = 0
		else:
			estop_count+=1

		if config.save_writer and writer:
			writer.add_scalar('loss/val_loss', val_loss_epoch, epoch + epoch_offset)
			writer.add_scalar('acc/val_score', val_bleu_epoch[0], epoch + epoch_offset)

		od = OrderedDict()
		od['Epoch'] = epoch + epoch_offset
		od['best_epoch'] = best_epoch
		od['train_loss_epoch'] = train_loss_epoch
		od['min_train_loss'] = min_train_loss
		od['val_loss_epoch']= val_loss_epoch
		od['min_val_loss']= min_val_loss
		od['train_acc_epoch'] = train_acc_epoch
		od['max_train_acc'] = max_train_acc
		od['val_acc_epoch'] = val_acc_epoch
		od['max_val_acc'] = max_val_acc
		od['val_bleu_epoch'] = val_bleu_epoch
		od['max_val_bleu'] = max_val_bleu
		print_log(logger, od)

		if config.histogram and config.save_writer and writer:
			for name, param in model.named_parameters():
				writer.add_histogram(name, param, epoch + epoch_offset)

		if estop_count > config.early_stopping:
			logger.debug('Early Stopping at Epoch: {} after no improvement in {} epochs'.format(epoch, estop_count))
			break

	if config.save_writer:
		writer.export_scalars_to_json(os.path.join(config.board_path, 'all_scalars.json'))
		writer.close()

	logger.info('Training Completed for {} epochs'.format(config.epochs))

	if config.results:
		store_results(config, max_val_bleu, max_val_acc, min_val_loss, max_train_acc, min_train_loss, best_epoch)
		logger.info('Scores saved at {}'.format(config.result_path))

	return max_val_acc

def run_validation(config, model, dataloader, voc1, voc2, device, logger, epoch_num):
	batch_num = 1
	val_loss_epoch = 0.0
	val_bleu_epoch = 0.0
	val_acc_epoch = 0.0
	val_acc_epoch_cnt = 0.0
	val_acc_epoch_tot = 0.0

	model.eval()

	refs= []
	hyps= []

	if config.mode == 'test':
		questions, gen_eqns, act_eqns, scores = [], [], [], []

	display_n = config.batch_size

	with open(config.outputs_path + '/outputs.txt', 'a') as f_out:
		f_out.write('---------------------------------------\n')
		f_out.write('Epoch: ' + str(epoch_num) + '\n')
		f_out.write('---------------------------------------\n')
	total_batches = len(dataloader)
	for data in dataloader:
		sent1s = sents_to_idx(voc1, data['ques'], config.max_length)
		sent2s = sents_to_idx(voc2, data['eqn'], config.max_length)
		nums = data['nums']
		ans = data['ans']
		if config.grade_disp:
			grade = data['grade']
		if config.type_disp:
			type1 = data['type']
		if config.challenge_disp:
			type1 = data['type']
			var_type = data['var_type']
			annotator = data['annotator']
			alternate = data['alternate']

		ques = data['ques']

		sent1_var, sent2_var, input_len1, input_len2 = process_batch(sent1s, sent2s, voc1, voc2, device)

		val_loss, decoder_output, decoder_attn = model.greedy_decode(ques, sent1_var, sent2_var, input_len1, input_len2, validation=True)

		temp_acc_cnt, temp_acc_tot, disp_corr = cal_score(decoder_output, nums, ans, data['eqn'])
		val_acc_epoch_cnt += temp_acc_cnt
		val_acc_epoch_tot += temp_acc_tot

		sent1s = idx_to_sents(voc1, sent1_var, no_eos= True)
		sent2s = idx_to_sents(voc2, sent2_var, no_eos= True)

		refs += [[' '.join(sent2s[i])] for i in range(sent2_var.size(1))]
		hyps += [' '.join(decoder_output[i]) for i in range(sent1_var.size(1))]

		if config.mode == 'test':
			questions+= data['ques']
			gen_eqns += [' '.join(decoder_output[i]) for i in range(sent1_var.size(1))]
			act_eqns += [' '.join(sent2s[i]) for i in range(sent2_var.size(1))]
			scores   += [cal_score([decoder_output[i]], [nums[i]], [ans[i]], [data['eqn'][i]])[0] for i in range(sent1_var.size(1))]

		with open(config.outputs_path + '/outputs.txt', 'a') as f_out:
			f_out.write('Batch: ' + str(batch_num) + '\n')
			f_out.write('---------------------------------------\n')
			for i in range(len(sent1s[:display_n])):
				try:
					f_out.write('Example: ' + str(i) + '\n')
					if config.grade_disp:
						f_out.write('Grade: ' + str(grade[i].item()) + '\n')
					if config.type_disp:
						f_out.write('Type: ' + str(type1[i]) + '\n')
					f_out.write('Source: ' + stack_to_string(sent1s[i]) + '\n')
					f_out.write('Target: ' + stack_to_string(sent2s[i]) + '\n')
					f_out.write('Generated: ' + stack_to_string(decoder_output[i]) + '\n')
					if config.challenge_disp:
						f_out.write('Type: ' + str(type1[i]) + '\n')
						f_out.write('Variation Type: ' + str(var_type[i]) + '\n')
						f_out.write('Annotator: ' + str(annotator[i]) + '\n')
						f_out.write('Alternate: ' + str(alternate[i].item()) + '\n')
					if config.nums_disp:
						src_nums = 0
						tgt_nums = 0
						pred_nums = 0
						for k in range(len(sent1s[i])):
							if sent1s[i][k][:6] == 'number':
								src_nums += 1
						for k in range(len(sent2s[i])):
							if sent2s[i][k][:6] == 'number':
								tgt_nums += 1
						for k in range(len(decoder_output[i])):
							if decoder_output[i][k][:6] == 'number':
								pred_nums += 1
						f_out.write('Numbers in question: ' + str(src_nums) + '\n')
						f_out.write('Numbers in Target Equation: ' + str(tgt_nums) + '\n')
						f_out.write('Numbers in Predicted Equation: ' + str(pred_nums) + '\n')
					f_out.write('Result: ' + str(disp_corr[i]) + '\n' + '\n')
				except:
					logger.warning('Exception: Failed to generate')
					pdb.set_trace()
					break
			f_out.write('---------------------------------------\n')
			f_out.close()

		if batch_num % config.display_freq ==0:
			for i in range(len(sent1s[:display_n])):
				try:
					od = OrderedDict()
					logger.info('-------------------------------------')
					od['Source'] = ' '.join(sent1s[i])

					od['Target'] = ' '.join(sent2s[i])

					od['Generated'] = ' '.join(decoder_output[i])
					print_log(logger, od)
					logger.info('-------------------------------------')
				except:
					logger.warning('Exception: Failed to generate')
					pdb.set_trace()
					break

		val_loss_epoch += val_loss
		batch_num +=1
		print("Completed {} / {}...".format(batch_num, total_batches), end = '\r', flush = True)

	val_bleu_epoch = bleu_scorer(refs, hyps)
	if config.mode == 'test':
		results_df = pd.DataFrame([questions, act_eqns, gen_eqns, scores]).transpose()
		results_df.columns = ['Question', 'Actual Equation', 'Generated Equation', 'Score']
		csv_file_path = os.path.join(config.outputs_path, config.dataset+'.csv')
		results_df.to_csv(csv_file_path, index = False)
		return sum(scores)/len(scores)

	val_acc_epoch = val_acc_epoch_cnt/val_acc_epoch_tot

	return val_bleu_epoch, val_loss_epoch/len(dataloader), val_acc_epoch

def estimate_confidence(config, model, dataloader, logger):
	
	questions	= []
	act_eqns 	= []
	gen_eqns	= []
	scores		= []
	confs		= []
	batch_num = 0
	
	#Load training data (Will be useful for similarity based methods)
	train_df 	= pd.read_csv(os.path.join('data',config.dataset,'train.csv'))
	train_ques	= train_df['Question'].values 
	
	total_batches = len(dataloader)
	logger.info("Beginning estimating confidence based on {} criteria".format(config.conf))
	start = time()
	for data in dataloader:
		ques, eqn, nums, ans = data['ques'], data['eqn'], data['nums'], data['ans']
		
		if config.conf == 'posterior':
			decoded_words, confidence = posterior_based_conf(ques, model)
		elif config.conf == 'similarity':
			decoded_words, confidence = similarity_based_conf(ques, train_ques, model, sim_criteria= config.sim_criteria)
		else:
			#TODO: Implement other methods
			raise ValueError("Other confidence methods not implemented yet. Use -conf posterior")
		
		if not config.adv:
			correct_or_not = [cal_score([decoded_words[i]], [nums[i]], [ans[i]])[0] for i in range(len(decoded_words))]
		else:
			correct_or_not = [-1 for i in range(len(decoded_words))]

		gen_eqn = [' '.join(words) for words in decoded_words]
		
		questions 	+= ques
		act_eqns	+= eqn
		gen_eqns	+= gen_eqn
		scores		+= correct_or_not
		confs		+= list(confidence)
		batch_num	+= 1
		print("Completed {} / {}...".format(batch_num, total_batches), end = '\r', flush = True)

	results_df = pd.DataFrame([questions, act_eqns, gen_eqns, scores, confs]).transpose()
	results_df.columns = ['Question', 'Actual Equation', 'Generated Equation', 'Score', 'Confidence']
	if config.conf != 'similarity':
		csv_file_path = os.path.join('ConfidenceEstimates',config.dataset + '_' + config.run_name + '_' + config.conf + '.csv')
	else:
		csv_file_path = os.path.join('ConfidenceEstimates',config.dataset + '_' + config.run_name + '_' + config.conf + '_' + config.sim_criteria + '.csv')
	results_df.to_csv(csv_file_path)
	logger.info("Done in {} seconds".format(time() - start))

def get_hiddens(config, model, val_dataloader, voc1, voc2, device):
	batch_num =1
	
	model.eval()

	hiddens = []
	operands = []

	for data in val_dataloader:
		if len(data['ques']) == config.batch_size:
			sent1s = sents_to_idx(voc1, data['ques'], config.max_length)
			sent2s = sents_to_idx(voc2, data['eqn'], config.max_length)
			nums = data['nums']
			ans = data['ans']

			ques = data['ques']

			sent1_var, sent2_var, input_len1, input_len2 = process_batch(sent1s, sent2s, voc1, voc2, device)

			hidden, decoder_output = model.obtain_hidden(config, ques, sent1_var, sent2_var, input_len1, input_len2)

			infix = get_infix_eq(decoder_output, nums)[0] # WORKS ONLY FOR BATCH SIZE 1
			words = infix.split()

			type_rep = []
			operand_types = []

			for w in range(len(words)):
				if words[w] == '/':
					if words[w-1][0] == 'n':
						operand_types.append(['dividend', words[w-1]])
					if words[w+1][0] == 'n':
						operand_types.append(['divisor', words[w+1]])
				elif words[w] == '-':
					if words[w-1][0] == 'n':
						operand_types.append(['minuend', words[w-1]])
					if words[w+1][0] == 'n':
						operand_types.append(['subtrahend', words[w+1]])

			for z in range(len(operand_types)):
				entity = operand_types[z][1]
				for y in range(len(hidden)):
					if hidden[y][0] == entity:
						type_rep.append([operand_types[z][0], hidden[y][1]])

			hiddens = hiddens + hidden
			operands = operands + type_rep

	return hiddens, operands

## Main.py

In [38]:
global log_folder
global model_folder
global result_folder
global data_path
global board_path

log_folder = 'logs'
model_folder = 'models'
outputs_folder = 'outputs'
result_folder = './out/'
data_path = '/kaggle/input/svamp-dataset/data/'
board_path = './runs/'

def load_data(config, logger):
	'''
		Loads the data from the datapath in torch dataset form

		Args:
			config (dict) : configuration/args
			logger (logger) : logger object for logging

		Returns:
			dataloader(s) 
	'''
	if config.mode == 'train':
		logger.debug('Loading Training Data...')

		'''Load Datasets'''
		print(data_path)
		train_set = TextDataset(data_path=data_path, dataset=config.dataset,
								datatype='train', max_length=config.max_length, is_debug=config.debug)
		val_set = TextDataset(data_path=data_path, dataset=config.dataset, datatype='dev', max_length=config.max_length, 
								is_debug=config.debug, grade_info=config.grade_disp, type_info=config.type_disp, 
								challenge_info=config.challenge_disp)
		
		'''In case of sort by length, write a different case with shuffle=False '''
		train_dataloader = DataLoader(
			train_set, batch_size=config.batch_size, shuffle=True, num_workers=5)
		val_dataloader = DataLoader(
			val_set, batch_size=config.batch_size, shuffle=True, num_workers=5)

		train_size = len(train_dataloader) * config.batch_size
		val_size = len(val_dataloader)* config.batch_size
		
		msg = 'Training and Validation Data Loaded:\nTrain Size: {}\nVal Size: {}'.format(train_size, val_size)
		logger.info(msg)

		return train_dataloader, val_dataloader

	elif config.mode == 'test' or config.mode == 'conf':
		logger.debug('Loading Test Data...')

		test_set = TextDataset(data_path=data_path, dataset=config.dataset,
							   datatype='test', max_length=config.max_length, is_debug=config.debug)
		test_dataloader = DataLoader(
			test_set, batch_size=config.batch_size, shuffle=True, num_workers=5)

		logger.info('Test Data Loaded...')
		return test_dataloader

	else:
		logger.critical('Invalid Mode Specified')
		raise Exception('{} is not a valid mode'.format(config.mode))


kaggle_args = {
    'mode': 'train',
    'gpu': 0,
    'embedding': 'bert',
    'emb_name': 'bert-base-uncased',
    'emb1_size': 768,
    'hidden_size': 32,
    'depth': 1,
    'lr': 0.0002,
    'emb_lr': 8e-6,
    'batch_size': 16,
    'epochs': 10,
    'dataset': 'mawps-asdiv-a_svamp',
    'full_cv': False,
    'run_name': 'run_cv_asdiv-a',
}

config =  parse_arguments(kaggle_args)

mode = config.mode
if mode == 'train':
    is_train = True
else:
    is_train = False

''' Set seed for reproducibility'''
np.random.seed(config.seed)
torch.manual_seed(config.seed)
random.seed(config.seed)

'''GPU initialization'''
device = gpu_init_pytorch(config.gpu)

if config.full_cv:
    global data_path 
    data_name = config.dataset
    data_path = data_path + data_name + '/'
    config.val_result_path = os.path.join(result_folder, 'CV_results_{}.json'.format(data_name))
    fold_acc_score = 0.0
    folds_scores = []
    for z in range(5):
        run_name = config.run_name + '_fold' + str(z)
        config.dataset = 'fold' + str(z)
        config.log_path = os.path.join(log_folder, run_name)
        config.model_path = os.path.join(model_folder, run_name)
        config.board_path = os.path.join(board_path, run_name)
        config.outputs_path = os.path.join(outputs_folder, run_name)

        vocab1_path = os.path.join(config.model_path, 'vocab1.p')
        vocab2_path = os.path.join(config.model_path, 'vocab2.p')
        config_file = os.path.join(config.model_path, 'config.p')
        log_file = os.path.join(config.log_path, 'log.txt')

        if config.results:
            config.result_path = os.path.join(result_folder, 'val_results_{}_{}.json'.format(data_name, config.dataset))

        if is_train:
            create_save_directories(config.log_path)
            create_save_directories(config.model_path)
            create_save_directories(config.outputs_path)
        else:
            create_save_directories(config.log_path)
            create_save_directories(config.result_path)

        logger = get_logger(run_name, log_file, logging.DEBUG)
        writer = SummaryWriter(config.board_path)

        logger.debug('Created Relevant Directories')
        logger.info('Experiment Name: {}'.format(config.run_name))

        '''Read Files and create/load Vocab'''
        if is_train:
            train_dataloader, val_dataloader = load_data(config, logger)

            logger.debug('Creating Vocab...')

            voc1 = Voc1()
            voc1.create_vocab_dict(config, train_dataloader)

            # To Do : Remove Later
            voc1.add_to_vocab_dict(config, val_dataloader)

            voc2 = Voc2(config)
            voc2.create_vocab_dict(config, train_dataloader)

            # To Do : Remove Later
            voc2.add_to_vocab_dict(config, val_dataloader)

            logger.info(
                'Vocab Created with number of words : {}'.format(voc1.nwords))

            with open(vocab1_path, 'wb') as f:
                pickle.dump(voc1, f, protocol=pickle.HIGHEST_PROTOCOL)
            with open(vocab2_path, 'wb') as f:
                pickle.dump(voc2, f, protocol=pickle.HIGHEST_PROTOCOL)

            logger.info('Vocab saved at {}'.format(vocab1_path))

        else:
            test_dataloader = load_data(config, logger)
            logger.info('Loading Vocab File...')

            with open(vocab1_path, 'rb') as f:
                voc1 = pickle.load(f)
            with open(vocab2_path, 'rb') as f:
                voc2 = pickle.load(f)

            logger.info('Vocab Files loaded from {}\nNumber of Words: {}'.format(vocab1_path, voc1.nwords))

        checkpoint = get_latest_checkpoint(config.model_path, logger)

        if is_train:
            model = build_model(config=config, voc1=voc1, voc2=voc2, device=device, logger=logger, num_iters=len(train_dataloader))

            logger.info('Initialized Model')

            if checkpoint == None:
                min_val_loss = torch.tensor(float('inf')).item()
                min_train_loss = torch.tensor(float('inf')).item()
                max_val_bleu = 0.0
                max_val_acc = 0.0
                max_train_acc = 0.0
                best_epoch = 0
                epoch_offset = 0
            else:
                epoch_offset, min_train_loss, min_val_loss, max_train_acc, max_val_acc, max_val_bleu, best_epoch, voc1, voc2 = load_checkpoint(config, model, config.mode, checkpoint, logger, device)

            with open(config_file, 'wb') as f:
                pickle.dump(vars(config), f, protocol=pickle.HIGHEST_PROTOCOL)

            logger.debug('Config File Saved')

            logger.info('Starting Training Procedure')
            max_val_acc = train_model(model, train_dataloader, val_dataloader, voc1, voc2,
                        device, config, logger, epoch_offset, min_val_loss, max_val_bleu, max_val_acc, min_train_loss, max_train_acc, best_epoch, writer)

        else:
            gpu = config.gpu

            with open(config_file, 'rb') as f:
                config = AttrDict(pickle.load(f))
                config.gpu = gpu

            model = build_model(config=config, voc1=voc1, voc2=voc2, device=device, logger=logger)

            epoch_offset, min_train_loss, min_val_loss, max_train_acc, max_val_acc, max_val_bleu, best_epoch, voc1, voc2 = load_checkpoint(config, model, config.mode, checkpoint, logger, device)

            logger.info('Prediction from')
            od = OrderedDict()
            od['epoch'] = epoch_offset
            od['min_train_loss'] = min_train_loss
            od['min_val_loss'] = min_val_loss
            od['max_train_acc'] = max_train_acc
            od['max_val_acc'] = max_val_acc
            od['max_val_bleu'] = max_val_bleu
            od['best_epoch'] = best_epoch
            print_log(logger, od)

            test_acc_epoch, test_loss_epoch = run_validation(config, model, test_dataloader, voc1, voc2, device, logger)
            logger.info('Accuracy: {} \t Loss: {}'.format(test_acc_epoch, test_loss_epoch))

        fold_acc_score += max_val_acc
        folds_scores.append(max_val_acc)

    fold_acc_score = fold_acc_score/5
    store_val_results(config, fold_acc_score, folds_scores)
    logger.info('Final Val score: {}'.format(fold_acc_score))


else:
    '''Run Config files/paths'''
    run_name = config.run_name
    config.log_path = os.path.join(log_folder, run_name)
    config.model_path = os.path.join(model_folder, run_name)
    config.board_path = os.path.join(board_path, run_name)
    config.outputs_path = os.path.join(outputs_folder, run_name)

    vocab1_path = os.path.join(config.model_path, 'vocab1.p')
    vocab2_path = os.path.join(config.model_path, 'vocab2.p')
    config_file = os.path.join(config.model_path, 'config.p')
    log_file = os.path.join(config.log_path, 'log.txt')

    if config.results:
        config.result_path = os.path.join(result_folder, 'val_results_{}.json'.format(config.dataset))

    if is_train:
        create_save_directories(config.log_path)
        create_save_directories(config.model_path)
        create_save_directories(config.outputs_path)
    else:
        create_save_directories(config.log_path)
        create_save_directories(config.result_path)

    logger = get_logger(run_name, log_file, logging.DEBUG)
    writer = SummaryWriter(config.board_path)

    logger.debug('Created Relevant Directories')
    logger.info('Experiment Name: {}'.format(config.run_name))

    '''Read Files and create/load Vocab'''
    if is_train:
        train_dataloader, val_dataloader = load_data(config, logger)

        logger.debug('Creating Vocab...')

        voc1 = Voc1()
        voc1.create_vocab_dict(config, train_dataloader)

        # To Do : Remove Later
        voc1.add_to_vocab_dict(config, val_dataloader)

        voc2 = Voc2(config)
        voc2.create_vocab_dict(config, train_dataloader)

        # To Do : Remove Later
        voc2.add_to_vocab_dict(config, val_dataloader)

        logger.info(
            'Vocab Created with number of words : {}'.format(voc1.nwords))

        with open(vocab1_path, 'wb') as f:
            pickle.dump(voc1, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(vocab2_path, 'wb') as f:
            pickle.dump(voc2, f, protocol=pickle.HIGHEST_PROTOCOL)

        logger.info('Vocab saved at {}'.format(vocab1_path))

    else:
        test_dataloader = load_data(config, logger)
        logger.info('Loading Vocab File...')

        with open(vocab1_path, 'rb') as f:
            voc1 = pickle.load(f)
        with open(vocab2_path, 'rb') as f:
            voc2 = pickle.load(f)

        logger.info('Vocab Files loaded from {}\nNumber of Words: {}'.format(vocab1_path, voc1.nwords))

    checkpoint = get_latest_checkpoint(config.model_path, logger)

    if is_train:
        model = build_model(config=config, voc1=voc1, voc2=voc2, device=device, logger=logger, num_iters=len(train_dataloader))

        logger.info('Initialized Model')

        if checkpoint == None:
            min_val_loss = torch.tensor(float('inf')).item()
            min_train_loss = torch.tensor(float('inf')).item()
            max_val_bleu = 0.0
            max_val_acc = 0.0
            max_train_acc = 0.0
            best_epoch = 0
            epoch_offset = 0
        else:
            epoch_offset, min_train_loss, min_val_loss, max_train_acc, max_val_acc, max_val_bleu, best_epoch, voc1, voc2 = load_checkpoint(config, model, config.mode, checkpoint, logger, device)

        with open(config_file, 'wb') as f:
            pickle.dump(vars(config), f, protocol=pickle.HIGHEST_PROTOCOL)

        logger.debug('Config File Saved')

        logger.info('Starting Training Procedure')
        train_model(model, train_dataloader, val_dataloader, voc1, voc2,
                    device, config, logger, epoch_offset, min_val_loss, max_val_bleu, max_val_acc, min_train_loss, max_train_acc, best_epoch, writer)

    else :
        gpu = config.gpu
        conf = config.conf
        sim_criteria = config.sim_criteria
        adv = config.adv
        mode = config.mode
        dataset = config.dataset
        batch_size = config.batch_size
        with open(config_file, 'rb') as f:
            config = AttrDict(pickle.load(f))
            config.gpu = gpu
            config.conf = conf
            config.sim_criteria = sim_criteria
            config.adv = adv
            config.mode = mode
            config.dataset = dataset
            config.batch_size = batch_size

        model = build_model(config=config, voc1=voc1, voc2=voc2, device=device, logger=logger,num_iters=len(test_dataloader))

        epoch_offset, min_train_loss, min_val_loss, max_train_acc, max_val_acc, max_val_bleu, best_epoch, voc1, voc2 = load_checkpoint(config, model, config.mode, checkpoint, logger, device)

        logger.info('Prediction from')
        od = OrderedDict()
        od['epoch'] = epoch_offset
        od['min_train_loss'] = min_train_loss
        od['min_val_loss'] = min_val_loss
        od['max_train_acc'] = max_train_acc
        od['max_val_acc'] = max_val_acc
        od['max_val_bleu'] = max_val_bleu
        od['best_epoch'] = best_epoch
        print_log(logger, od)

        if config.mode == 'test':
            test_acc_epoch = run_validation(config, model, test_dataloader, voc1, voc2, device, logger, 0)
            logger.info('Accuracy: {}'.format(test_acc_epoch))
        else:
            estimate_confidence(config, model, test_dataloader, logger)



2024-11-13 07:49:28,128 | DEBUG | 1513005854.py: 262 : <module>() ::	 Created Relevant Directories
2024-11-13 07:49:28,128 | DEBUG | 1513005854.py: 262 : <module>() ::	 Created Relevant Directories
2024-11-13 07:49:28,128 | DEBUG | 1513005854.py: 262 : <module>() ::	 Created Relevant Directories
2024-11-13 07:49:28,128 | DEBUG | 1513005854.py: 262 : <module>() ::	 Created Relevant Directories
2024-11-13 07:49:28,128 | DEBUG | 1513005854.py: 262 : <module>() ::	 Created Relevant Directories
2024-11-13 07:49:28,132 | INFO | 1513005854.py: 263 : <module>() ::	 Experiment Name: run_cv_asdiv-a
2024-11-13 07:49:28,132 | INFO | 1513005854.py: 263 : <module>() ::	 Experiment Name: run_cv_asdiv-a
2024-11-13 07:49:28,132 | INFO | 1513005854.py: 263 : <module>() ::	 Experiment Name: run_cv_asdiv-a
2024-11-13 07:49:28,132 | INFO | 1513005854.py: 263 : <module>() ::	 Experiment Name: run_cv_asdiv-a
2024-11-13 07:49:28,132 | INFO | 1513005854.py: 263 : <module>() ::	 Experiment Name: run_cv_asdiv-a


/kaggle/input/svamp-dataset/data/


2024-11-13 07:49:29,657 | INFO | 1513005854.py: 283 : <module>() ::	 Vocab Created with number of words : 4086
2024-11-13 07:49:29,657 | INFO | 1513005854.py: 283 : <module>() ::	 Vocab Created with number of words : 4086
2024-11-13 07:49:29,657 | INFO | 1513005854.py: 283 : <module>() ::	 Vocab Created with number of words : 4086
2024-11-13 07:49:29,657 | INFO | 1513005854.py: 283 : <module>() ::	 Vocab Created with number of words : 4086
2024-11-13 07:49:29,657 | INFO | 1513005854.py: 283 : <module>() ::	 Vocab Created with number of words : 4086
2024-11-13 07:49:29,668 | INFO | 1513005854.py: 291 : <module>() ::	 Vocab saved at models/run_cv_asdiv-a/vocab1.p
2024-11-13 07:49:29,668 | INFO | 1513005854.py: 291 : <module>() ::	 Vocab saved at models/run_cv_asdiv-a/vocab1.p
2024-11-13 07:49:29,668 | INFO | 1513005854.py: 291 : <module>() ::	 Vocab saved at models/run_cv_asdiv-a/vocab1.p
2024-11-13 07:49:29,668 | INFO | 1513005854.py: 291 : <module>() ::	 Vocab saved at models/run_cv_as

Completed 198 / 197...

2024-11-13 07:50:14,695 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0.7393231471379598
2024-11-13 07:50:14,695 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0.7393231471379598
2024-11-13 07:50:14,695 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0.7393231471379598
2024-11-13 07:50:14,695 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0.7393231471379598
2024-11-13 07:50:14,695 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 1 completed...
Time Taken: 0.7393231471379598
2024-11-13 07:50:14,700 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:50:14,700 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:50:14,700 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:50:14,700 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:50:20,355 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:50:20,355 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:50:20,355 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:50:20,355 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:50:20,355 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:50:20,360 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 1	
 best epoch: 1	
 train loss epoch: 2.174502324533542	
 min train loss: 2.174502324533542	
 val loss epoch: 1.613183617591858	
 min val loss: 1.613183617591858	
 train acc epoch: 0.00031867431485022306	
 max train acc: 0.00031867431485022306	
 val acc epoch: 0.015	
 max val acc: 0.015	
 val bleu epoch: (0.0, [0.798, 0.0, 0.0, 0.0], 0.0844158583405561, 0.2880184331797235, 1000, 3472)	
 max val bleu: 0.0	
2024-11-13 07:50:20,360 | INFO | 1857244066.py:

Completed 198 / 197...

2024-11-13 07:51:05,131 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 2 completed...
Time Taken: 0.7460256139437358
2024-11-13 07:51:05,131 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 2 completed...
Time Taken: 0.7460256139437358
2024-11-13 07:51:05,131 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 2 completed...
Time Taken: 0.7460256139437358
2024-11-13 07:51:05,131 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 2 completed...
Time Taken: 0.7460256139437358
2024-11-13 07:51:05,131 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 2 completed...
Time Taken: 0.7460256139437358
2024-11-13 07:51:05,135 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:05,135 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:05,135 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:05,135 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:51:10,907 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:51:10,907 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:51:10,907 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:51:10,907 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:51:10,907 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.0
2024-11-13 07:51:10,913 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 2	
 best epoch: 2	
 train loss epoch: 1.283494258047966	
 min train loss: 1.283494258047966	
 val loss epoch: 1.2596991062164307	
 min val loss: 1.2596991062164307	
 train acc epoch: 0.022944550669216062	
 max train acc: 0.022944550669216062	
 val acc epoch: 0.038	
 max val acc: 0.038	
 val bleu epoch: (0.0, [0.5411997363216875, 0.2015732546705998, 0.047388781431334626, 0.0], 0.8655727295446725, 0.8738479262672811, 3034, 3472)	
 max val bleu: 0.0	
2024

Completed 198 / 197...

2024-11-13 07:51:55,938 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 3 completed...
Time Taken: 0.750271757443746
2024-11-13 07:51:55,938 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 3 completed...
Time Taken: 0.750271757443746
2024-11-13 07:51:55,938 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 3 completed...
Time Taken: 0.750271757443746
2024-11-13 07:51:55,938 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 3 completed...
Time Taken: 0.750271757443746
2024-11-13 07:51:55,938 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 3 completed...
Time Taken: 0.750271757443746
2024-11-13 07:51:55,942 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:55,942 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:55,942 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:51:55,942 | DEBUG | 477948701.

Completed 64 / 63...

2024-11-13 07:52:01,715 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17013697465031893
2024-11-13 07:52:01,715 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17013697465031893
2024-11-13 07:52:01,715 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17013697465031893
2024-11-13 07:52:01,715 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17013697465031893
2024-11-13 07:52:01,715 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17013697465031893
2024-11-13 07:52:01,719 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 3	
 best epoch: 3	
 train loss epoch: 0.9431342226599677	
 min train loss: 0.9431342226599677	
 val loss epoch: 1.220763921737671	
 min val loss: 1.220763921737671	
 train acc epoch: 0.14117272147864882	
 max train acc: 0.14117272147864882	
 val acc epoch: 0.112	
 max val acc: 0.112	
 val bleu epoch: (0.17013697465031893, [0.6209837930054023, 0.2487087802940008, 0.121951219

Completed 198 / 197...

2024-11-13 07:52:47,010 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 4 completed...
Time Taken: 0.7547236124674479
2024-11-13 07:52:47,010 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 4 completed...
Time Taken: 0.7547236124674479
2024-11-13 07:52:47,010 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 4 completed...
Time Taken: 0.7547236124674479
2024-11-13 07:52:47,010 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 4 completed...
Time Taken: 0.7547236124674479
2024-11-13 07:52:47,010 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 4 completed...
Time Taken: 0.7547236124674479
2024-11-13 07:52:47,015 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:52:47,015 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:52:47,015 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:52:47,015 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:52:52,824 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17079564693963897
2024-11-13 07:52:52,824 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17079564693963897
2024-11-13 07:52:52,824 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17079564693963897
2024-11-13 07:52:52,824 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17079564693963897
2024-11-13 07:52:52,824 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.17079564693963897
2024-11-13 07:52:52,829 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 4	
 best epoch: 4	
 train loss epoch: 0.8040544736203807	
 min train loss: 0.8040544736203807	
 val loss epoch: 1.347752332687378	
 min val loss: 1.220763921737671	
 train acc epoch: 0.1755895474824729	
 max train acc: 0.1755895474824729	
 val acc epoch: 0.115	
 max val acc: 0.115	
 val bleu epoch: (0.17079564693963897, [0.6682570867246702, 0.25595005852516584, 0.1273192578

Completed 198 / 197...

2024-11-13 07:53:37,818 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 5 completed...
Time Taken: 0.7496691465377807
2024-11-13 07:53:37,818 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 5 completed...
Time Taken: 0.7496691465377807
2024-11-13 07:53:37,818 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 5 completed...
Time Taken: 0.7496691465377807
2024-11-13 07:53:37,818 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 5 completed...
Time Taken: 0.7496691465377807
2024-11-13 07:53:37,818 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 5 completed...
Time Taken: 0.7496691465377807
2024-11-13 07:53:37,823 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:53:37,823 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:53:37,823 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:53:37,823 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:53:43,708 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 5	
 best epoch: 4	
 train loss epoch: 0.7371042163339767	
 min train loss: 0.7371042163339767	
 val loss epoch: 1.4336364269256592	
 min val loss: 1.220763921737671	
 train acc epoch: 0.18132568514977693	
 max train acc: 0.18132568514977693	
 val acc epoch: 0.111	
 max val acc: 0.115	
 val bleu epoch: (0.19183057805954024, [0.6684952978056427, 0.2662659123055163, 0.13402625820568928, 0.05676328502415459], 1.0, 1.1025345622119815, 3828, 3472)	
 max val bleu: 0.19183057805954024	
2024-11-13 07:53:43,708 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 5	
 best epoch: 4	
 train loss epoch: 0.7371042163339767	
 min train loss: 0.7371042163339767	
 val loss epoch: 1.4336364269256592	
 min val loss: 1.220763921737671	
 train acc epoch: 0.18132568514977693	
 max train acc: 0.18132568514977693	
 val acc epoch: 0.111	
 max val acc: 0.115	
 val bleu epoch: (0.19183057805954024, [0.6684952978056427, 0.26626591230

Completed 198 / 197...

2024-11-13 07:54:28,896 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 6 completed...
Time Taken: 0.7529789725939433
2024-11-13 07:54:28,896 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 6 completed...
Time Taken: 0.7529789725939433
2024-11-13 07:54:28,896 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 6 completed...
Time Taken: 0.7529789725939433
2024-11-13 07:54:28,896 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 6 completed...
Time Taken: 0.7529789725939433
2024-11-13 07:54:28,896 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 6 completed...
Time Taken: 0.7529789725939433
2024-11-13 07:54:28,900 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:54:28,900 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:54:28,900 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:54:28,900 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:54:34,743 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 6	
 best epoch: 4	
 train loss epoch: 0.6801659926444343	
 min train loss: 0.6801659926444343	
 val loss epoch: 1.381201982498169	
 min val loss: 1.220763921737671	
 train acc epoch: 0.1988527724665392	
 max train acc: 0.1988527724665392	
 val acc epoch: 0.111	
 max val acc: 0.115	
 val bleu epoch: (0.19786575289419225, [0.6805034815211569, 0.2757863935625457, 0.13321799307958476, 0.06130790190735695], 1.0, 1.0754608294930876, 3734, 3472)	
 max val bleu: 0.19786575289419225	
2024-11-13 07:54:34,743 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 6	
 best epoch: 4	
 train loss epoch: 0.6801659926444343	
 min train loss: 0.6801659926444343	
 val loss epoch: 1.381201982498169	
 min val loss: 1.220763921737671	
 train acc epoch: 0.1988527724665392	
 max train acc: 0.1988527724665392	
 val acc epoch: 0.111	
 max val acc: 0.115	
 val bleu epoch: (0.19786575289419225, [0.6805034815211569, 0.2757863935625457,

Completed 198 / 197...

2024-11-13 07:55:19,843 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 7 completed...
Time Taken: 0.7514898339907329
2024-11-13 07:55:19,843 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 7 completed...
Time Taken: 0.7514898339907329
2024-11-13 07:55:19,843 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 7 completed...
Time Taken: 0.7514898339907329
2024-11-13 07:55:19,843 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 7 completed...
Time Taken: 0.7514898339907329
2024-11-13 07:55:19,843 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 7 completed...
Time Taken: 0.7514898339907329
2024-11-13 07:55:19,847 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:55:19,847 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:55:19,847 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:55:19,847 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:55:25,660 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.21332353411658372
2024-11-13 07:55:25,660 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.21332353411658372
2024-11-13 07:55:25,660 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.21332353411658372
2024-11-13 07:55:25,660 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.21332353411658372
2024-11-13 07:55:25,660 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.21332353411658372
2024-11-13 07:55:25,664 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 7	
 best epoch: 7	
 train loss epoch: 0.632317229515405	
 min train loss: 0.632317229515405	
 val loss epoch: 1.4509685039520264	
 min val loss: 1.220763921737671	
 train acc epoch: 0.22020395156150413	
 max train acc: 0.22020395156150413	
 val acc epoch: 0.125	
 max val acc: 0.125	
 val bleu epoch: (0.21332353411658372, [0.6979253112033195, 0.2910133843212237, 0.1529411764

Completed 198 / 197...

2024-11-13 07:56:10,821 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 8 completed...
Time Taken: 0.7525042851765951
2024-11-13 07:56:10,821 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 8 completed...
Time Taken: 0.7525042851765951
2024-11-13 07:56:10,821 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 8 completed...
Time Taken: 0.7525042851765951
2024-11-13 07:56:10,821 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 8 completed...
Time Taken: 0.7525042851765951
2024-11-13 07:56:10,821 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 8 completed...
Time Taken: 0.7525042851765951
2024-11-13 07:56:10,826 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:56:10,826 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:56:10,826 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:56:10,826 | DEBUG | 47794

Completed 64 / 63...

2024-11-13 07:56:16,619 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.22385528009206107
2024-11-13 07:56:16,619 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.22385528009206107
2024-11-13 07:56:16,619 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.22385528009206107
2024-11-13 07:56:16,619 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.22385528009206107
2024-11-13 07:56:16,619 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.22385528009206107
2024-11-13 07:56:16,625 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 8	
 best epoch: 8	
 train loss epoch: 0.5804588939736139	
 min train loss: 0.5804588939736139	
 val loss epoch: 1.5635582208633423	
 min val loss: 1.220763921737671	
 train acc epoch: 0.2995538559592097	
 max train acc: 0.2995538559592097	
 val acc epoch: 0.142	
 max val acc: 0.142	
 val bleu epoch: (0.22385528009206107, [0.6924101198402131, 0.29509981851179673, 0.157264957

Completed 198 / 197...

2024-11-13 07:57:01,982 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 9 completed...
Time Taken: 0.755810538927714
2024-11-13 07:57:01,982 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 9 completed...
Time Taken: 0.755810538927714
2024-11-13 07:57:01,982 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 9 completed...
Time Taken: 0.755810538927714
2024-11-13 07:57:01,982 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 9 completed...
Time Taken: 0.755810538927714
2024-11-13 07:57:01,982 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 9 completed...
Time Taken: 0.755810538927714
2024-11-13 07:57:01,986 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:01,986 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:01,986 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:01,986 | DEBUG | 477948701.

Completed 64 / 63...

2024-11-13 07:57:07,863 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2259107508225161
2024-11-13 07:57:07,863 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2259107508225161
2024-11-13 07:57:07,863 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2259107508225161
2024-11-13 07:57:07,863 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2259107508225161
2024-11-13 07:57:07,863 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2259107508225161
2024-11-13 07:57:07,868 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 9	
 best epoch: 9	
 train loss epoch: 0.5245336416913368	
 min train loss: 0.5245336416913368	
 val loss epoch: 1.6859229803085327	
 min val loss: 1.220763921737671	
 train acc epoch: 0.3467176545570427	
 max train acc: 0.3467176545570427	
 val acc epoch: 0.156	
 max val acc: 0.156	
 val bleu epoch: (0.2259107508225161, [0.7017721518987342, 0.2952542372881356, 0.1635897435897436

Completed 198 / 197...

2024-11-13 07:57:52,917 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 10 completed...
Time Taken: 0.7507085204124451
2024-11-13 07:57:52,917 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 10 completed...
Time Taken: 0.7507085204124451
2024-11-13 07:57:52,917 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 10 completed...
Time Taken: 0.7507085204124451
2024-11-13 07:57:52,917 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 10 completed...
Time Taken: 0.7507085204124451
2024-11-13 07:57:52,917 | DEBUG | 477948701.py: 369 : train_model() ::	 Training for epoch 10 completed...
Time Taken: 0.7507085204124451
2024-11-13 07:57:52,921 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:52,921 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:52,921 | DEBUG | 477948701.py: 370 : train_model() ::	 Starting Validation
2024-11-13 07:57:52,921 | DEBUG | 

Completed 64 / 63...

2024-11-13 07:57:58,750 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2386855797201566
2024-11-13 07:57:58,750 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2386855797201566
2024-11-13 07:57:58,750 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2386855797201566
2024-11-13 07:57:58,750 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2386855797201566
2024-11-13 07:57:58,750 | DEBUG | 477948701.py: 429 : train_model() ::	 Validation Bleu: 0.2386855797201566
2024-11-13 07:57:58,755 | INFO | 1857244066.py: 424 : print_log() ::	 
 Epoch: 10	
 best epoch: 10	
 train loss epoch: 0.4673842300416651	
 min train loss: 0.4673842300416651	
 val loss epoch: 1.548175573348999	
 min val loss: 1.220763921737671	
 train acc epoch: 0.4181007010834927	
 max train acc: 0.4181007010834927	
 val acc epoch: 0.164	
 max val acc: 0.164	
 val bleu epoch: (0.2386855797201566, [0.7120042587170615, 0.30758070366340223, 0.17529880478087

--Return--
None
> [0;32m/tmp/ipykernel_30/1857244066.py[0m(469)[0;36mstore_results[0;34m()[0m
[0;32m    467 [0;31m                        [0mjson[0m[0;34m.[0m[0mdump[0m[0;34m([0m[0mres_data[0m[0;34m,[0m [0mf[0m[0;34m,[0m [0mensure_ascii[0m[0;34m=[0m [0;32mFalse[0m[0;34m,[0m [0mindent[0m[0;34m=[0m [0;36m4[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    468 [0;31m        [0;32mexcept[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 469 [0;31m                [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    470 [0;31m[0;34m[0m[0m
[0m[0;32m    471 [0;31m[0;32mdef[0m [0mstore_val_results[0m[0;34m([0m[0mconfig[0m[0;34m,[0m [0macc_score[0m[0;34m,[0m [0mfolds_scores[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  exit


In [None]:
# # Custom input problem
# custom_problem = ["John has 5 apples. John gave 3 to Mary. How many apples does John have now?"]

# run_name = config.run_name
# config.log_path = os.path.join(log_folder, run_name)
# config.model_path = os.path.join(model_folder, run_name)
# config.board_path = os.path.join(board_path, run_name)
# config.outputs_path = os.path.join(outputs_folder, run_name)

# vocab1_path = os.path.join(config.model_path, 'vocab1.p')
# vocab2_path = os.path.join(config.model_path, 'vocab2.p')
# config_file = os.path.join(config.model_path, 'config.p')
# log_file = os.path.join(config.log_path, 'log.txt')
    
# with open(vocab1_path, 'rb') as f:
# 	voc1 = pickle.load(f)
# with open(vocab2_path, 'rb') as f:
# 	voc2 = pickle.load(f)

# device = gpu_init_pytorch(config.gpu)

# # Convert input problem to indices using your vocabulary (voc1)
# input_problem = sents_to_idx(voc1, custom_problem, config.max_length)

# def process_batch(sent1s, voc1, device):
# 	input_len1 = [len(s) for s in sent1s]
# 	max_length_1 = max(input_len1)

# 	sent1s_padded = [pad_seq(s, max_length_1, voc1) for s in sent1s]

# 	# Convert to [Max_len X Batch]
# 	sent1_var = Variable(torch.LongTensor(sent1s_padded)).transpose(0, 1)

# 	sent1_var = sent1_var.to(device)

# 	return sent1_var, input_len1

# # Process the batch for input
# sent1_var, input_len1 = process_batch(input_problem, voc1, device)

# # Generate the predicted output using greedy decoding
# decoder_output = model.greedy_decode(custom_problem, sent1_var, None, input_len1, None)

# # Print the generated equation
# print("Generated Equation: ", ' '.join(decoder_output[0]))

In [39]:
def generate_full_question(question, numbers):
    for i, num in enumerate(numbers):
        placeholder = f"number{i}"
        question = question.replace(placeholder, str(num))
    return question

def convert_eqn(equation, numbers):
    for i, num in enumerate(numbers):
        placeholder = f"number{i}"
        equation = equation.replace(placeholder, str(num))
    return equation


# Function to write evaluation results into a file
def write_to_file(filename, data):
    with open(filename, 'w') as f:
        for line in data:
            f.write(line + '\n')

# Loop over the validation data and collect output for file
output_lines = []
for data in val_dataloader:
    # Convert questions and equations to index representations
    sent1s = sents_to_idx(voc1, data['ques'], config.max_length)
    sent2s = sents_to_idx(voc2, data['eqn'], config.max_length)
    nums = data['nums']
    ans = data['ans']
    
    # Prepare data for the model
    ques = data['ques']
    sent1_var, sent2_var, input_len1, input_len2 = process_batch(sent1s, sent2s, voc1, voc2, device)
    
    # Perform decoding
    val_loss, decoder_output, decoder_attn = model.greedy_decode(
        ques, sent1_var, sent2_var, input_len1, input_len2, validation=True
    )
    
    # Iterate over each entry in the batch and collect the required information
    for i in range(len(ques)):
        # Retrieve question, expected equation, numbers, and decoder output
        question = ques[i]
        expected_eqn = data['eqn'][i]
        decoded_eqn = ' '.join(decoder_output[i])  # Convert list to string format
        numbers = list(map(int, nums[i].split()))
        true_answer = ans[i].item()

        # Convert the equation tokens and evaluate the decoded answer
        op = stack_to_string(decoder_output[i])
        num = [float(nu) for nu in nums[i].split()]
        pred = ans_evaluator(op, num)
        
        # Generate the converted question and equations
        converted_question = generate_full_question(question, numbers)
        converted_expected_eqn = convert_eqn(expected_eqn, numbers)
        converted_decoded_eqn = convert_eqn(decoded_eqn, numbers)

        # Compare decoded answer with true answer
        result_comparison = "Correct" if abs(pred - true_answer) <= 0.1 else "Incorrect"

        # Prepare output for file
        # output_lines.append(f"Question {i+1}: {question}")
        output_lines.append(f"Converted Question {i+1}: {converted_question}")
        # output_lines.append(f"Expected Equation: {expected_eqn}")
        # output_lines.append(f"Converted Expected Equation: {converted_expected_eqn}")
        # output_lines.append(f"Decoded Equation: {decoded_eqn}")
        # output_lines.append(f"Converted Decoded Equation: {converted_decoded_eqn}")
        # output_lines.append(f"Numbers: {numbers}")
        output_lines.append(f"True Answer: {true_answer}")
        output_lines.append(f"Decoded Answer: {pred}")
        output_lines.append(f"Predicted Result: {result_comparison}")
        output_lines.append("-" * 80)


# Write all collected lines to eval.txt
write_to_file("rnn_eval_bert.txt", output_lines)

In [None]:
import torch
torch.save(model, '/kaggle/working/entire_model.pth')

In [None]:
!zip -r /kaggle/working/kaggle_working_dir.zip /kaggle/working

In [None]:
# from IPython.display import FileLink
# FileLink(r'/kaggle/working/kaggle_working_dir.zip')