Make a copy of this notebook and rename it using your USERID, in the following format: 2017CSZ8058

Give read access to keshavkolluru@gmail.com, vishalsaley114@gmail.com and kartikeya.badola@gmail.com


In [None]:
## DONT CHANGE THIS CELL
!wget http://www.cse.iitd.ac.in/~mausam/courses/col772/autumn2021/A3/data.zip

In [None]:
## Import relevant packages

import os

import time
import datetime

import torch
import torch.optim as O
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import logging
from argparse import ArgumentParser

from pdb import set_trace

In [None]:
## Various utility functions

def parse_args(argv=None):
	"""Build the 'NLI Baseline' argument parser, parse, and validate the result.

	Args:
		argv: optional list of argument strings. When None (the default),
			argparse reads the arguments from ``sys.argv[1:]``.
			NOTE(review): inside a notebook kernel ``sys.argv`` may carry
			kernel flags that this parser rejects; passing ``argv=[]`` selects
			the defaults -- confirm against your environment.

	Returns:
		The parsed namespace, after it has been passed through check_args().
	"""
	parser = ArgumentParser(description='NLI Baseline')
	parser.add_argument('--dataset', '-d', type=str, default='mnli')
	parser.add_argument('--model', '-m', type=str, default='bilstm')
	parser.add_argument('--gpu', type=int, default=0)
	parser.add_argument('--batch_size', type=int, default=128)
	parser.add_argument('--embed_dim', type=int, default=300)
	parser.add_argument('--d_hidden', type=int, default=200)
	# The default is fractional (0.2), so the value must be parsed as float:
	# with type=int, a command-line value such as "0.3" is rejected by argparse.
	parser.add_argument('--dp_ratio', type=float, default=0.2)
	parser.add_argument('--epochs', type=int, default=20)
	parser.add_argument('--lr', type=float, default=0.001)
	parser.add_argument('--combine', type=str, default='cat')
	parser.add_argument('--results_dir', type=str, default='results')
	return check_args(parser.parse_args(argv))

def check_args(args):
	"""Create the results folder for this run and sanity-check the arguments.

	The folder ``<results_dir>/<model>/<dataset>`` is created first. An
	out-of-range ``epochs`` or ``batch_size`` is only reported with a printed
	message; it does not abort the run.

	Args:
		args: parsed argument namespace providing ``results_dir``, ``model``,
			``dataset``, ``epochs`` and ``batch_size``.

	Returns:
		The same ``args`` object, unchanged.
	"""
	# --results_dir
	check_folder(os.path.join(args.results_dir, args.model, args.dataset))

	# Explicit comparisons instead of assert/bare-except: asserts are stripped
	# under ``python -O`` and a bare ``except`` also swallows unrelated errors.

	# --epochs
	if args.epochs < 1:
		print('number of epochs must be larger than or equal to one')

	# --batch_size
	if args.batch_size < 1:
		print('batch size must be larger than or equal to one')
	return args

def get_device(gpu_no):
	"""Return the torch device to run on.

	When CUDA is available, GPU ``gpu_no`` is made the current CUDA device and
	``cuda:<gpu_no>`` is returned; otherwise the CPU device is returned.
	"""
	if not torch.cuda.is_available():
		return torch.device('cpu')

	torch.cuda.set_device(gpu_no)
	device_name = 'cuda:{}'.format(gpu_no)
	return torch.device(device_name)

def makedirs(name):
	"""Create directory ``name`` (with parents), tolerating an existing directory.

	Any OSError other than "already exists as a directory" is re-raised.
	"""

	import os, errno

	try:
		os.makedirs(name)
	except OSError as err:
		# Only swallow the error when the path is already present as a directory.
		already_a_directory = err.errno == errno.EEXIST and os.path.isdir(name)
		if not already_a_directory:
			raise

def check_folder(log_dir):
	"""Ensure that directory ``log_dir`` exists and return its path.

	Uses ``exist_ok=True`` rather than an exists-then-create sequence, so a
	directory created by another process between the check and the creation
	does not raise.

	Args:
		log_dir: path of the directory to create (parents included).

	Returns:
		``log_dir``, unchanged.

	Raises:
		OSError: if the directory cannot be created, e.g. the path already
			exists but is not a directory.
	"""
	os.makedirs(log_dir, exist_ok=True)
	return log_dir

def get_logger(args, phase):
	"""Configure root logging to a per-phase log file and return a named logger.

	The log file is ``<results_dir>/<model>/<dataset>/<phase>.log``; its
	directory is not created here. Note that ``logging.basicConfig`` has no
	effect when the root logger already has handlers configured.

	Args:
		args: namespace providing ``results_dir``, ``model`` and ``dataset``.
		phase: name used both for the log file and for the returned logger.

	Returns:
		The ``logging.Logger`` named ``phase``.
	"""
	log_path = "{}/{}/{}/{}.log".format(args.results_dir, args.model, args.dataset, phase)
	logging.basicConfig(
		filename=log_path,
		level=logging.INFO,
		format='%(asctime)s - %(message)s',
		datefmt='%d-%b-%y %H:%M:%S',
	)
	return logging.getLogger(phase)

In [None]:
## Basic training loop

class Train():
	"""Template training driver: sets up resources, then trains and validates.

	The dataset and model are left as TODO placeholders in ``__init__``; until
	they are filled in, construction fails at ``self.model.to(...)``.
	"""

	def __init__(self):
		"""Parse arguments, then prepare device, logger, model, loss and optimizer."""
		print("program execution start: {}".format(datetime.datetime.now()))
		self.args = parse_args()
		self.device = get_device(self.args.gpu)
		self.logger = get_logger(self.args, "train")
		self.logger.info("Arguments: {}".format(self.args))

		# Not used yet; presumably meant to be passed to the dataset loader
		# once the TODO below is filled in.
		dataset_options = {
			'batch_size': self.args.batch_size,
			'device': self.device,
		}

		## TODO: Load your own dataset
		self.dataset = None

		## TODO: Load your own model
		self.model = None
		# Saved under 'options' in every checkpoint. It was never assigned
		# before, which made result_checkpoint() raise AttributeError.
		# TODO: set this to whatever is needed to rebuild the model.
		self.model_options = None

		self.model.to(self.device)
		# reduction='sum' so that per-batch losses can be accumulated and then
		# divided by the number of examples at the end of an epoch.
		self.criterion = nn.CrossEntropyLoss(reduction = 'sum')
		self.opt = O.Adam(self.model.parameters(), lr = self.args.lr)
		self.best_val_acc = None
		# Multiply the learning rate by 0.5 every 5 scheduler steps (one step per epoch).
		self.scheduler = StepLR(self.opt, step_size=5, gamma=0.5)

		print("resource preparation done: {}".format(datetime.datetime.now()))

	def result_checkpoint(self, epoch, train_loss, val_loss, train_acc, val_acc, took):
		"""Save the model when validation accuracy improves and log the epoch.

		Args:
			epoch: epoch number, used in the log line.
			train_loss, val_loss: per-example losses for the epoch.
			train_acc, val_acc: accuracies in percent.
			took: epoch duration in seconds.
		"""
		if self.best_val_acc is None or val_acc > self.best_val_acc:
			self.best_val_acc = val_acc
			torch.save({
				'accuracy': self.best_val_acc,
				# getattr keeps checkpointing working even when the instance
				# never had model_options assigned.
				'options': getattr(self, 'model_options', None),
				'model_dict': self.model.state_dict(),
			}, '{}/{}/{}/best-{}-{}-params.pt'.format(self.args.results_dir, self.args.model, self.args.dataset, self.args.model, self.args.dataset))
		self.logger.info('| Epoch {:3d} | train loss {:5.2f} | train acc {:5.2f} | val loss {:5.2f} | val acc {:5.2f} | time: {:5.2f}s |'
				.format(epoch, train_loss, train_acc, val_loss, val_acc, took))

	def train(self):
		"""Run one training epoch.

		Returns:
			(train_loss, train_acc): summed loss divided by the number of
			examples, and accuracy in percent.
		"""
		self.model.train()
		self.dataset.train_iter.init_epoch()
		n_correct, n_total, n_loss = 0, 0, 0
		for batch_idx, batch in enumerate(self.dataset.train_iter):
			self.opt.zero_grad()
			answer = self.model(batch)
			loss = self.criterion(answer, batch.label)

			# Predicted class = index of the largest score along dim 1.
			n_correct += (torch.max(answer, 1)[1].view(batch.label.size()) == batch.label).sum().item()
			n_total += batch.batch_size
			n_loss += loss.item()

			loss.backward()
			self.opt.step()
		train_loss = n_loss/n_total
		train_acc = 100. * n_correct/n_total
		return train_loss, train_acc

	def validate(self):
		"""Evaluate the model on the dev iterator without tracking gradients.

		Returns:
			(val_loss, val_acc): summed loss divided by the number of
			examples, and accuracy in percent.
		"""
		self.model.eval()
		self.dataset.dev_iter.init_epoch()
		n_correct, n_total, n_loss = 0, 0, 0
		with torch.no_grad():
			for batch_idx, batch in enumerate(self.dataset.dev_iter):
				answer = self.model(batch)
				loss = self.criterion(answer, batch.label)

				n_correct += (torch.max(answer, 1)[1].view(batch.label.size()) == batch.label).sum().item()
				n_total += batch.batch_size
				n_loss += loss.item()

			val_loss = n_loss/n_total
			val_acc = 100. * n_correct/n_total
			return val_loss, val_acc

	def execute(self):
		"""Train for ``args.epochs`` epochs, validating and checkpointing after each."""
		print(" [*] Training starts!")
		print('-' * 99)
		for epoch in range(1, self.args.epochs+1):
			start = time.time()

			train_loss, train_acc = self.train()
			val_loss, val_acc = self.validate()
			self.scheduler.step()

			took = time.time()-start
			self.result_checkpoint(epoch, train_loss, val_loss, train_acc, val_acc, took)

			print('| Epoch {:3d} | train loss {:5.2f} | train acc {:5.2f} | val loss {:5.2f} | val acc {:5.2f} | time: {:5.2f}s |'.format(
				epoch, train_loss, train_acc, val_loss, val_acc, took))
		self.finish()

	def finish(self):
		"""Log and print the end-of-training messages."""
		self.logger.info("[*] Training finished!\n\n")
		print('-' * 99)
		print(" [*] Training finished!")
		print(" [*] Please find the saved model and training log in results_dir")


In [None]:
## Start training
# Constructing Train parses the arguments and prepares the device, logger,
# model, loss, optimizer and scheduler; execute() then runs the
# train/validate/checkpoint loop for the configured number of epochs.
task = Train()
task.execute()

In [None]:
## Zip the final model and all the required files, such as vocabulary
# Replace USERID with your own, such as 2017CSZ8058
!zip -r USERID_A_model.zip **

## Upload it to Google Drive and ensure that the testing notebook uses the correct link