### Author: [@SauravMaheshkar](https://twitter.com/MaheshkarSaurav)

# Packages 📦 and Basic Setup

---

## Install Packages

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from torchtext import data
from torch.autograd import Variable
from torchtext import datasets
from torchtext.vocab import Vectors, GloVe

## Project Configuration using `wandb.config`

In [6]:
import os
import wandb

# Paste your api key here
os.environ["WANDB_API_KEY"] = '7c7ab5e2b09dbcf8676c8afe3f110fddc1638ded'

# Feel free to change these and experiment !!
config = wandb.config
config.learning_rate = 2e-5
config.batch_size = 32
config.output_size = 2
config.hidden_size = 256
config.embedding_length = 300
config.epochs = 10

Error: You must call wandb.init() before wandb.config.learning_rate

# 💿 The Dataset

---

In this code cell we use torchtext legacy module to create a Dataset

In [None]:
# Ported from: https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/load_data.py

def load_dataset(test_sen=None):
    
    tokenize = lambda x: x.split()
    TEXT = data.Field(sequential=True, tokenize=tokenize, lower=True, include_lengths=True, batch_first=True, fix_length=200)
    LABEL = data.LabelField()
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train_data, vectors=GloVe(name='6B', dim=300))
    LABEL.build_vocab(train_data)

    word_embeddings = TEXT.vocab.vectors

    train_data, valid_data = train_data.split()
    train_iter, valid_iter, test_iter = data.BucketIterator.splits((train_data, valid_data, test_data), 
                                                                   batch_size=32, 
                                                                   sort_key=lambda x: len(x.text), 
                                                                   repeat=False, shuffle=True)

    vocab_size = len(TEXT.vocab)

    return TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter

TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_dataset()

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:01<00:00, 43.0MB/s]
.vector_cache/glove.6B.zip: 862MB [02:40, 5.36MB/s]                           
100%|█████████▉| 399999/400000 [00:52<00:00, 7643.86it/s]


# ✍️ Model Architecture
---

In [None]:
class LSTMClassifier(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
		super(LSTMClassifier, self).__init__()
		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		
		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
		self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False) 
		self.lstm = nn.LSTM(embedding_length, hidden_size) # Our main hero for this tutorial
		self.label = nn.Linear(hidden_size, output_size)
		
	def forward(self, input_sentence, batch_size=None):
		input = self.word_embeddings(input_sentence) 
		input = input.permute(1, 0, 2) 
		if batch_size is None:
			h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) 
			c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) 
		else:
			h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
		output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
		final_output = self.label(final_hidden_state[-1]) 
		
		return final_output

# 🧱 + 🏗 = 🏠 Training

---

## 🥼 Helper Function

In [None]:
def clip_gradient(model, clip_value):
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)
    
def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if (text.size()[0] is not 32): 
            continue
        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        wandb.log({"Training Loss": loss.item()})
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects/len(batch)
        wandb.log({"Training Accuracy": acc.item()})
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        
        if steps % 100 == 0:
            print (f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')
        
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
        
    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)

def eval_model(model, val_iter):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.eval()
    with torch.no_grad():
        for idx, batch in enumerate(val_iter):
            text = batch.text[0]
            if (text.size()[0] is not 32):
                continue
            target = batch.label
            target = torch.autograd.Variable(target).long()
            if torch.cuda.is_available():
                text = text.cuda()
                target = target.cuda()
            prediction = model(text)
            loss = loss_fn(prediction, target)
            wandb.log({"Evaluation Loss": loss.item()})
            num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).sum()
            acc = 100.0 * num_corrects/len(batch)
            wandb.log({"Evaluation Accuracy": acc.item()})
            total_epoch_loss += loss.item()
            total_epoch_acc += acc.item()

    return total_epoch_loss/len(val_iter), total_epoch_acc/len(val_iter)

## Training

In [None]:
model = LSTMClassifier(config.batch_size, config.output_size, config.hidden_size, vocab_size, config.embedding_length, word_embeddings)
loss_fn = F.cross_entropy

# Create a wandb run to log all your metrics
run = wandb.init(project='...', entity='...', reinit=True)

wandb.watch(model)

for epoch in range(config.epochs):
    train_loss, train_acc = train_model(model, train_iter, epoch)
    val_loss, val_acc = eval_model(model, valid_iter)
    
    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:3f}, Val. Acc: {val_acc:.2f}%')

run.finish()