In [1]:
from dataset.legacy.tweets_dataset import TweetsDataset
# Build the project's tweets dataset wrapper. Later cells read its
# `train_dataset`, `validation_dataset` and `Tweet.vocab` attributes.
# NOTE(review): `transform=False` presumably disables a preprocessing step --
# confirm against TweetsDataset.
dataset = TweetsDataset(transform=False)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\naman\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\naman\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
import torch
import torchtext

# Bucketed iterators over the training and validation splits.
# - sort_key tells the iterator to compare examples by len(x.review).
# - sort_within_batch=True additionally sorts the examples inside each batch.
# - device: batches are placed on the GPU when CUDA is available and on the
#   CPU otherwise, so the cell also runs on a machine without a GPU.
train_iterator, valid_iterator = torchtext.legacy.data.BucketIterator.splits(
    (dataset.train_dataset, dataset.validation_dataset),
    batch_size=1,
    sort_key=lambda x: len(x.review),
    sort_within_batch=True,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [3]:
# Model hyper-parameters.
# Number of entries in `dataset.Tweet.vocab`; passed to the encoder as `vocab_size`.
size_of_vocab = len(dataset.Tweet.vocab)
# Passed to the encoder as `embedding_dim`.
embedding_dim = 300
# Passed as `hidden_dim` to both Lstm instances and as the decoder's `output_dim`.
num_hidden_nodes = 20

In [4]:
from model.lstm import Lstm
from model.encoder_decoder import EncoderDecoder
# Encoder: an Lstm built over the dataset vocabulary with staggered input enabled.
encoder = Lstm(
    vocab_size=size_of_vocab,
    embedding_dim=embedding_dim,
    hidden_dim=num_hidden_nodes,
    staggered_input=True,
)

# Decoder: an Lstm without a vocabulary (vocab_size=None) whose input width is
# the width of the encoder's final linear layer (encoder.fc.out_features).
# NOTE(review): presumably the decoder consumes encoder outputs directly rather
# than token ids -- confirm in model.lstm.
decoder = Lstm(
    vocab_size=None,
    embedding_dim=encoder.fc.out_features,
    hidden_dim=num_hidden_nodes,
    output_dim=num_hidden_nodes,
    staggered_input=False,
)

In [5]:
# Combine the two Lstm modules into a single EncoderDecoder configured with
# num_classes=3.
# NOTE(review): what the 3 classes stand for is not visible here -- presumably
# the distinct values of the `rating` label; confirm against the dataset.
model = EncoderDecoder(encoder=encoder, decoder=decoder, num_classes=3)

In [6]:
import torch.optim as optim
import torch.nn as nn
import torch
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# the notebook does not crash on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# define optimizer and loss
# Adam over every parameter of the encoder-decoder model, learning rate 2e-4.
optimizer = optim.Adam(model.parameters(), lr=2e-4)
# Cross-entropy between the model's per-class outputs and the integer targets.
criterion = nn.CrossEntropyLoss()

# define metric
def binary_accuracy(preds, y):
    """Return the fraction of predictions whose top-scoring class equals ``y``.

    Despite the name, nothing here is specific to two classes: the predicted
    label is the index of the largest value along dimension 1 of ``preds``.

    Args:
        preds: tensor of per-class scores; the maximum is taken over dim 1.
        y: tensor of target class indices, compared element-wise with the
            predicted indices.

    Returns:
        A 0-dim float tensor: the number of matches divided by ``len`` of the
        comparison result.
    """
    # index of the maximum along dim 1 is the predicted class
    predicted_labels = torch.max(preds, dim=1).indices

    # 1.0 where the prediction matches the target, 0.0 elsewhere
    hits = torch.eq(predicted_labels, y).float()
    return hits.sum() / len(hits)
    
# move the model and the loss module onto `device`
# NOTE(review): the optimizer above is constructed before this move; the
# torch.optim documentation has recommended moving the model to its device
# first -- consider reordering, and verify against the installed version.
model = model.to(device)
criterion = criterion.to(device)

In [7]:
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator`` and report averaged metrics.

    For every batch the gradients are cleared, the model is called on the
    ``(text, lengths)`` pair held in ``batch.review``, the loss against
    ``batch.rating`` is back-propagated and the optimizer takes one step.

    Args:
        model: callable as ``model(text, lengths)``; switched to training mode.
        iterator: iterable of batches exposing ``review`` and ``rating``;
            must support ``len()``.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
        criterion: callable as ``criterion(outputs, targets)`` returning a
            scalar loss tensor.

    Returns:
        ``(mean_loss, mean_accuracy)`` where each value is the sum of the
        per-batch figures divided by ``len(iterator)`` (an average over
        batches, not over individual examples).
    """
    # enable training-mode behaviour of the model's layers
    model.train()

    # running totals for this epoch
    running_loss = running_acc = 0.0

    for batch in iterator:
        # start every batch from zeroed gradients
        optimizer.zero_grad()

        # forward pass on the text tensor and its lengths
        tokens, lengths = batch.review
        outputs = model(tokens, lengths)

        # loss and accuracy against the target labels
        targets = batch.rating
        loss = criterion(outputs, targets)
        acc = binary_accuracy(outputs, targets)

        # backward pass followed by the parameter update
        loss.backward()
        optimizer.step()

        # accumulate plain Python floats
        running_loss += loss.item()
        running_acc += acc.item()

    num_batches = len(iterator)
    return running_loss / num_batches, running_acc / num_batches

In [8]:
def evaluate(model, iterator, criterion):
    """Score ``model`` on ``iterator`` without updating any parameters.

    The model is put in evaluation mode and every batch is processed inside
    ``torch.no_grad()``, so no gradients are tracked.

    Args:
        model: callable as ``model(text, lengths)``.
        iterator: iterable of batches exposing ``review`` (a
            ``(text, lengths)`` pair) and ``rating``; must support ``len()``.
        criterion: callable as ``criterion(outputs, targets)`` returning a
            scalar loss tensor.

    Returns:
        ``(mean_loss, mean_accuracy)`` where each value is the sum of the
        per-batch figures divided by ``len(iterator)``.
    """
    # evaluation mode (e.g. turns dropout off)
    model.eval()

    # running totals for this pass
    running_loss = running_acc = 0.0

    # gradient tracking is not needed for scoring
    with torch.no_grad():
        for batch in iterator:
            # forward pass on the text tensor and its lengths
            tokens, lengths = batch.review
            outputs = model(tokens, lengths)

            # fold this batch's loss and accuracy into the totals
            targets = batch.rating
            running_loss += criterion(outputs, targets).item()
            running_acc += binary_accuracy(outputs, targets).item()

    num_batches = len(iterator)
    return running_loss / num_batches, running_acc / num_batches

In [9]:
# Number of full passes over the training iterator.
N_EPOCHS = 10
# Lowest validation loss seen so far; starting at +inf means the first finite
# validation loss triggers a checkpoint.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    # train the model
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)

    # evaluate the model
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    # save the best model
    # Only a strict improvement in validation loss overwrites the file, so
    # 'saved_weights.pt' holds the best epoch's weights, not the last epoch's.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')

    # Accuracies come back as fractions; they are scaled to percent for display.
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}% \n')

	Train Loss: 0.824 | Train Acc: 62.99%
	 Val. Loss: 0.652 |  Val. Acc: 73.17% 

	Train Loss: 0.627 | Train Acc: 76.10%
	 Val. Loss: 0.602 |  Val. Acc: 80.49% 

	Train Loss: 0.482 | Train Acc: 84.56%
	 Val. Loss: 0.591 |  Val. Acc: 81.46% 

	Train Loss: 0.350 | Train Acc: 88.78%
	 Val. Loss: 0.660 |  Val. Acc: 76.59% 

	Train Loss: 0.258 | Train Acc: 91.20%
	 Val. Loss: 0.747 |  Val. Acc: 73.17% 

	Train Loss: 0.212 | Train Acc: 92.49%
	 Val. Loss: 0.823 |  Val. Acc: 74.15% 

	Train Loss: 0.187 | Train Acc: 93.27%
	 Val. Loss: 0.891 |  Val. Acc: 72.68% 

	Train Loss: 0.157 | Train Acc: 93.70%
	 Val. Loss: 0.911 |  Val. Acc: 74.63% 

	Train Loss: 0.135 | Train Acc: 93.87%
	 Val. Loss: 0.943 |  Val. Acc: 73.66% 

	Train Loss: 0.090 | Train Acc: 95.69%
	 Val. Loss: 1.059 |  Val. Acc: 77.56% 

