# LSTM in Pytorch

In [93]:
#library imports
import sys
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import re
import spacy

import pickle
from collections import Counter
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import string
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn.metrics import mean_squared_error,accuracy_score,confusion_matrix,precision_score,recall_score,f1_score
from tqdm import tqdm
tqdm.pandas()

## Basic LSTM in Pytorch with random numbers

In [3]:
#input
x = torch.tensor([[1,2, 12,34, 56,78, 90,80],
                 [12,45, 99,67, 6,23, 77,82],
                 [3,24, 6,99, 12,56, 21,22]])

In [4]:
print(x.shape)

torch.Size([3, 8])


#### using two different models

In [5]:
model1 = nn.Embedding(100, 7, padding_idx=0)
model2 = nn.LSTM(input_size=7, hidden_size=3, num_layers=1, batch_first=True)

In [6]:
print(model1)
for name, param in model1.named_parameters():
    print (name, param.data.shape)

Embedding(100, 7, padding_idx=0)
weight torch.Size([100, 7])


In [7]:
out1 = model1(x)
out2 = model2(out1)

In [8]:
print(model2)
for name, param in model2.named_parameters():
    print (name, param.data.shape)

LSTM(7, 3, batch_first=True)
weight_ih_l0 torch.Size([12, 7])
weight_hh_l0 torch.Size([12, 3])
bias_ih_l0 torch.Size([12])
bias_hh_l0 torch.Size([12])


In [9]:
print(out1.shape)
print(out1)

torch.Size([3, 8, 7])
tensor([[[ 1.0901, -0.1593,  0.6650,  0.2456,  0.3406, -1.3329,  0.6954],
         [ 0.3798,  0.5201,  0.1932,  1.1549,  0.0685, -0.7466, -0.2904],
         [-0.6632,  1.4525, -0.1910,  0.4362,  0.5490, -1.6861,  1.6541],
         [ 1.5351, -0.7867, -0.4847, -0.0082, -0.6175, -0.5503, -0.2015],
         [-1.5917,  0.4777,  0.3365, -0.5456, -0.3524,  0.9582,  1.1184],
         [ 0.8526, -1.0249,  0.1919, -1.9348,  0.8062, -0.0530, -0.1203],
         [-0.4034,  1.5814, -0.4121,  1.4274,  0.6267, -0.1648,  0.3458],
         [-0.1021, -0.2201,  0.7095,  0.1200, -3.0810,  0.7647, -0.1864]],

        [[-0.6632,  1.4525, -0.1910,  0.4362,  0.5490, -1.6861,  1.6541],
         [-0.2482, -1.8267,  0.7440, -0.9848, -0.6675,  0.6528, -0.5892],
         [ 1.8834,  1.0524,  0.9729,  0.5428,  0.4874,  0.5897, -0.9407],
         [-0.0559,  1.2702,  0.0935, -1.0264,  0.1006,  1.0052, -0.2658],
         [ 0.7261, -1.0326, -1.8372,  2.1531, -0.4839,  1.7049,  0.3869],
         [-0.0

In [10]:
print(type(out2))

<class 'tuple'>


In [11]:
out, (ht, ct) = model2(out1)
print(ct.shape)

torch.Size([1, 3, 3])


In [12]:
print(out.shape)
print(ht.shape)
print(ct.shape)

torch.Size([3, 8, 3])
torch.Size([1, 3, 3])
torch.Size([1, 3, 3])


In [13]:
print(out)
print(ht)

tensor([[[-0.0359,  0.0102,  0.3304],
         [-0.0726, -0.0199,  0.2845],
         [-0.0528, -0.0963,  0.4883],
         [-0.1604,  0.0295,  0.4522],
         [ 0.0797, -0.0154,  0.3148],
         [ 0.4156, -0.0356,  0.2419],
         [ 0.2728, -0.0817,  0.2443],
         [ 0.0333,  0.0397,  0.2731]],

        [[ 0.0080, -0.1103,  0.3283],
         [ 0.1577, -0.0756,  0.3162],
         [ 0.0252, -0.0298,  0.1012],
         [ 0.1188, -0.0686,  0.0190],
         [-0.0015,  0.0803,  0.0512],
         [ 0.0831, -0.0361,  0.2573],
         [ 0.0801, -0.2291,  0.5756],
         [ 0.3401, -0.0572,  0.2958]],

        [[ 0.1622, -0.0333,  0.0864],
         [-0.0161, -0.2400,  0.2744],
         [-0.0327,  0.0672,  0.2472],
         [-0.0866,  0.1174,  0.0610],
         [-0.1790, -0.0513,  0.3716],
         [ 0.1584, -0.0326,  0.2205],
         [ 0.0134, -0.3732,  0.0160],
         [ 0.1165, -0.1066,  0.0177]]], grad_fn=<TransposeBackward0>)
tensor([[[ 0.0333,  0.0397,  0.2731],
         [ 0.3

#### using nn.Sequential

In [14]:
model3 = nn.Sequential(nn.Embedding(100, 7, padding_idx=0),
                        nn.LSTM(input_size=7, hidden_size=3, num_layers=1, batch_first=True))

In [15]:
print(model3)
for name, param in model3.named_parameters():
    print (name, param.data.shape)

Sequential(
  (0): Embedding(100, 7, padding_idx=0)
  (1): LSTM(7, 3, batch_first=True)
)
0.weight torch.Size([100, 7])
1.weight_ih_l0 torch.Size([12, 7])
1.weight_hh_l0 torch.Size([12, 3])
1.bias_ih_l0 torch.Size([12])
1.bias_hh_l0 torch.Size([12])


In [16]:
out, (ht, ct) = model3(x)
print(out)

tensor([[[-4.2407e-02, -2.3003e-02, -3.0795e-01],
         [-5.3531e-02, -6.1740e-02, -2.5085e-01],
         [-3.4837e-02, -4.7687e-02, -2.3117e-01],
         [-5.1555e-04,  1.3455e-01, -2.2131e-01],
         [ 1.5301e-01,  3.5757e-01, -2.7589e-01],
         [ 5.1307e-02,  1.1507e-02, -2.0547e-01],
         [ 7.4796e-02,  2.1338e-02, -1.9023e-01],
         [-4.2027e-03, -1.1740e-02, -1.1846e-01]],

        [[-1.1426e-02,  2.2446e-02,  6.9458e-04],
         [ 5.5026e-02, -4.5224e-02, -6.1695e-02],
         [ 6.1524e-02,  1.3966e-01, -3.3928e-01],
         [ 3.1143e-02,  2.7456e-01, -1.0514e-01],
         [ 8.5409e-02,  1.3588e-01, -3.1427e-01],
         [ 1.7498e-01,  1.9168e-01, -2.5169e-01],
         [ 1.4623e-01,  1.8034e-01, -8.6848e-02],
         [ 6.9871e-02,  2.4040e-02, -6.4555e-02]],

        [[ 1.2535e-01,  5.7884e-02,  1.7001e-02],
         [ 3.8908e-02,  1.2217e-01, -3.1478e-01],
         [ 6.7457e-02,  9.7066e-02, -3.9773e-01],
         [ 9.5190e-02,  1.7204e-01, -5.2000e-0

In [17]:
print(out.shape)
print(ht.shape)
print(ct.shape)

torch.Size([3, 8, 3])
torch.Size([1, 3, 3])
torch.Size([1, 3, 3])


## Multiclass Text Classification

We are going to predict item ratings from customer reviews, based on this dataset from Kaggle:
https://www.kaggle.com/nicapotato/womens-ecommerce-clothing-reviews

In [18]:
# from google.colab import drive
# drive.mount('/content/drive')

In [19]:
#loading the data
# reviews = pd.read_csv("/content/drive/MyDrive/data/NLP_sentiment_analysis_data/train.csv")
reviews = pd.read_csv("train.csv")
print(reviews.shape)
reviews.head()

(50000, 3)


Unnamed: 0.1,Unnamed: 0,reviews,ratings
0,0,"This book was very informative, covering all a...",4
1,1,I am already a baseball fan and knew a bit abo...,5
2,2,I didn't like this product it smudged all unde...,1
3,3,I simply love the product. I appreciate print ...,5
4,4,It goes on very easily and makes my eyes look ...,5


In [20]:
# reviews['Title'] = reviews['Title'].fillna('')
# reviews['Review Text'] = reviews['Review Text'].fillna('')
# reviews['review'] = reviews['Title'] + ' ' + reviews['Review Text']

In [21]:
#keeping only relevant columns and calculating sentence lengths
# .copy() makes `reviews` an independent frame, so adding columns to it below
# does not trigger pandas' SettingWithCopyWarning. The selection already yields
# the columns in the order ['reviews', 'ratings'], so no rename is needed.
reviews = reviews[['reviews', 'ratings']].copy()
# review_length = number of whitespace-separated words in each review
reviews['review_length'] = reviews['reviews'].apply(lambda x: len(x.split()))
reviews.head()

Unnamed: 0,reviews,ratings,review_length
0,"This book was very informative, covering all a...",4,10
1,I am already a baseball fan and knew a bit abo...,5,23
2,I didn't like this product it smudged all unde...,1,14
3,I simply love the product. I appreciate print ...,5,13
4,It goes on very easily and makes my eyes look ...,5,13


In [22]:
#changing ratings to 0-numbering
zero_numbering = {1:0, 2:1, 3:2, 4:3, 5:4}
reviews['ratings'] = reviews['ratings'].apply(lambda x: zero_numbering[x])
print(type(reviews['ratings']))
reviews.head()

<class 'pandas.core.series.Series'>


Unnamed: 0,reviews,ratings,review_length
0,"This book was very informative, covering all a...",3,10
1,I am already a baseball fan and knew a bit abo...,4,23
2,I didn't like this product it smudged all unde...,0,14
3,I simply love the product. I appreciate print ...,4,13
4,It goes on very easily and makes my eyes look ...,4,13


In [23]:
#mean sentence length
np.mean(reviews['review_length'])

17.58756

In [24]:
#tokenization
# tok = spacy.load('en')
tok = spacy.load('en_core_web_sm')

# Both patterns are compiled once instead of on every call to tokenize().
# Runs of non-ASCII characters.
_NON_ASCII_RE = re.compile(r"[^\x00-\x7F]+")
# Any ASCII punctuation character, digit, carriage return, tab or newline.
_PUNCT_DIGIT_RE = re.compile('[' + re.escape(string.punctuation) + '0-9\\r\\t\\n]')


def tokenize (text):
    """Split *text* into a list of lower-cased token strings.

    Runs of non-ASCII characters, and every punctuation character, digit,
    carriage return, tab and newline, are replaced by a single space before
    the text is handed to the module-level spaCy tokenizer ``tok``.

    Returns:
        list[str]: the text of each token produced by ``tok.tokenizer``.
    """
    ascii_only = _NON_ASCII_RE.sub(" ", text)
    nopunct = _PUNCT_DIGIT_RE.sub(" ", ascii_only.lower())
    return [token.text for token in tok.tokenizer(nopunct)]

In [25]:
# #count number of occurences of each word
# counts = Counter()
# for index, row in reviews.iterrows():
#     if index%1000==0:
#       print(index)
#     counts.update(tokenize(row['reviews']))

In [26]:

# with open('/content/drive/MyDrive/data/NLP_sentiment_analysis_data/count.pickle', 'wb') as outputfile:
#   pickle.dump(counts,outputfile)

In [27]:
# with open('/content/drive/MyDrive/data/NLP_sentiment_analysis_data/count.pickle', 'rb') as inputfile:
#   counts=pickle.load(inputfile)
# Load the previously pickled word-count object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a count.pickle that you produced yourself.
with open('count.pickle', 'rb') as inputfile:
  counts=pickle.load(inputfile)

In [28]:
# #deleting infrequent words
# print("num_words before:",len(counts.keys()))
# for word in list(counts):
#     if counts[word] < 2:
#         del counts[word]
# print("num_words after:",len(counts.keys()))

In [29]:
#creating vocabulary
vocab2index = {"":0, "UNK":1}
words = ["", "UNK"]
for word in counts:
    vocab2index[word] = len(words)
    words.append(word)

In [30]:
def encode_sentence(text, vocab2index, N=70):
    """Encode *text* as a fixed-size vector of vocabulary ids.

    The text is tokenized with ``tokenize``; every token is looked up in
    ``vocab2index`` (tokens that are missing get the id stored under "UNK").
    The ids are written into a zero-filled integer array of size ``N``;
    tokens beyond the first ``N`` are dropped.

    Returns:
        tuple: ``(padded, n_used)`` where ``padded`` is the array of ids and
        ``n_used`` is the number of slots filled, i.e. ``min(N, token count)``.
    """
    tokens = tokenize(text)
    padded = np.zeros(N, dtype=int)
    token_ids = np.array(
        [vocab2index.get(token, vocab2index["UNK"]) for token in tokens]
    )
    n_used = min(N, len(token_ids))
    padded[:n_used] = token_ids[:n_used]
    return padded, n_used

In [31]:
# reviews['encoded'] = reviews['reviews'].apply(lambda x: np.array(encode_sentence(x,vocab2index )))

# Store the (ids, length) tuple as returned. Wrapping it in np.array() builds a
# ragged array (a 1-D id vector next to a scalar), which is deprecated and
# raises ValueError on NumPy >= 1.24. Indexing [0] / [1] works the same on a tuple.
reviews['encoded'] = reviews['reviews'].progress_apply(lambda x: encode_sentence(x, vocab2index))
print(type(reviews['encoded']))
reviews.head()

  This is separate from the ipykernel package so we can avoid doing imports until
100%|██████████| 50000/50000 [00:03<00:00, 12977.56it/s]

<class 'pandas.core.series.Series'>





Unnamed: 0,reviews,ratings,review_length,encoded
0,"This book was very informative, covering all a...",3,10,"[[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 0,..."
1,I am already a baseball fan and knew a bit abo...,4,23,"[[13, 14, 15, 16, 17, 18, 19, 20, 16, 21, 22, ..."
2,I didn't like this product it smudged all unde...,0,14,"[[13, 31, 32, 33, 2, 34, 35, 36, 9, 37, 38, 39..."
3,I simply love the product. I appreciate print ...,4,13,"[[13, 42, 43, 23, 34, 7, 13, 44, 45, 46, 47, 4..."
4,It goes on very easily and makes my eyes look ...,4,13,"[[35, 50, 51, 5, 52, 19, 53, 38, 39, 54, 55, 1..."


In [32]:
#check how balanced the dataset is
Counter(reviews['ratings'])

Counter({3: 6871, 4: 33193, 0: 4059, 1: 2265, 2: 3612})

In [34]:
X = list(reviews['encoded'])
y = list(reviews['ratings'])
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2)
print(type(X_train))

<class 'list'>


#### Pytorch Dataset

In [35]:
class ReviewsDataset(Dataset):
    """Dataset that pairs encoded reviews with their labels.

    ``X`` is a sequence whose items are indexable as ``item[0]`` (a NumPy
    array of token ids) and ``item[1]`` (the sequence length); ``Y`` is the
    matching sequence of labels.
    """

    def __init__(self, X, Y):
        self.X, self.y = X, Y

    def __len__(self):
        # The number of samples is defined by the number of labels.
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(token_id_tensor, label, length)`` for sample ``idx``."""
        sample = self.X[idx]
        token_ids = torch.from_numpy(sample[0])
        return token_ids, self.y[idx], sample[1]

In [43]:
train_ds = ReviewsDataset(X_train, y_train)
valid_ds = ReviewsDataset(X_valid, y_valid)

In [74]:
def train_model(model, epochs=10, lr=0.001):
    """Train a classification model with Adam and cross-entropy loss.

    Batches are read from the module-level ``train_dl`` and, after every
    epoch, the model is evaluated on the module-level ``val_dl`` through
    ``validation_metrics``. A progress line is printed on epochs 1, 6, 11, ...

    Args:
        model: module called as ``model(x, lengths)`` returning class logits.
        epochs: number of passes over ``train_dl``.
        lr: Adam learning rate.
    """
    # Run on whatever device the model already lives on instead of assuming
    # CUDA, so the same loop works on CPU-only machines.
    device = next(model.parameters()).device
    # Frozen parameters (requires_grad=False) are not handed to the optimizer.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable, lr=lr)
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for x, y, l in train_dl:
            x = x.long().to(device)
            y = y.long().to(device)
            # Lengths `l` are passed through untouched.
            y_pred = model(x, l)
            optimizer.zero_grad()
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so that the epoch
            # average is a per-sample average.
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss, val_acc, val_rmse = validation_metrics(model, val_dl)
        if i % 5 == 1:
            print("train loss %.3f, val loss %.3f, val accuracy %.3f, and val rmse %.3f" % (sum_loss/total, val_loss, val_acc, val_rmse))

def validation_metrics (model, valid_dl):
    """Evaluate a classification model on a data loader.

    Args:
        model: module called as ``model(x, lengths)`` returning class logits.
        valid_dl: iterable of ``(x, y, lengths)`` batches.

    Returns:
        tuple: ``(loss, accuracy, rmse)`` where ``loss`` is the per-sample
        mean cross-entropy (float), ``accuracy`` is a 0-dim tensor holding the
        fraction of correct predictions, and ``rmse`` is the root mean squared
        error between predicted and true class indices over all samples.
    """
    model.eval()
    # Follow the model's device instead of assuming CUDA.
    device = next(model.parameters()).device
    correct = 0
    total = 0
    sum_loss = 0.0
    sum_sq_err = 0.0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y, l in valid_dl:
            x = x.long().to(device)
            y = y.long()
            y_hat = model(x, l).cpu()
            pred = torch.max(y_hat, 1)[1]
            sum_loss += F.cross_entropy(y_hat, y, reduction="sum").item()
            correct += (pred == y).float().sum()
            # Accumulate squared errors so that the RMSE is taken over the
            # whole set; averaging per-batch RMSEs would underestimate it.
            sum_sq_err += ((pred - y).float() ** 2).sum().item()
            total += y.shape[0]
    return sum_loss/total, correct/total, (sum_sq_err/total) ** 0.5

In [75]:
batch_size = 5000
vocab_size = len(words)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(valid_ds, batch_size=batch_size)

### LSTM with fixed length input

In [76]:
class LSTM_fixed_len(torch.nn.Module):
    """Embedding -> dropout -> LSTM -> linear classifier with 5 outputs.

    The whole padded sequence is fed to the LSTM; the lengths argument of
    ``forward`` is accepted but not used.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        # Index 0 is reserved for padding.
        self.embeddings = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_dim, out_features=5)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x, l):
        """Return 5 logits per row of ``x``; ``l`` is ignored."""
        embedded = self.dropout(self.embeddings(x))
        _, (final_hidden, _) = self.lstm(embedded)
        # final_hidden[-1] is the last layer's final hidden state.
        return self.linear(final_hidden[-1])

In [77]:
model_fixed =  LSTM_fixed_len(vocab_size, 50, 50)
model_fixed=model_fixed.cuda()

In [78]:
train_model(model_fixed, epochs=30, lr=0.01)

train loss 1.094, val loss 1.086, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462


In [97]:
def test_model (model, test_dl):
    """Print test-set classification metrics for ``model``.

    Predictions are collected over every batch of ``test_dl`` and the metrics
    are computed once on the whole set: accuracy, weighted recall, weighted
    precision, weighted F1 and the confusion matrix. Previously they were
    computed and printed separately for each batch.

    Args:
        model: module called as ``model(x, lengths)`` returning class logits.
        test_dl: iterable of ``(x, y, lengths)`` batches.

    Returns:
        None. The metrics are printed.
    """
    model.eval()
    # Follow the model's device instead of assuming CUDA.
    device = next(model.parameters()).device
    targets = []
    predictions = []
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y, l in test_dl:
            x = x.long().to(device)
            y_hat = model(x, l)
            predictions.append(torch.max(y_hat, 1)[1].cpu())
            targets.append(y.long())
    if not targets:
        # Empty loader: nothing to report.
        return
    y = torch.cat(targets)
    pred = torch.cat(predictions)

    test_acc=accuracy_score(y,pred)
    test_recall_score=recall_score(y,pred,average='weighted')
    test_precision_score=precision_score(y,pred,average='weighted')
    test_f1_score=f1_score(y,pred,average='weighted')
    test_confusion_matrix=confusion_matrix(y,pred)

    print(" Test accuracy is "+str(test_acc))
    print(" test_recall_score is "+str(test_recall_score))
    print(" test_precision_score is "+str(test_precision_score))
    print(" test_f1_score is "+str(test_f1_score))
    print(test_confusion_matrix)
    return
            
#         correct += (pred.cpu() == y).float().sum()
#         total += y.shape[0]
#         sum_loss += loss.item()*y.shape[0]
#         sum_rmse += np.sqrt(mean_squared_error(pred.cpu(), y.unsqueeze(-1)))*y.shape[0]
#     return sum_loss/total, correct/total, sum_rmse/total


In [98]:
test_reviews = pd.read_csv("gold_test.csv")
# .copy() makes `test_reviews` an independent frame, so the column assignments
# in the next cell do not trigger pandas' SettingWithCopyWarning. The selection
# already yields the columns in the order ['reviews', 'ratings'].
test_reviews = test_reviews[['reviews', 'ratings']].copy()
# print(len(test_reviews))

In [99]:
test_batch=len(test_reviews)
test_reviews['ratings'] = test_reviews['ratings'].apply(lambda x: zero_numbering[x])
# Store the (ids, length) tuple as returned. Wrapping it in np.array() builds a
# ragged array, which is deprecated and raises ValueError on NumPy >= 1.24.
test_reviews['encoded'] = test_reviews['reviews'].progress_apply(lambda x: encode_sentence(x, vocab2index))
X_test = list(test_reviews['encoded'])
y_test = list(test_reviews['ratings'])
test_ds = ReviewsDataset(X_test, y_test)
test_dl = DataLoader(test_ds, batch_size=test_batch)
test_model(model_fixed, test_dl)

# test_loss, test_acc, test_rmse = test_model(model_fixed, test_dl)
# print(" test loss %.3f, test accuracy %.3f, and test rmse %.3f" % (test_loss, test_acc, test_rmse))

  This is separate from the ipykernel package so we can avoid doing imports until
100%|██████████| 10000/10000 [00:01<00:00, 5200.86it/s]


 Test accuracy is 0.5784
 test_recall_score is 0.5784
 test_precision_score is 0.33454656
 test_f1_score is 0.42390593005575267
[[   0    0    0    0 1271]
 [   0    0    0    0  630]
 [   0    0    0    0  911]
 [   0    0    0    0 1404]
 [   0    0    0    0 5784]]


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
train_model(model_fixed, epochs=30, lr=0.01)

### LSTM with variable length input

In [100]:
class LSTM_variable_input(torch.nn.Module) :
    """Embedding -> dropout -> packed LSTM -> linear classifier with 5 outputs.

    The padded batch is packed with the true sequence lengths before it is fed
    to the LSTM, so padding positions are not processed.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim) :
        super().__init__()
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(0.3)
        # Index 0 is reserved for padding.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 5)

    def forward(self, x, s):
        """Return 5 logits per row of ``x``.

        Args:
            x: integer tensor of token ids, one padded sequence per row.
            s: per-row sequence lengths (tensor or sequence of ints).
        """
        x = self.embeddings(x)
        x = self.dropout(x)
        # pack_padded_sequence needs the lengths on the CPU and rejects
        # zero-length sequences, so an empty review is treated as a single
        # padding token instead of crashing the whole batch.
        lengths = torch.as_tensor(s, dtype=torch.int64).cpu().clamp(min=1)
        x_pack = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        out_pack, (ht, ct) = self.lstm(x_pack)
        # ht[-1] is the last layer's hidden state at each sequence's true end.
        out = self.linear(ht[-1])
        return out

In [102]:
model = LSTM_variable_input(vocab_size, 50, 50)
model=model.cuda()

In [104]:
train_model(model, epochs=30, lr=0.1)

train loss 1.028, val loss 0.990, val accuracy 0.672, and val rmse 1.397
train loss 0.698, val loss 0.804, val accuracy 0.715, and val rmse 1.030
train loss 0.624, val loss 0.775, val accuracy 0.715, and val rmse 0.945
train loss 0.592, val loss 0.781, val accuracy 0.719, and val rmse 0.933
train loss 0.581, val loss 0.787, val accuracy 0.718, and val rmse 0.932
train loss 0.566, val loss 0.787, val accuracy 0.723, and val rmse 0.941


In [105]:
test_model(model, test_dl)

 Test accuracy is 0.6642
 test_recall_score is 0.6642
 test_precision_score is 0.6202604795344706
 test_f1_score is 0.6368367907781483
[[ 776  137  126   57  175]
 [ 228   98  111   67  126]
 [ 185   90  221  186  229]
 [  73   27  166  291  847]
 [ 106   36  104  282 5256]]


In [106]:
train_model(model, epochs=30, lr=0.05)

train loss 0.552, val loss 0.786, val accuracy 0.722, and val rmse 0.918
train loss 0.528, val loss 0.790, val accuracy 0.724, and val rmse 0.921
train loss 0.515, val loss 0.788, val accuracy 0.723, and val rmse 0.913
train loss 0.502, val loss 0.804, val accuracy 0.722, and val rmse 0.908
train loss 0.495, val loss 0.805, val accuracy 0.723, and val rmse 0.921
train loss 0.493, val loss 0.805, val accuracy 0.722, and val rmse 0.925


In [107]:
test_model(model, test_dl)

 Test accuracy is 0.6585
 test_recall_score is 0.6585
 test_precision_score is 0.6277692002131692
 test_f1_score is 0.640291551685078
[[ 759  153  161   47  151]
 [ 191  105  150   75  109]
 [ 139  111  268  188  205]
 [  56   44  203  303  798]
 [  75   47  150  362 5150]]


### LSTM with pretrained Glove word embeddings

Download the pretrained GloVe weights from: https://nlp.stanford.edu/projects/glove/

In [108]:
def load_glove_vectors(glove_file="./glove.6B.50d.txt"):
    """Load GloVe word vectors from a text file.

    Each non-blank line is expected to hold a word followed by its vector
    components, separated by whitespace.

    Args:
        glove_file: path to the GloVe text file.

    Returns:
        dict: maps each word to a NumPy float array of its components.
    """
    word_vectors = {}
    # Read explicitly as UTF-8 so that loading does not depend on the
    # platform's default encoding.
    with open(glove_file, encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            word, components = fields[0], fields[1:]
            word_vectors[word] = np.array([float(value) for value in components])
    return word_vectors

In [109]:
def get_emb_matrix(pretrained, word_counts, emb_size = 50):
    """Create an embedding matrix from pretrained word vectors.

    Row 0 is the all-zero padding vector and row 1 is a random vector for
    unknown words. Every word in ``word_counts`` gets the next row: its
    pretrained vector when available, otherwise a random vector drawn
    uniformly from [-0.25, 0.25).

    Args:
        pretrained: mapping from word to a vector of length ``emb_size``.
        word_counts: iterable of vocabulary words (e.g. a Counter).
        emb_size: dimensionality of the embeddings.

    Returns:
        tuple: ``(W, vocab, vocab_to_idx)`` where ``W`` is a float32 array of
        shape ``(len(word_counts) + 2, emb_size)``, ``vocab`` is a NumPy array
        of words ordered by row, and ``vocab_to_idx`` maps each word to its row.
    """
    vocab_size = len(word_counts) + 2
    # Keep the mapping consistent with `vocab`: "" (padding) is row 0 and
    # "UNK" is row 1.
    vocab_to_idx = {"": 0, "UNK": 1}
    vocab = ["", "UNK"]
    # Row 0 stays all-zero and serves as the padding vector.
    W = np.zeros((vocab_size, emb_size), dtype="float32")
    W[1] = np.random.uniform(-0.25, 0.25, emb_size) # vector for unknown words
    for i, word in enumerate(word_counts, start=2):
        # Look the word up in the `pretrained` argument; the previous code
        # ignored it and read a module-level global instead.
        if word in pretrained:
            W[i] = pretrained[word]
        else:
            W[i] = np.random.uniform(-0.25,0.25, emb_size)
        vocab_to_idx[word] = i
        vocab.append(word)
    return W, np.array(vocab), vocab_to_idx

In [110]:
word_vecs = load_glove_vectors()
pretrained_weights, vocab, vocab2index = get_emb_matrix(word_vecs, counts)

In [111]:
class LSTM_glove_vecs(torch.nn.Module):
    """LSTM classifier with 5 outputs on top of frozen, pre-loaded embeddings.

    ``glove_weights`` is a NumPy array that is copied into the embedding
    table; the table is then excluded from gradient updates.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, glove_weights):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        pretrained = torch.from_numpy(glove_weights)
        self.embeddings.weight.data.copy_(pretrained)
        # Freeze the embeddings so training leaves them untouched.
        self.embeddings.weight.requires_grad = False
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 5)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x, l):
        """Return 5 logits per row of ``x``; ``l`` is ignored."""
        embedded = self.dropout(self.embeddings(x))
        _, (final_hidden, _) = self.lstm(embedded)
        return self.linear(final_hidden[-1])

In [112]:
model_glove = LSTM_glove_vecs(vocab_size, 50, 50, pretrained_weights)
model_glove=model_glove.cuda()

In [113]:
train_model(model_glove, epochs=30, lr=0.1)

train loss 1.084, val loss 1.080, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.078, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.080, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462


In [114]:
test_model(model_glove, test_dl)

 Test accuracy is 0.5784
 test_recall_score is 0.5784
 test_precision_score is 0.33454656
 test_f1_score is 0.42390593005575267
[[   0    0    0    0 1271]
 [   0    0    0    0  630]
 [   0    0    0    0  911]
 [   0    0    0    0 1404]
 [   0    0    0    0 5784]]


  _warn_prf(average, modifier, msg_start, len(result))


In [115]:
train_model(model_glove, epochs=30, lr=0.05)

train loss 1.079, val loss 1.080, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.080, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.080, val accuracy 0.663, and val rmse 1.462
train loss 1.079, val loss 1.079, val accuracy 0.663, and val rmse 1.462


In [116]:
test_model(model_glove, test_dl)

 Test accuracy is 0.5784
 test_recall_score is 0.5784
 test_precision_score is 0.33454656
 test_f1_score is 0.42390593005575267
[[   0    0    0    0 1271]
 [   0    0    0    0  630]
 [   0    0    0    0  911]
 [   0    0    0    0 1404]
 [   0    0    0    0 5784]]


  _warn_prf(average, modifier, msg_start, len(result))


## Predicting ratings using regression instead of classification

In [None]:
def train_model_regr(model, epochs=10, lr=0.001):
    """Train a regression model with Adam and mean-squared-error loss.

    Batches are read from the module-level ``train_dl`` and, after every
    epoch, the model is evaluated on the module-level ``val_dl`` through
    ``validation_metrics_regr``. A progress line is printed on epochs
    1, 6, 11, ...

    Args:
        model: module called as ``model(x, lengths)`` returning one value per
            sample with shape ``(batch, 1)``.
        epochs: number of passes over ``train_dl``.
        lr: Adam learning rate.
    """
    # Move every batch to the model's device; previously batches were left
    # where the loader produced them, which fails for a model on the GPU.
    device = next(model.parameters()).device
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable, lr=lr)
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for x, y, l in train_dl:
            x = x.long().to(device)
            y = y.float().to(device)
            y_pred = model(x, l)
            optimizer.zero_grad()
            # unsqueeze(-1) gives the targets the same (batch, 1) shape as y_pred.
            loss = F.mse_loss(y_pred, y.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss = validation_metrics_regr(model, val_dl)
        if i % 5 == 1:
            print("train mse %.3f val rmse %.3f" % (sum_loss/total, val_loss))

def validation_metrics_regr (model, valid_dl):
    """Return the root mean squared error of a regression model on a loader.

    Squared errors are summed over every sample and the square root is taken
    once at the end. Averaging per-batch RMSE values, as was done before,
    underestimates the RMSE whenever batches differ in error.

    Args:
        model: module called as ``model(x, lengths)`` returning predictions
            of shape ``(batch, 1)``.
        valid_dl: iterable of ``(x, y, lengths)`` batches.

    Returns:
        float: RMSE over all samples in ``valid_dl``.
    """
    model.eval()
    # Follow the model's device so the same code works on CPU and GPU.
    device = next(model.parameters()).device
    total = 0
    sum_sq_err = 0.0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y, l in valid_dl:
            x = x.long().to(device)
            y = y.float().to(device)
            y_hat = model(x, l)
            sum_sq_err += F.mse_loss(y_hat, y.unsqueeze(-1), reduction="sum").item()
            total += y.shape[0]
    return (sum_sq_err/total) ** 0.5

In [None]:
class LSTM_regr(torch.nn.Module):
    """Embedding -> dropout -> LSTM -> linear head producing a single value.

    The lengths argument of ``forward`` is accepted but not used.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        # Index 0 is reserved for padding.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x, l):
        """Return one prediction per row of ``x`` with shape ``(batch, 1)``."""
        embedded = self.embeddings(x)
        regularized = self.dropout(embedded)
        lstm_result = self.lstm(regularized)
        final_hidden = lstm_result[1][0]
        return self.linear(final_hidden[-1])

In [None]:
model =  LSTM_regr(vocab_size, 50, 50)

In [None]:
train_model_regr(model, epochs=30, lr=0.05)

In [None]:
train_model_regr(model, epochs=30, lr=0.05)