# 1. Initialisation

In [None]:
!pip install sentence_transformers > /dev/null
from sentence_transformers import SentenceTransformer

In [None]:
import nltk
import numpy as np
from string import punctuation
import re
from keras.preprocessing import text
import pandas as pd
nltk.download('stopwords')
from keras.preprocessing.sequence import skipgrams
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
from torch.optim import Adam, RMSprop
nltk.download('punkt')
from sklearn.utils import shuffle
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from google.colab import auth
import gspread
from oauth2client.client import GoogleCredentials
import re
from sklearn.utils import shuffle
import time
from datetime import datetime
import os
from tqdm.auto import tqdm
import json
from google.colab import drive
from torch.utils.data import DataLoader,TensorDataset

# Prefer the GPU when one is visible; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# 2. Getting data from question bank

In [None]:
# Authenticate this Colab session and open the question-bank workbook.
auth.authenticate_user()
gc = gspread.authorize(GoogleCredentials.get_application_default())
wb = gc.open_by_url('https://docs.google.com/spreadsheets/d/1RHA3WUqdXuJruEIxS1spgjRhUnmTujVml83D1wMt_HI')

# Each worksheet is fetched as a table of raw cell values whose first row is the
# header: promote that row to the column index and keep only the rows below it.
test_data = pd.DataFrame(wb.worksheet('Test_data').get_all_values())
question_bank = pd.DataFrame(wb.worksheet('Question_Bank').get_all_values())

test_data.columns = test_data.iloc[0,:]
test_data = test_data.iloc[1:, :]

question_bank.columns = question_bank.iloc[0,:]
question_bank = question_bank.iloc[1:,:]

# Pool every non-empty reference answer from the three answer columns, column
# by column, into one flat list.
question_bank_answers = []
for answer_column in ('Answer', 'Answer_2', 'Answer_3'):
    question_bank_answers.extend(
        answer for answer in question_bank[answer_column].values if len(answer)>0
    )

In [None]:
question_bank_answers = [x for x in question_bank_answers if x is not np.nan]

# 3. BERT

In [None]:
# Left-join the question bank onto the test rows by question text, discard the
# question-bank bookkeeping columns, and keep the scoring columns in a fixed order.
_merged = pd.merge(left=test_data, right=question_bank,
                   left_on='Question', right_on='Question', how='left')
_merged = _merged.drop(columns=['Category','Unique_Question_ID','Answer',
                                'Difficulty level','MaxMarks'])
df = _merged[['Unique_ID', 'Topic','Question', 'Actual_answer',
              'Answer_2','Answer_3','Answer_variation','Expected_score']]

In [None]:
df.isnull().sum()

0
Unique_ID            0
Topic                0
Question             0
Actual_answer        0
Answer_2            23
Answer_3            23
Answer_variation     0
Expected_score       0
dtype: int64

In [None]:
df.replace(np.nan, '', inplace=True)

In [None]:
df.isnull().sum()

0
Unique_ID           0
Topic               0
Question            0
Actual_answer       0
Answer_2            0
Answer_3            0
Answer_variation    0
Expected_score      0
dtype: int64

In [None]:
bert = SentenceTransformer('bert-large-nli-stsb-mean-tokens')

Downloading:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/632 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [None]:
def _encode_lowercased(texts):
    """Embed every entry of `texts` with `bert` after converting it to a lower-cased str."""
    return bert.encode([str(entry).lower() for entry in texts])


# One embedding matrix per reference-answer column, plus one for the answers being graded.
bert_answer1 = _encode_lowercased(df.Actual_answer)
bert_answer2 = _encode_lowercased(df.Answer_2)
bert_answer3 = _encode_lowercased(df.Answer_3)
bert_variations = _encode_lowercased(df.Answer_variation)

len(bert_answer1),len(bert_answer2),len(bert_answer3),len(bert_variations)

(209, 209, 209, 209)

In [None]:
# Cosine similarity between each reference-answer embedding and the embedding
# of the answer variation being graded: one column per reference answer.
_reference_embeddings = (('BERT-1', bert_answer1),
                         ('BERT-2', bert_answer2),
                         ('BERT-3', bert_answer3))
for _column, _reference in _reference_embeddings:
    test_data[_column] = [cosine_similarity(ref.reshape(1, -1), var.reshape(1, -1))
                          for ref, var in zip(_reference, bert_variations)]
for _column, _ in _reference_embeddings:
    # cosine_similarity returns a 1x1 matrix: unwrap it, then zero out scores
    # below 0.1 and round everything else to four decimals.
    test_data[_column] = test_data[_column].apply(lambda sim: sim[0][0])
    test_data[_column] = test_data[_column].apply(lambda score: 0 if score<0.1 else round(score,4))

# This part is new. If the average BERT score is greater than 0.5, take the max
# of the BERT scores, else the min of the BERT scores. We may use this logic or
# use the max as we are currently doing.
# There is hardly any difference in the MSE on this test_set, but there may be
# appreciable results overall. We should discuss this.
_score_columns = ['BERT-1','BERT-2','BERT-3']
test_data['BERT-mean'] = test_data[_score_columns].mean(axis=1)
test_data['BERT-max'] = test_data[_score_columns].apply(lambda row: max(row), axis=1)
test_data['BERT-min'] = test_data[_score_columns].apply(lambda row: min(row), axis=1)
test_data['greater'] = test_data['BERT-mean'].apply(lambda mean_score: mean_score>0.5).astype('int')
_use_max = test_data['greater']
test_data['BERT'] = _use_max*test_data['BERT-max'] +(1- _use_max)*test_data['BERT-min']

# The score that is actually reported is the plain max; this assignment
# overrides the mean-gated value computed just above.
test_data['BERT'] = test_data['BERT-max']

In [None]:
test_data.drop(['LSA_TFIDF','LSA_Count','Doc2Vec','Minilml12v4','Minilml6v1','BERT-1','BERT-2','BERT-3','BERT-mean','BERT-min','BERT-max','greater'], axis=1, inplace=True)

In [None]:
test_data

Unnamed: 0,Unique_ID,Topic,Question,Actual_answer,Answer_variation,Expected_score,BERT
1,1,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,The decision boundary that helps to segregate ...,1,0.7607
2,2,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,It is the decision boundary in a multidimensio...,1,1.0000
3,3,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,decision boundary in higher dimensions to sepa...,0.9,0.8465
4,4,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,a plane that separates different classes,0.7,0.5157
5,5,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,plane supported by support vectors,0.6,0.9520
...,...,...,...,...,...,...,...
205,205,Decision Tree,Why is decision tree called as Greedy algorithm?,Because it only looks in the forward direction...,Optimal decision is made at each step and is n...,0.7,0.6610
206,206,Decision Tree,Why is decision tree called as Greedy algorithm?,Because it only looks in the forward direction...,No changes made in the tree structure later on,0.6,0.4993
207,207,Decision Tree,Why is decision tree called as Greedy algorithm?,Because it only looks in the forward direction...,A decision once made is final,0.4,0.6571
208,208,Decision Tree,Why is decision tree called as Greedy algorithm?,Because it only looks in the forward direction...,greedy is set as a hyperparameter for the algo...,0,0.1927


In [None]:
mean_squared_error(test_data['Expected_score'], test_data['BERT'])

0.10453045172248805

# 4. Embedding network

## 4a. Model class

In [None]:
class skipgram(nn.Module):
    """Embedding lookup for skip-gram training.

    A single embedding table (`u_embeddings`) is used for both words of a pair.
    """

    def __init__(self, vocab_size, embedding_dim=64):
        super().__init__()
        # One dense (non-sparse) table of shape (vocab_size, embedding_dim).
        self.u_embeddings = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            sparse=False,
        )

    def forward(self, u_pos, v_pos ):
        """Return the embeddings of `u_pos` and `v_pos` as a 2-tuple.

        Both id tensors are cast to int64 before the lookup.
        """
        lookup = self.u_embeddings
        return lookup(u_pos.long()), lookup(v_pos.long())

## 4b. Training the embedding network

### 4b1. Data preparation

#### Skip_gram and word2id prep

In [None]:
wpt = nltk.WordPunctTokenizer()
stop_words = nltk.corpus.stopwords.words('english')

def normalize_document(doc):
    """Reduce a document to lower-cased alphabetic tokens with stop words removed.

    Args:
        doc: Raw text of one document or line.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives).
    """
    # `flags` must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, so the previous call capped the number of removals at
    # int(re.I | re.A) and applied no flags at all.
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, flags=re.I | re.A)
    doc = doc.lower().strip()
    tokens = wpt.tokenize(doc)
    filtered_tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(filtered_tokens)

normalize_corpus = np.vectorize(normalize_document)

In [None]:
!gdown https://drive.google.com/uc?id=1Vx-So-KFWgnfK1RDAcJ5AGxRVvpbQvW6
!gdown https://drive.google.com/uc?id=12Uy_Y97UzHj2309Yalm36cBzyPuKaunl
!gdown https://drive.google.com/uc?id=1YOR8VTy1hieMumk2geyhDT-QQS6OSnfz


# handsonml = []
# with open('/content/OReilly-Hands-On-Machine-Learning.txt','r') as f:
#     for line in f:
#         handsonml.append(line)
# Read both downloaded textbooks into lists of raw lines (line endings kept).
with open('/content/An_Introduction_to_Statistical_Learning.txt','r') as f:
    stats = f.readlines()

with open('/content/Data Mining and Predictive Analytics.txt','r') as f:
    datamining = f.readlines()

Downloading...
From: https://drive.google.com/uc?id=1Vx-So-KFWgnfK1RDAcJ5AGxRVvpbQvW6
To: /content/An_Introduction_to_Statistical_Learning.txt
100% 861k/861k [00:00<00:00, 54.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=12Uy_Y97UzHj2309Yalm36cBzyPuKaunl
To: /content/Data Mining and Predictive Analytics.txt
100% 1.35M/1.35M [00:00<00:00, 38.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1YOR8VTy1hieMumk2geyhDT-QQS6OSnfz
To: /content/OReilly-Hands-On-Machine-Learning.txt
100% 975k/975k [00:00<00:00, 62.0MB/s]


In [None]:
# Training corpus: the data-mining text, then the statistics text, then the
# question-bank answers. The Hands-On ML text is currently left out (its loader
# is commented out above).
data = [*datamining, *stats, *question_bank_answers]

In [None]:
data = shuffle(data,random_state=1234)

In [None]:
# Build the skip-gram vocabulary and the per-document id sequences from the
# corpus lines collected in `data`.
alice=data

# Wrap every line in a one-element list so the next step can iterate over "words".
alice = [[x] for x in alice]
remove_terms = punctuation + '0123456789'
# NOTE(review): each inner list holds the whole line as its only "word", so
# `word not in remove_terms` is a substring test of the entire line against the
# punctuation/digit string. It only drops lines that are themselves a substring
# of that string (e.g. an empty string); it does not strip punctuation or
# digits from inside a line. Confirm whether per-token filtering was intended.
alice = [[word.lower() for word in sent if word not in remove_terms] for sent in alice]
alice = [' '.join(tok_sent) for tok_sent in alice]
# normalize_corpus is an np.vectorize wrapper around normalize_document, so the
# results are converted back with str() below.
alice = list(map(normalize_corpus,alice))
# Keep only documents with more than two whitespace-separated tokens.
alice = [str(sent) for sent in alice if len(str(sent).split()) > 2] # TODO: Make  this > 1 and train the model again #
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(alice)
word2id = tokenizer.word_index
# Reserve id 0 for padding; id2word is the inverse mapping.
word2id['PAD'] = 0
id2word = {v:k for k, v in word2id.items()}
# Every document as a list of vocabulary ids.
wids = [[word2id[w] for w in text.text_to_word_sequence(doc)] for doc in alice]
# Counts the 'PAD' entry as well.
vocab_size = len(word2id)

In [None]:
ws = 4
skip_grams = [skipgrams(wid, vocabulary_size=vocab_size, window_size=ws,seed=1234) for wid in wids]

In [None]:
len(skip_grams), vocab_size

(16495, 7709)

In [None]:
with open("word2id.json", "w") as outfile:
    json.dump(word2id, outfile)

In [None]:
# Opening JSON file
with open('word2id.json','r') as json_file:
    data = json.load(json_file)
  
    # Print the type of data variable
    print("Type:", type(data))

Type: <class 'dict'>


In [None]:
k = 32
list(data.keys())[k], data[list(data.keys())[k]],"|",list(word2id.keys())[k],word2id[list(word2id.keys())[k]]

('probability', 33, '|', 'probability', 33)

#### DATA PREP FOR BATCH TRAINING

In [None]:
# skip_grams holds one (pairs, labels) tuple per document. Transpose it once
# into a tuple of all pair-lists and a tuple of all label-lists (the previous
# version built the same transposed list twice).
pred, tar = zip(*skip_grams)
# Stack every document's id pairs into one (N, 2) array and split it into its
# two columns. np.vstack is used because np.row_stack, its alias, is deprecated
# as of NumPy 2.0.
first_ele, second_ele = np.column_stack(np.vstack(pred))
mylabel = np.hstack(tar)
len(first_ele), len(second_ele), len(mylabel)

(1118260, 1118260, 1118260)

In [None]:
mylabel[np.where(mylabel == 0)] = -1

In [None]:
mylabel

array([ 1,  1,  1, ...,  1,  1, -1])

In [None]:
print(skip_grams[0][0][0],skip_grams[0][1][0])
print(first_ele[0],second_ele[0],mylabel[0])

[567, 884] 1
567 884 1


In [None]:
first_ele = torch.from_numpy(first_ele)
second_ele = torch.from_numpy(second_ele)
mylabel = torch.from_numpy(mylabel)
first_ele

tensor([ 567,    1, 1022,  ...,   50,  114,  114])

In [None]:
len(first_ele),len(first_ele)-524288*2

(1118260, 69684)

In [None]:
# Positional 80/20 split of the pair tensors: the first 80% is the training
# slice, the remainder the held-out slice.
split_at = int(len(first_ele)*0.8)
x1_train, x1_test = first_ele[:split_at], first_ele[split_at:]
x2_train, x2_test = second_ele[:split_at], second_ele[split_at:]
y_train, y_test = mylabel[:split_at], mylabel[split_at:]

In [None]:
len(x1_train),len(x2_train),len(y_train), len(x1_test),len(x2_test),len(y_test)

(894608, 894608, 894608, 223652, 223652, 223652)

In [None]:
# Shuffled batches of (first id, second id, label) for embedding training;
# drop_last=True discards the final partial batch on every pass.
# NOTE(review): this loader is built from the full first_ele/second_ele/mylabel
# tensors, not from x1_train/x2_train/y_train, so the pairs held out as
# x1_test/x2_test/y_test are trained on as well and the "test Loss" printed
# during training is not an out-of-sample figure. Confirm whether that is intended.
train_data_loader = DataLoader(TensorDataset(first_ele,second_ele,mylabel),
                               batch_size=524288,shuffle=True,drop_last=True)

In [None]:
counter = 0
for f,s,lb in train_data_loader:
    print(len(f),len(s),len(lb))
    if counter == 0:
        break
    counter += 1

524288 524288 524288
524288 524288 524288


### 4b2. Training the embedding model class

In [None]:
drive.mount('/content/drive',force_remount=True, timeout_ms=54000000)

Mounted at /content/drive


In [None]:
# Start from a fresh embedding model every time this cell is run.
try:
    del model_sk
except NameError:
    # First run in this kernel: there is nothing to discard. Only NameError is
    # caught so that any other failure is not silently hidden.
    print('No model to delete')
else:
    print('model deleted')
finally:
    model_sk = skipgram(vocab_size).to(device)
    print('model created')

# Cosine-based objective over pairs labelled +1 / -1.
loss_function = nn.CosineEmbeddingLoss()
optimizer = torch.optim.Adam(model_sk.parameters(), lr=1e-2)


No model to delete
model created


In [None]:
# Run this when training of the word embedding is required.
#   training Loss: 1.3842790126800537   test Loss:  0.6941006183624268, embed_dim = 64
last_train_loss =  1.384
if True:
    epochs=20

    # The held-out tensors never change, so move them to the device once
    # instead of once per epoch.
    ut = x1_test.type(torch.float).to(device)
    vt = x2_test.type(torch.float).to(device)
    lt = y_test.type(torch.float).to(device)

    for epoch in range(epochs):

        # Sum of the per-batch losses for this epoch (not an average).
        tloss = 0
        for f,s,lb in tqdm(train_data_loader):
            # Ids travel as float tensors; the model casts them back with .long().
            u = f.type(torch.float).to(device)
            v = s.type(torch.float).to(device)
            l = lb.type(torch.float).to(device)

            u_t,v_t = model_sk(u, v)
            loss = loss_function(u_t,v_t,l)

            optimizer.zero_grad()
            loss.backward()
            tloss+=loss.item()
            optimizer.step()
        print('Epoch:', epoch, '\t training Loss:', tloss, end=' ')

        # Performance on the held-out slice. No gradients are needed here, so
        # skip building the autograd graph for this large batch.
        with torch.no_grad():
            ut_t,vt_t = model_sk(ut, vt)
            testloss = loss_function(ut_t,vt_t,lt)

        print('\t test Loss: ', testloss.item())

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 0 	 training Loss: 0.8905025720596313 	 test Loss:  0.44091907143592834


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 1 	 training Loss: 0.8787016868591309 	 test Loss:  0.43491101264953613


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 2 	 training Loss: 0.8669116795063019 	 test Loss:  0.4287092685699463


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 3 	 training Loss: 0.853960394859314 	 test Loss:  0.42232510447502136


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 4 	 training Loss: 0.8412169814109802 	 test Loss:  0.41577455401420593


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 5 	 training Loss: 0.8279577791690826 	 test Loss:  0.40907299518585205


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 6 	 training Loss: 0.814611405134201 	 test Loss:  0.40222790837287903


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 7 	 training Loss: 0.8005391359329224 	 test Loss:  0.39524704217910767


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 8 	 training Loss: 0.7864361703395844 	 test Loss:  0.38813793659210205


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 9 	 training Loss: 0.772198349237442 	 test Loss:  0.3809088170528412


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 10 	 training Loss: 0.7578393220901489 	 test Loss:  0.3735724687576294


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 11 	 training Loss: 0.7432254254817963 	 test Loss:  0.36613771319389343


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 12 	 training Loss: 0.7279910445213318 	 test Loss:  0.3586173355579376


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 13 	 training Loss: 0.7130223214626312 	 test Loss:  0.35102084279060364


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 14 	 training Loss: 0.6978701651096344 	 test Loss:  0.34335917234420776


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 15 	 training Loss: 0.6823867559432983 	 test Loss:  0.3356455862522125


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 16 	 training Loss: 0.6671961843967438 	 test Loss:  0.3278932571411133


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 17 	 training Loss: 0.6517157852649689 	 test Loss:  0.3201161026954651


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 18 	 training Loss: 0.6362051367759705 	 test Loss:  0.312327116727829


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 19 	 training Loss: 0.620259702205658 	 test Loss:  0.30454424023628235


In [None]:
x = torch.from_numpy(np.array([word2id[x] for x in ['supervised','learning','technique','linear','regression']])).to(device)
model_sk.u_embeddings(x).detach().cpu().numpy()[1][0]

-0.114922374

In [None]:
"""
check this later.
"""

# from sklearn.metrics.pairwise import euclidean_distances

# weights = weights.view(-1,100)
# distance_matrix = euclidean_distances(weights.detach().numpy())

# similar_words = {search_term: [id2word[idx] for idx in distance_matrix[word2id[search_term]-1].argsort()[1:6]+1] 
#                    for search_term in ['little', 'small', 'would', 'child','girl']}

# similar_words

'\ncheck this later.\n'

## 4c. Saving the model

In [None]:
# Saving the model
#
# Settings used for the saved weights:
#   Vocab size = 7709
#   embedding_dim = 64
#   batch_size in DataLoader = 524288
#   random_state for data list shuffle = 1234

if True:
    # Only the state_dict is written to disk, so the parameters do not have to
    # be frozen first. Leaving requires_grad untouched keeps model_sk trainable,
    # so the training cell can be re-run after saving without failing in
    # loss.backward().
    torch.save(model_sk.state_dict(), 'embedding_network.pt')

# 5. Loading the trained embedding network

In [None]:
class skipgram(nn.Module):
    """Embedding lookup whose weights are restored from the saved state_dict.

    Holds a single embedding table, `u_embeddings`, used for both inputs.
    """

    def __init__(self, vocab_size, embedding_dim=64):
        super().__init__()
        # One dense (non-sparse) table of shape (vocab_size, embedding_dim).
        self.u_embeddings = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            sparse=False,
        )

    def forward(self, u_pos, v_pos ):
        """Look up both id tensors (cast to int64) and return the two results as a tuple."""
        table = self.u_embeddings
        first = table(u_pos.long())
        second = table(v_pos.long())
        return first, second

In [None]:
"""
Creating an instance of the skipgram class and loading the saved state_dict

"""
######### LOADING WORD2ID FROM JSON ################
# Path need to be changed later
with open('word2id.json','r') as json_file:
    word2id = json.load(json_file)

# Print the type of the loaded vocabulary
print("Type:", type(word2id))

####################################################

embedding_dim = 64
# Size the table from the vocabulary that was actually loaded instead of a
# hard-coded 7709, so a retrained vocabulary cannot drift out of sync with it.
vocab_size = len(word2id)
embeddings = skipgram(vocab_size, embedding_dim).to(device)

if True:  # When you train word embedding above then make this true
    # embeddings.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/CAPSTONETERM3/FinalModels/embedding_network.pt')) # path need to be changed
    # map_location lets a checkpoint written on a GPU runtime load on whatever
    # `device` currently is.
    embeddings.load_state_dict(torch.load('/content/embedding_network.pt', map_location=device)) # path need to be changed
    print('model loaded')

# The embedding weights stay fixed from here on.
for para in embeddings.parameters():
    para.requires_grad = False

emb_layer = embeddings.u_embeddings

Type: <class 'dict'>
model loaded


# 6. Neural network

## 6a. Model class

In [None]:
class tester(nn.Module):
    """Two-branch GRU scorer for a pair of token-id sequences.

    Both inputs go through the same embedding layer, each through its own GRU,
    and the score is the cosine similarity of the two last-time-step outputs.

    Args:
        hidden_size: Hidden size of each GRU.
        num_layers: Number of stacked layers in each GRU.
        e_layer: Embedding module applied to both inputs.
        embedding_dim: Size of the vectors produced by `e_layer` (GRU input size).
        bidir: Whether the GRUs are bidirectional.
    """

    def __init__(self, hidden_size, num_layers,e_layer ,embedding_dim, bidir=False):

        super(tester, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.bidir=bidir
        self.num_layers = num_layers

        # Use the layer that was passed in. The previous version read the
        # module-level `emb_layer` and silently ignored this argument.
        self.embed = e_layer

        self.gru1 = nn.GRU(input_size=self.embedding_dim,
                            hidden_size=self.hidden_size,
                            num_layers=self.num_layers,
                            batch_first=True,
                            bidirectional = self.bidir,
                            dropout=0.0)
        self.gru2 = nn.GRU(input_size=self.embedding_dim,
                            hidden_size=self.hidden_size,
                            num_layers=self.num_layers,
                            batch_first=True,
                            bidirectional = self.bidir,
                            dropout=0.0)


    def forward(self, x, y,hx,hy):
        """Score one batch of sequence pairs.

        Args:
            x, y: Token-id tensors laid out batch-first; cast to int64 here.
            hx, hy: Initial hidden states for gru1 / gru2 (see `init_hidden`).

        Returns:
            (out, hx, hy): the per-pair cosine similarity and the final hidden
            states of both GRUs.
        """
        x = self.embed(x.long())
        y = self.embed(y.long())

        x, hx = self.gru1(x,hx)
        y, hy = self.gru2(y,hy)

        # Keep only the output at the last time step of every sequence.
        # NOTE(review): for inputs padded at the end this is a padding
        # position - confirm that is the intended summary vector.
        x = x[:,-1]
        y = y[:,-1]

        x = F.leaky_relu(x)
        y = F.leaky_relu(y)

        out = F.cosine_similarity(x,y)
        return out,hx,hy


    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state for one GRU.

        The tensor is created on the same device and with the same dtype as the
        module's first parameter, and has one slice per layer and direction so
        that it is also valid when `bidir=True`.
        """
        weight = next(self.parameters())
        num_directions = 2 if self.bidir else 1
        hidden = weight.new_zeros(self.num_layers * num_directions, batch_size, self.hidden_size)
        return hidden

## 6b. Training the neural network

### 6b1. Data preparation before sending the answers

In [None]:
# Compiled once at definition time instead of on every call. Raw strings keep
# the regex escapes from being read as (invalid) string escapes.
_BAD_SYMBOLS_RE = re.compile(r"""[/$%{}^'"#\\|@,;[\]\-`~_]""")
_NON_WORD_RE = re.compile(r'[^\w+]')


def coder(sentence, max_len):
    """Encode a sentence as a fixed-length tensor of vocabulary ids.

    '=' is replaced by the word 'equals', symbols and other non-word characters
    become spaces, and the lower-cased words are looked up in `word2id`. Words
    that are not in the vocabulary are skipped.

    Args:
        sentence: Text to encode. Anything that is not a str is encoded as an
            all-padding tensor (the best-effort behaviour of the earlier version).
        max_len: Length of the returned tensor.

    Returns:
        A 1-D float32 tensor of length `max_len`: the ids of the known words,
        truncated to `max_len` and right-padded with 0.
    """
    if not isinstance(sentence, str):
        sentence = ''

    sentence = sentence.replace('=', 'equals')
    sentence = _BAD_SYMBOLS_RE.sub(' ', sentence)
    sentence = _NON_WORD_RE.sub(' ', sentence)

    ids = [word2id[word] for word in sentence.lower().split() if word in word2id]
    # Truncate before padding: the earlier version computed a negative pad
    # size for sentences longer than max_len and crashed in torch.zeros.
    ids = ids[:max_len]

    coded = torch.zeros(max_len)
    if ids:
        coded[:len(ids)] = torch.tensor(ids, dtype=coded.dtype)
    return coded

In [None]:
max_len = 50 # max length of answers

actuals = test_data.Actual_answer.tolist()
variations = test_data.Answer_variation.tolist()
expectedScore = test_data.Expected_score.tolist()
dfd = pd.DataFrame({'actuals':actuals, 'variations':variations,'expectedScore':expectedScore})
# Seeded so that the positional train/test split taken from this shuffled frame
# is the same on every run (same seed as the corpus shuffle above).
dfs = shuffle(dfd, random_state=1234)

actuals = dfs['actuals']
variations = dfs['variations']
scores = np.array(dfs['expectedScore'].tolist(),dtype=np.float64)

# One fixed-length row of vocabulary ids per answer.
actuals_coded = np.array([coder(x, max_len).numpy() for x in actuals])
variations_coded = np.array([coder(x, max_len).numpy() for x in variations])

In [None]:
# Positional split of the shuffled rows: the first `train_size` rows train the
# GRU model, the remaining rows are held out.
train_size = 146

actuals_coded_train, actuals_coded_test = actuals_coded[:train_size], actuals_coded[train_size:]
variations_coded_train, variations_coded_test = variations_coded[:train_size], variations_coded[train_size:]
scores_train, scores_test = scores[:train_size], scores[train_size:]

# Only the training arrays become device tensors here; the held-out arrays stay
# NumPy arrays.
actuals_coded_train = torch.from_numpy(actuals_coded_train).to(device)
variations_coded_train = torch.from_numpy(variations_coded_train).to(device)
scores_train = torch.from_numpy(scores_train).to(device)

In [None]:
train_data_loader_2 = DataLoader(TensorDataset(actuals_coded_train,variations_coded_train,scores_train),batch_size=146,shuffle=True)

### 6b2. Training

In [None]:
# embedding_dim = 64 --> should be same as skipgram(class) u_embeddinglayer 
hs = 50
nl = 2
# emb_layer = embeddings.u_embeddings ---> formed while loading embedding layer

try:
    del model
except:
    print('No previous model')
else:
    print('model deleted')
finally:
    model = tester(hidden_size = hs,e_layer=emb_layer, embedding_dim = 64, num_layers=nl).to(device)
    print('model created')
opt = Adam(params = model.parameters(), lr = 1e-3)
criterion = nn.MSELoss()

model deleted
model created


In [None]:
# Per-epoch weight snapshots written by train_gru: {epoch index: state_dict copy}.
model_state_dict_dict = {}

def train_gru(data_loader,model, epochs, optimizer=None, loss_fn=None):
    """Train `model` on the batches of `data_loader` for `epochs` passes.

    Args:
        data_loader: Iterable yielding (actual ids, variation ids, score) batches.
        model: Module exposing `init_hidden(batch_size=...)` and a forward of the
            form `model(act, var, hx, hy) -> (out, hx, hy)`.
        epochs: Number of passes over `data_loader`.
        optimizer: Optimizer to step; defaults to the notebook-level `opt`.
        loss_fn: Loss applied to (out, score); defaults to the notebook-level
            `criterion`.

    Returns:
        The same `model` object, trained in place.

    Side effects:
        Stores a CPU copy of the weights after every epoch in
        `model_state_dict_dict[epoch]` and prints the loss of the last batch
        of each epoch.
    """
    optimizer = opt if optimizer is None else optimizer
    loss_fn = criterion if loss_fn is None else loss_fn

    model.train()

    for epoch in range(epochs):

        training_loss = float('nan')  # reported as-is if the loader yields no batch
        # Iterate the loader that was passed in; the previous version always
        # read the notebook-level `train_data_loader_2`.
        for act, var, scr in tqdm(data_loader):

            batch_size = act.size()[0]
            hx = model.init_hidden(batch_size=batch_size)
            hy = model.init_hidden(batch_size=batch_size)

            out,hx,hy = model(act, var,hx,hy)
            loss = loss_fn(out, scr.float())

            training_loss = loss.item()
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

        # state_dict() returns references to the live parameters, so copy the
        # tensors; otherwise every stored "snapshot" ends up holding the final
        # weights.
        model_state_dict_dict[epoch] = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

        print(f"Epoch {epoch+1}/{epochs} *** Training loss: {training_loss} ")

    return model



def test_gru(actuals_coded_test,variations_coded_test,scores_test,model):
  
    actuals_coded_test = torch.from_numpy(actuals_coded_test).to(device)
    variations_coded_test = torch.from_numpy(variations_coded_test).to(device)
    scores_test = torch.from_numpy(scores_test).to(device)

        
    hxt = model.init_hidden(batch_size=actuals_coded_test.size()[0]).data
    hyt = model.init_hidden(batch_size=actuals_coded_test.size()[0]).data

    tout,hxt,hyt = model(actuals_coded_test, variations_coded_test,hxt,hyt)
    tloss = criterion(tout, scores_test.float())
    testing_loss = tloss.item()


    print(f"Testing_loss: {testing_loss} ")

In [None]:
if True:
    tmodel = train_gru(data_loader=train_data_loader_2,model=model, epochs=100)

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/100 *** Training loss: 0.27111580967903137 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 2/100 *** Training loss: 0.16423383355140686 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 3/100 *** Training loss: 0.15739986300468445 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 4/100 *** Training loss: 0.17138659954071045 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 5/100 *** Training loss: 0.16600261628627777 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 6/100 *** Training loss: 0.1552886962890625 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 7/100 *** Training loss: 0.15130729973316193 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 8/100 *** Training loss: 0.15562856197357178 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 9/100 *** Training loss: 0.15814530849456787 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 10/100 *** Training loss: 0.1558043658733368 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 11/100 *** Training loss: 0.15276892483234406 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 12/100 *** Training loss: 0.1513267457485199 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 13/100 *** Training loss: 0.15154820680618286 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 14/100 *** Training loss: 0.15247200429439545 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 15/100 *** Training loss: 0.153202086687088 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 16/100 *** Training loss: 0.15332819521427155 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 17/100 *** Training loss: 0.15287406742572784 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 18/100 *** Training loss: 0.15214021503925323 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 19/100 *** Training loss: 0.1515045166015625 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 20/100 *** Training loss: 0.15125241875648499 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 21/100 *** Training loss: 0.151441290974617 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 22/100 *** Training loss: 0.15185898542404175 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 23/100 *** Training loss: 0.15216687321662903 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 24/100 *** Training loss: 0.15215915441513062 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 25/100 *** Training loss: 0.1518794447183609 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 26/100 *** Training loss: 0.15152639150619507 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 27/100 *** Training loss: 0.15128958225250244 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 28/100 *** Training loss: 0.15124520659446716 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 29/100 *** Training loss: 0.15135017037391663 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 30/100 *** Training loss: 0.15150006115436554 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 31/100 *** Training loss: 0.15159693360328674 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 32/100 *** Training loss: 0.15159188210964203 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 33/100 *** Training loss: 0.15149372816085815 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 34/100 *** Training loss: 0.151352196931839 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 35/100 *** Training loss: 0.15122860670089722 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 36/100 *** Training loss: 0.15116554498672485 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 37/100 *** Training loss: 0.1511685699224472 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 38/100 *** Training loss: 0.15120726823806763 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 39/100 *** Training loss: 0.15123462677001953 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 40/100 *** Training loss: 0.1512152999639511 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 41/100 *** Training loss: 0.15114256739616394 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 42/100 *** Training loss: 0.15103714168071747 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 43/100 *** Training loss: 0.15092989802360535 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 44/100 *** Training loss: 0.15084151923656464 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 45/100 *** Training loss: 0.15077213943004608 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 46/100 *** Training loss: 0.15070326626300812 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 47/100 *** Training loss: 0.15061017870903015 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 48/100 *** Training loss: 0.1504746973514557 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 49/100 *** Training loss: 0.1502930223941803 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 50/100 *** Training loss: 0.15007510781288147 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 51/100 *** Training loss: 0.14983609318733215 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 52/100 *** Training loss: 0.14958485960960388 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 53/100 *** Training loss: 0.14931297302246094 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 54/100 *** Training loss: 0.14899949729442596 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 55/100 *** Training loss: 0.14862577617168427 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 56/100 *** Training loss: 0.1481815129518509 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 57/100 *** Training loss: 0.14766983687877655 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 58/100 *** Training loss: 0.14709773659706116 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 59/100 *** Training loss: 0.1464608907699585 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 60/100 *** Training loss: 0.14573518931865692 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 61/100 *** Training loss: 0.14489221572875977 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 62/100 *** Training loss: 0.1439167708158493 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 63/100 *** Training loss: 0.1428128182888031 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 64/100 *** Training loss: 0.1415894478559494 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 65/100 *** Training loss: 0.14020989835262299 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 66/100 *** Training loss: 0.13862700760364532 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 67/100 *** Training loss: 0.1368563324213028 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 68/100 *** Training loss: 0.1348724067211151 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 69/100 *** Training loss: 0.13265685737133026 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 70/100 *** Training loss: 0.13015110790729523 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 71/100 *** Training loss: 0.12736298143863678 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 72/100 *** Training loss: 0.12431361526250839 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 73/100 *** Training loss: 0.12101225554943085 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 74/100 *** Training loss: 0.11743415147066116 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 75/100 *** Training loss: 0.11361061781644821 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 76/100 *** Training loss: 0.10946731269359589 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 77/100 *** Training loss: 0.10505019873380661 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 78/100 *** Training loss: 0.1003452017903328 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 79/100 *** Training loss: 0.09545730799436569 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 80/100 *** Training loss: 0.09050607681274414 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 81/100 *** Training loss: 0.08549138903617859 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 82/100 *** Training loss: 0.08050568401813507 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 83/100 *** Training loss: 0.07561883330345154 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 84/100 *** Training loss: 0.07090096920728683 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 85/100 *** Training loss: 0.0663156658411026 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 86/100 *** Training loss: 0.06217827647924423 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 87/100 *** Training loss: 0.058322723954916 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 88/100 *** Training loss: 0.05476478487253189 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 89/100 *** Training loss: 0.05125048756599426 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 90/100 *** Training loss: 0.04794219881296158 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 91/100 *** Training loss: 0.044719550758600235 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 92/100 *** Training loss: 0.04161612689495087 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 93/100 *** Training loss: 0.03871346265077591 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 94/100 *** Training loss: 0.03599343076348305 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 95/100 *** Training loss: 0.033666908740997314 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 96/100 *** Training loss: 0.03232843056321144 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 97/100 *** Training loss: 0.030652619898319244 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 98/100 *** Training loss: 0.028048641979694366 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 99/100 *** Training loss: 0.02669711597263813 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 100/100 *** Training loss: 0.026168225333094597 


In [None]:
# Evaluate tmodel on the *_test arrays; test_gru (defined earlier in the
# notebook) prints the resulting testing loss.
test_gru(
    actuals_coded_test=actuals_coded_test,
    variations_coded_test=variations_coded_test,
    scores_test=scores_test,
    model=tmodel,
)

Testing_loss: 0.12302373349666595 


In [None]:
# Score every (actual, variation) pair with tmodel in one batch and compare the
# predictions with the reference scores.
n_pairs = len(actuals_coded)
thx = tmodel.init_hidden(batch_size=n_pairs).data
thy = tmodel.init_hidden(batch_size=n_pairs).data
output, thx, thy = tmodel(
    torch.from_numpy(actuals_coded).to(device),
    torch.from_numpy(variations_coded).to(device),
    thx,
    thy,
)
# Detach from the autograd graph and bring the predictions back to NumPy for sklearn.
output = output.detach().cpu().numpy()

mean_squared_error(scores, output)

0.054374763586958

In [None]:
# Move the encoded pairs and their scores onto `device` and wrap them in a DataLoader.
# torch.as_tensor accepts ndarrays and tensors alike, so re-running this cell after the
# names have already been rebound to tensors no longer raises (torch.from_numpy only
# accepts ndarrays). For ndarray input the result is the same as torch.from_numpy.
actuals_coded = torch.as_tensor(actuals_coded).to(device)
variations_coded = torch.as_tensor(variations_coded).to(device)
scores = torch.as_tensor(scores).to(device)

# NOTE(review): batch_size=209 is presumably the number of rows, so that one epoch is a
# single batch (the training logs show 1 step per epoch) — confirm.
train_data_loader_full = DataLoader(
    TensorDataset(actuals_coded, variations_coded, scores),
    batch_size=209,
    shuffle=True,
)

In [None]:
# Empty dict, presumably meant to hold model state_dicts; the only uses visible in this
# part of the notebook are commented-out lookups such as model_state_dict_dict[103].
model_state_dict_dict = {}

In [None]:
# Continue training the already-fitted tmodel for another 50 epochs and keep the
# value returned by train_gru as final_model.
# NOTE(review): this passes train_data_loader_2 although train_data_loader_full is built
# just above and is not used in any visible cell — confirm which loader is intended.
final_model = train_gru(
    data_loader=train_data_loader_2,
    model=tmodel,
    epochs=50,
)

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/50 *** Training loss: 0.024752266705036163 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 2/50 *** Training loss: 0.02355591021478176 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 3/50 *** Training loss: 0.02308559976518154 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 4/50 *** Training loss: 0.022238455712795258 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 5/50 *** Training loss: 0.021638600155711174 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 6/50 *** Training loss: 0.021361365914344788 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 7/50 *** Training loss: 0.020684078335762024 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 8/50 *** Training loss: 0.02028919942677021 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 9/50 *** Training loss: 0.020001230761408806 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 10/50 *** Training loss: 0.01951606757938862 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 11/50 *** Training loss: 0.019271768629550934 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 12/50 *** Training loss: 0.018858782947063446 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 13/50 *** Training loss: 0.018451448529958725 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 14/50 *** Training loss: 0.018046565353870392 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 15/50 *** Training loss: 0.01761329360306263 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 16/50 *** Training loss: 0.017367811873555183 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 17/50 *** Training loss: 0.017093412578105927 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 18/50 *** Training loss: 0.01685984805226326 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 19/50 *** Training loss: 0.01664872094988823 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 20/50 *** Training loss: 0.016387399286031723 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 21/50 *** Training loss: 0.01619880460202694 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 22/50 *** Training loss: 0.015943024307489395 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 23/50 *** Training loss: 0.0157389584928751 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 24/50 *** Training loss: 0.015528332442045212 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 25/50 *** Training loss: 0.015277236700057983 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 26/50 *** Training loss: 0.015071981586515903 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 27/50 *** Training loss: 0.014845470897853374 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 28/50 *** Training loss: 0.01460950169712305 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 29/50 *** Training loss: 0.014409852214157581 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 30/50 *** Training loss: 0.01418234035372734 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 31/50 *** Training loss: 0.013954472728073597 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 32/50 *** Training loss: 0.013744846917688847 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 33/50 *** Training loss: 0.013524839654564857 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 34/50 *** Training loss: 0.01330858189612627 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 35/50 *** Training loss: 0.013100884854793549 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 36/50 *** Training loss: 0.012884821742773056 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 37/50 *** Training loss: 0.0126643693074584 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 38/50 *** Training loss: 0.01246353704482317 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 39/50 *** Training loss: 0.012273519299924374 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 40/50 *** Training loss: 0.01207758765667677 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 41/50 *** Training loss: 0.01188626792281866 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 42/50 *** Training loss: 0.011699778027832508 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 43/50 *** Training loss: 0.011511703953146935 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 44/50 *** Training loss: 0.01132613979279995 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 45/50 *** Training loss: 0.011141680181026459 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 46/50 *** Training loss: 0.010957407765090466 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 47/50 *** Training loss: 0.010767250321805477 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 48/50 *** Training loss: 0.01056823879480362 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 49/50 *** Training loss: 0.010368905030190945 


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 50/50 *** Training loss: 0.010164747014641762 


In [None]:
# # Training on all data
# final_model = tester(hidden_size=50,num_layers=2,e_layer=emb_layer,embedding_dim=64)

# final_model.load_state_dict(torch.load('tester.pt'))
# final_model = final_model.to(device)
# for param in final_model.parameters():
#     param.requires_grad = True

In [None]:
# # Should we proceed with above trained model or instantiate new one
# if True:
#     train_gru(actuals_coded_train=actuals_coded,variations_coded_train=variations_coded,
#             scores_train=scores,actuals_coded_test=actuals_coded_test,
#             variations_coded_test=variations_coded_test,scores_test=scores_test,model=final_model, epochs=50)

In [None]:
# model.load_state_dict(model_state_dict_dict[103])
# Score every pair with final_model in one batch and report the MSE against the labels.
n_pairs = len(actuals_coded)
thx = final_model.init_hidden(batch_size=n_pairs)
thy = final_model.init_hidden(batch_size=n_pairs)
with torch.no_grad():  # evaluation only: no need to build the autograd graph
    output, thx, thy = final_model(actuals_coded, variations_coded, thx, thy)
output = output.detach().cpu().numpy()
# loss = F.mse_loss(output, torch.from_numpy(scores).to(device).float())
# Convert the labels to NumPy once; the guard keeps the cell re-runnable, because on a
# second run `scores` is already an ndarray and has no .detach().
if torch.is_tensor(scores):
    scores = scores.detach().cpu().numpy()
mean_squared_error(scores, output)

0.043161549351883974

## 6c. SAVING TESTER MODEL

In [None]:
# Saving the tester model (final_model) produced by the training cells above.
# Original note, kept verbatim: "Run this only when you train word embedding".
# NOTE(review): that note looks copied from the embedding cell; this cell saves the tester model.
# Reference hyper-parameters, left commented out as in the original:
# hs = 50
# embedding_dim = 64
# num_layer = 2

# Freeze every parameter in memory, then write the weights to disk.
for para in final_model.parameters():
    para.requires_grad = False
torch.save(final_model.state_dict(), 'final_tester.pt')

# 7. Loading tester model 

In [None]:
class tester(nn.Module):
    """Scores the similarity of two token-id sequences.

    Each sequence is embedded with a shared embedding layer, encoded by its own
    GRU (``gru1`` for the first input, ``gru2`` for the second), reduced to the
    GRU output at the last time step, passed through a leaky ReLU, and the two
    resulting vectors are compared with cosine similarity.

    Args:
        hidden_size: Hidden size of both GRUs.
        num_layers: Number of stacked layers in each GRU.
        e_layer: Embedding module applied to both inputs. It must map integer
            ids to vectors of size ``embedding_dim``.
        embedding_dim: Size of the vectors produced by ``e_layer``.
        bidir: If True both GRUs are bidirectional.
    """

    def __init__(self, hidden_size, num_layers,e_layer ,embedding_dim, bidir=False):

        super().__init__()
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.bidir=bidir
        self.num_layers = num_layers

        # Use the layer that was passed in. The previous code ignored `e_layer` and
        # read a notebook-level global named `emb_layer` instead.
        self.embed = e_layer

        # Two independent encoders with identical configuration (no weight sharing).
        self.gru1 = self._make_gru()
        self.gru2 = self._make_gru()

    def _make_gru(self):
        """Build one GRU encoder from the hyper-parameters stored on the instance."""
        return nn.GRU(input_size=self.embedding_dim,
                      hidden_size=self.hidden_size,
                      num_layers=self.num_layers,
                      batch_first=True,
                      bidirectional=self.bidir,
                      dropout=0.0)

    def forward(self, x, y,hx,hy):
        """Return ``(similarity, hx, hy)`` for a batch of sequence pairs.

        Args:
            x, y: Batches of token ids; they are cast to ``long`` before the
                embedding lookup. The GRUs are batch-first, so the first
                dimension is the batch.
            hx, hy: Initial hidden states for ``gru1`` and ``gru2`` (see
                ``init_hidden``).

        Returns:
            A tuple of the per-row cosine similarity and the final hidden
            states of ``gru1`` and ``gru2``.
        """
        x = self.embed(x.long())
        y = self.embed(y.long())

        x, hx = self.gru1(x,hx)
        y, hy = self.gru2(y,hy)

        # Keep only the GRU output at the last time step of each sequence.
        x = x[:,-1]
        y = y[:,-1]

        x = F.leaky_relu(x)
        y = F.leaky_relu(y)

        out = F.cosine_similarity(x,y)
        return out,hx,hy


    def init_hidden(self, batch_size):
        """Return a zero initial hidden state for one of the GRUs.

        The state has shape ``(num_layers * num_directions, batch_size,
        hidden_size)`` and is created on the device and with the dtype of the
        GRU weights, so it no longer depends on a global ``device`` and is
        also valid when ``bidir=True``.
        """
        num_directions = 2 if self.bidir else 1
        reference = next(self.gru1.parameters())
        hidden = torch.zeros(self.num_layers * num_directions, batch_size, self.hidden_size,
                             dtype=reference.dtype, device=reference.device)
        return hidden

In [None]:
"""
Creating an instance of the skipgram class and loading the saved state_dict

"""
######### LOADING WORD2ID FROM JSON ################
# Opening JSON file
# Path need to be changed later
with open('word2id.json','r') as json_file:
    word2id = json.load(json_file)

# Print the type of data variable (done after the file has been closed)
print("Type:", type(word2id))

####################################################

embedding_dim = 64
vocab_size = 7709
# skipgram is the embedding network class defined earlier in the notebook.
embeddings = skipgram(vocab_size).to(device)

if True:  # When you train word embedding above then make this true
    # embeddings.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/CAPSTONETERM3/FinalModels/embedding_network.pt')) # path need to be changed
    # map_location loads the checkpoint tensors onto `device` regardless of where they were
    # saved. load_state_dict copies into the existing parameters, so the model stays on
    # `device` and no second .to(device) is needed.
    embeddings.load_state_dict(torch.load('embedding_network.pt', map_location=device)) # path need to be changed
    print('model loaded')

# The embeddings are used as a fixed lookup from here on.
for para in embeddings.parameters():
    para.requires_grad = False

emb_layer = embeddings.u_embeddings
# emb_layer = embeddings.v_embeddings

Type: <class 'dict'>
model loaded


In [None]:
# Build the tester network to load the saved weights into. The embedding layer
# must be supplied as well, i.e. e_layer=emb_layer.
nnModel = tester(
    hidden_size=50,
    num_layers=2,
    e_layer=emb_layer,
    embedding_dim=64,
)

In [None]:
# Relative path, matching where the save cell wrote the file
# (torch.save(final_model.state_dict(), 'final_tester.pt')) instead of assuming /content.
path = 'final_tester.pt'

if True:  # set to False to skip restoring the saved tester weights
    # map_location loads the checkpoint tensors onto `device` regardless of where they were saved.
    nnModel.load_state_dict(torch.load(path, map_location=device)) # path need to be changed
    nnModel = nnModel.to(device)
    print('model loaded')

# The loaded model is only used for prediction below, so freeze all of its parameters.
for para in nnModel.parameters():
    para.requires_grad = False

model loaded


# PREDICTION USING NN MODEL

In [None]:
# Preview the first two rows of the test sheet before any predictions are written back.
test_data.head(2)

Unnamed: 0,Unique_ID,Topic,Question,Actual_answer,Answer_variation,Expected_score,LSA_TFIDF,LSA_Count,Doc2Vec,BERT,Minilml12v4,Minilml6v1,NN_scores
1,1,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,The decision boundary that helps to segregate ...,1,,,,,,,0.985938
2,2,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,It is the decision boundary in a multidimensio...,1,,,,,,,0.989983


In [None]:
# Encode the reference answers and their variations with coder() (defined earlier
# in the notebook), using the same max_len for both columns.
max_len = 50
a1 = test_data['Actual_answer'].tolist()
a2 = test_data['Answer_variation'].tolist()

# One encoded row per answer; coder() returns a tensor, converted here to NumPy.
actuals_coded = np.array(list(map(lambda answer: coder(answer, max_len).numpy(), a1)))
variations_coded = np.array(list(map(lambda answer: coder(answer, max_len).numpy(), a2)))

In [None]:
# Tensor copies of the encoded test pairs, placed on the model's device.
actuals_coded_t, variations_coded_t = (
    torch.from_numpy(encoded).to(device)
    for encoded in (actuals_coded, variations_coded)
)

In [None]:
# model.load_state_dict(model_state_dict_dict[103])
# Score every test pair with the reloaded model in a single batch.
n_test = len(actuals_coded_t)
fthx = nnModel.init_hidden(batch_size=n_test)
fthy = nnModel.init_hidden(batch_size=n_test)
with torch.no_grad():  # inference only: no need to build the autograd graph
    nnPred, thx, thy = nnModel(actuals_coded_t, variations_coded_t, fthx, fthy)
nnPred = nnPred.detach().cpu().numpy()
# loss = F.mse_loss(output, torch.from_numpy(scores).to(device).float())

# Compare with the labels of the frame the predictions were computed from, row for row.
# The previous code used the global `scores` left over from the training section, which
# is not aligned with test_data (it reported 0.2498, while the same predictions give
# 0.0432 against Expected_score in the check further down).
mean_squared_error(test_data['Expected_score'].astype(np.float64), nnPred)

0.24978994478084524

In [None]:
# nnPred = nnModel(actuals_coded_t,variations_coded_t)
# nnPred = nnPred.detach().cpu().numpy()

In [None]:
# Write the model's predictions into the frame; this overwrites the NN_scores values
# that came with the sheet (visible in the preview above).
test_data['NN_scores'] = nnPred

In [None]:
# Inspect the first 60 rows now that NN_scores holds the fresh predictions.
test_data.head(60)

Unnamed: 0,Unique_ID,Topic,Question,Actual_answer,Answer_variation,Expected_score,LSA_TFIDF,LSA_Count,Doc2Vec,BERT,Minilml12v4,Minilml6v1,NN_scores
1,1,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,The decision boundary that helps to segregate ...,1.0,,,,,,,0.941322
2,2,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,It is the decision boundary in a multidimensio...,1.0,,,,,,,0.93238
3,3,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,decision boundary in higher dimensions to sepa...,0.9,,,,,,,0.941478
4,4,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,a plane that separates different classes,0.7,,,,,,,0.690361
5,5,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,plane supported by support vectors,0.6,,,,,,,0.847451
6,6,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,plane for classification,0.3,,,,,,,0.291937
7,7,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,plane in multidimension space,0.2,,,,,,,0.598775
8,8,SVM,What is a hyperplane in SVM,It is the decision boundary in a multidimensio...,support vector plane,0.0,,,,,,,0.054217
9,9,SVM,What is C in SVM?,A model hyperparameter which is the regularisa...,It is a hyperparameter to control the strength...,1.0,,,,,,,0.924951
10,10,SVM,What is C in SVM?,A model hyperparameter which is the regularisa...,A model hyperparameter which is the regularisa...,1.0,,,,,,,0.919996


In [None]:
# Inspect the last 60 rows as well.
test_data.tail(60)

Unnamed: 0,Unique_ID,Topic,Question,Actual_answer,Answer_variation,Expected_score,LSA_TFIDF,LSA_Count,Doc2Vec,BERT,Minilml12v4,Minilml6v1,NN_scores
150,150,KNN,What is kd-tree?,k dimensional trees are constructed in the fea...,Hyperparameter to make calculations faster,0.2,,,,,,,0.447222
151,151,KNN,What is kd-tree?,k dimensional trees are constructed in the fea...,a technique to make decision trees using knn,0.0,,,,,,,0.042982
152,152,KNN,What is kd-tree?,k dimensional trees are constructed in the fea...,a hyperparameter to construct trees,0.0,,,,,,,0.634462
153,153,KNN,When does KNN fail?,When there are extremely large number of features,if we fix the size of observations and increa...,1.0,,,,,,,0.935257
154,154,KNN,When does KNN fail?,When there are extremely large number of features,When there are extremely large number of features,1.0,,,,,,,0.958216
155,155,KNN,When does KNN fail?,When there are extremely large number of features,When the number of features is very large,0.9,,,,,,,0.874233
156,156,KNN,When does KNN fail?,When there are extremely large number of features,Very large number of features,0.9,,,,,,,0.904626
157,157,KNN,When does KNN fail?,When there are extremely large number of features,When there are a lot of features,0.8,,,,,,,0.908716
158,158,KNN,When does KNN fail?,When there are extremely large number of features,When the number of variables is very large,0.3,,,,,,,0.162198
159,159,KNN,When does KNN fail?,When there are extremely large number of features,When the data is very large,0.0,,,,,,,0.176405


In [None]:
# MSE of the stored NN predictions against the labels (cast to float64 first).
# NOTE(review): the result equals the all-data MSE reported after fine-tuning, so these
# rows were presumably also seen during training — confirm before quoting it as a test score.
expected_scores = test_data['Expected_score'].astype(np.float64)
mean_squared_error(expected_scores, test_data['NN_scores'])

0.04316154935188397

In [None]:
from sklearn.model_selection import train_test_split

# Inputs for the second-stage regressors: the BERT score and the NN score.
feature_cols = ['BERT', 'NN_scores']
X = test_data[feature_cols]
# Target: the expected score, cast to float.
y = test_data['Expected_score'].astype('float')

# 80/20 split with a fixed seed so the comparison below is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
from sklearn.ensemble import  RandomForestRegressor
from sklearn.ensemble import  GradientBoostingRegressor
from sklearn.ensemble import  AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
import statsmodels.api as sm

# Fit several regressors on (BERT, NN_scores) -> Expected_score and print each one's MSE
# on the held-out split. The single-letter names are kept because later cells refer to
# them (e.g. `ml_model = l`).
r = RandomForestRegressor(random_state=12)
g = GradientBoostingRegressor(random_state=12)
a = AdaBoostRegressor(random_state=12)
x = XGBRegressor(random_state=12)
s = SVR(kernel='linear')
# criterion is left at its default, which is mean squared error. The explicit spelling
# 'mse' was deprecated in scikit-learn 1.0 and removed in 1.2, while the new spelling
# 'squared_error' is rejected by older releases; the default works on both.
d = DecisionTreeRegressor(random_state=1234)

for label, regressor in [
    ('Random Forest', r),
    ('GB Regressor', g),
    ('AdaBoost', a),
    ('XGB Regressor', x),
    ('SVR', s),
    ('Decision tree', d),
]:
    regressor.fit(X_train, y_train)
    yp = regressor.predict(X_test)
    print(label, mean_squared_error(y_test, yp))

# The OLS model gets its intercept from an explicit constant column, so add one to both
# splits. Note that this rebinds X_train / X_test for every later cell.
X_train = sm.add_constant(X_train)
X_test = sm.add_constant(X_test)

l = sm.OLS(y_train, X_train).fit()
yy = l.predict(X_test)
print('Linear regression',mean_squared_error(y_test, yy))

Random Forest 0.10150892857142861
GB Regressor 0.0989456198872764
AdaBoost 0.10638571701411015
XGB Regressor 0.10293400007751802
SVR 0.09311575284906884
Decision tree 0.1854761904761905
Linear regression 0.09167963066643091


In [None]:
# Keep the fitted OLS model (`l`) as the combined scorer; it had the lowest held-out MSE
# in the comparison printed above.
ml_model= l

In [None]:
# Add the constant column to the full feature frame so it has the same layout as the
# design matrix the OLS model was fitted on.
X_ = sm.add_constant(X)

In [None]:
# Predict for every row of the frame, including the rows the OLS model was fitted on.
test_data['ml_model'] = ml_model.predict(X_)

In [None]:
# Side-by-side view of the two input scores, the combined prediction and the label
# for the last 50 rows.
test_data[['BERT','NN_scores','ml_model','Expected_score']].tail(50)

Unnamed: 0,BERT,NN_scores,ml_model,Expected_score
160,0.7457,0.261627,0.48119,0.0
161,0.2757,0.760682,0.289768,1.0
162,1.0,0.560704,0.866733,1.0
163,0.931,0.697562,0.870139,0.9
164,0.8551,0.575868,0.738999,0.7
165,0.7347,0.28425,0.482132,0.5
166,0.786,0.276097,0.525975,0.3
167,0.3676,0.570942,0.281534,0.0
168,0.3587,0.577053,0.276255,1.0
169,1.0,0.886171,1.027993,1.0


In [None]:
# One hand-written input row, in the column order of the fitted design matrix:
# (intercept, BERT_score, NN_score)
sample_row = [1, 0.21, 0.22]
ml_model.predict([sample_row])