### 1. Dataset Generation

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import gensim.downloader as api
wv = api.load('word2vec-google-news-300')
from nltk import word_tokenize


In [2]:
import warnings
# Globally silence every warning for the remainder of the notebook session.
# NOTE(review): this also hides warnings raised by the libraries used below;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [3]:
# Read the tab-separated review dump (malformed rows are skipped) and keep
# only the review text and its star rating.
df = pd.read_csv(
    "data.tsv",
    sep='\t',
    on_bad_lines="skip",
)[['review_body', 'star_rating']]

def categorize(x):
    """Collapse a star rating into one of three class labels.

    Returns 3 for ratings above 3, 1 for ratings below 3, and 2 otherwise
    (i.e. when the rating is neither greater nor smaller than 3).
    """
    return 3 if x > 3 else (1 if x < 3 else 2)

# Form classes: keep rows whose rating is a Python int and whose review has
# more than two whitespace-separated words, then swap the raw star rating for
# its class label.
df = (
    df.loc[lambda frame: frame['star_rating'].apply(lambda x: isinstance(x, int))]
      .loc[lambda frame: frame['review_body'].str.split().str.len() > 2]
      .assign(rating_class=lambda frame: frame['star_rating'].apply(categorize))
      .drop(columns=['star_rating'])
)

In [4]:
# Draw a balanced sample of reviews per class. The seed is fixed so that a
# fresh "Restart & Run All" selects the same reviews (and therefore produces
# comparable metrics) every time.
SAMPLES_PER_CLASS = 20_000
SAMPLE_SEED = 42

class_one = df.query('rating_class == 1').sample(n=SAMPLES_PER_CLASS, random_state=SAMPLE_SEED).reset_index(drop=True)
class_two = df.query('rating_class == 2').sample(n=SAMPLES_PER_CLASS, random_state=SAMPLE_SEED).reset_index(drop=True)
class_three = df.query('rating_class == 3').sample(n=SAMPLES_PER_CLASS, random_state=SAMPLE_SEED).reset_index(drop=True)
data_set = pd.concat([class_one, class_two, class_three]).reset_index(drop=True)

# Persist the selection before adding the (list-valued) token column.
data_set.to_csv("data_selection.csv", index=False)
data_set['review_split'] = data_set['review_body'].apply(word_tokenize)
# The full frame is no longer needed; release it.
del df

In [5]:
# Optional restart point: reload the saved selection and re-tokenize it.
# data_set = pd.read_csv("data_selection.csv")
# data_set['review_split'] = data_set['review_body'].apply(lambda x: word_tokenize(x))
# Number of tokens in each review.
data_set['length'] = data_set['review_split'].map(len)

In [6]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag

# Universal POS tags (as requested from pos_tag via tagset='universal')
# mapped to the single-letter POS codes accepted by WordNetLemmatizer.
_UNIVERSAL_TO_WORDNET = {'NOUN': 'n', 'VERB': 'v', 'ADJ': 'a', 'ADV': 'r'}

def get_pos(x):
    """Return the WordNet POS code for a tag.

    Args:
        x: either a WordNet POS letter ('n', 'v', 'a', 'r', 's'), which is
            returned unchanged, or a universal POS tag such as 'NOUN' or 'ADV'.

    Returns:
        The matching WordNet POS letter; 'n' (noun) for anything unrecognized.
    """
    if x in ('n', 'v', 'a', 'r', 's'):
        return x
    return _UNIVERSAL_TO_WORDNET.get(str(x).upper(), 'n')

lemmatizer = WordNetLemmatizer()
# Pass the full universal tag: using only its first letter maps both 'ADV' and
# 'ADP' to 'a' (adjective), so adverbs were never lemmatized as adverbs.
data_set['review_body'] = data_set['review_body'].apply(
    lambda body: " ".join(
        lemmatizer.lemmatize(w, get_pos(p))
        for w, p in pos_tag(nltk.word_tokenize(body), tagset='universal')
    )
)

### 2. Word Embedding

> (a) Load pretrained model

In [7]:
import gensim.downloader as api

# The pretrained Google News vectors are already loaded into `wv` by the first
# cell of the notebook; only load them here when that has not happened, to
# avoid reading the large model into memory a second time.
if 'wv' not in globals():
    wv = api.load('word2vec-google-news-300')

In [8]:
def cosine_similarity(wv1, wv2):
    """Cosine of the angle between two vectors.

    Computed as the dot product divided by the product of the two Euclidean
    norms. No special handling is done for zero-length vectors.
    """
    magnitude = np.linalg.norm(wv1) * np.linalg.norm(wv2)
    return np.dot(wv1, wv2) / magnitude

# Semantic-similarity checks using the pretrained vectors in `wv`.
# NOTE(review): indexing `wv` presumably fails for words missing from its
# vocabulary; none of the lookups below are guarded -- confirm every word exists.
""" Come up with three examples
    How to make sure words are in both vocabularies?
"""
# Analogy check: compare (king - man + woman) against queen.
ex1 = cosine_similarity(wv['king'] - wv['man'] + wv['woman'], wv['queen'])
print(f"King - Man + Woman = Queen : {ex1}")
# Synonym check: compare two words directly.
ex2 = cosine_similarity(wv['excellent'], wv['outstanding'])
print(f"Excellent ~ Outstanding : {ex2}")

# Three additional hand-picked examples.
print("Example 1:")
print(f"Foot - Leg = Hand - Arm {cosine_similarity(wv['foot'] - wv['leg'], wv['hand'] - wv['arm'])}")
print("Example 2:")
print(f"Hill + Big = Mountain: {cosine_similarity(wv['hill'] + wv['big'], wv['mountain'])}")
print("Example 3:")
print(f"Orange - Red + Blue = Green: {cosine_similarity(wv['orange'] - wv['red'] + wv['blue'], wv['green'])}")


King - Man + Woman = Queen : 0.7300517559051514
Excellent ~ Outstanding : 0.556748628616333
Example 1:
Foot - Leg = Hand - Arm 0.17670604586601257
Example 2:
Hill + Big = Mountain: 0.5208643674850464
Example 3:
Orange - Red + Blue = Green: 0.33706381916999817


> (b) Train word2vec model

In [9]:
import gensim.models

# Train a Word2Vec model on the tokenized reviews, producing 300-dimensional
# vectors (the same size as the pretrained model) with a context window of 13.
# NOTE(review): no seed is passed, so the learned vectors presumably differ
# between runs -- confirm against the gensim documentation.
cm = gensim.models.Word2Vec(sentences=data_set['review_split'], vector_size=300, window=13)

In [60]:
# Repeat the same similarity checks with the vectors of the model trained on
# the reviews (`cm.wv`) so they can be compared with the pretrained results.
# NOTE(review): none of the lookups are guarded; a word absent from the review
# vocabulary would presumably raise -- confirm every word exists in `cm.wv`.
""" TODO Compare to the previous three examples. 
    How to make sure words exist in both vocabularies?
"""
print(f"King - Man + Woman = Queen : {cosine_similarity(cm.wv['king'] - cm.wv['man'] + cm.wv['woman'], cm.wv['queen'])}")
print(f"Excellent ~ Outstanding: {cosine_similarity(cm.wv['excellent'], cm.wv['outstanding'])}")

print("Example 1:")
print(f"Foot - Leg = Hand - Arm {cosine_similarity(cm.wv['foot'] - cm.wv['leg'], cm.wv['hand'] - cm.wv['arm'])}")
print("Example 2:")
print(f"Hill + Big = Mountain: {cosine_similarity(cm.wv['hill'] + cm.wv['big'], cm.wv['mountain'])}")
print("Example 3:")
print(f"Orange - Red + Blue = Green: {cosine_similarity(cm.wv['orange'] - cm.wv['red'] + cm.wv['blue'], cm.wv['green'])}")


# Earlier variant of the three examples that printed only the raw values
# (superseded by the labelled prints above).
# print("Example 1:")
# print(cosine_similarity(cm.wv['foot'] - cm.wv['leg'], cm.wv['hand'] - cm.wv['arm']))
# print("Example 2:")
# print(cosine_similarity(cm.wv['hill'] + cm.wv['big'], cm.wv['mountain']))
# print("Example 3:")
# print(cosine_similarity(cm.wv['orange'] - cm.wv['red'] + cm.wv['blue'], cm.wv['green']))


King - Man + Woman = Queen : 0.08359397202730179
Excellent ~ Outstanding: 0.6846609115600586
Example 1:
Foot - Leg = Hand - Arm 0.41826921701431274
Example 2:
Hill + Big = Mountain: 0.018984710797667503
Example 3:
Orange - Red + Blue = Green: 0.6284237504005432


### 3. Simple Models

In [11]:
def avg_w2v(wv, words):
    """Average the embedding vectors of the words that `wv` knows.

    Args:
        wv: mapping from word to vector (supports ``wv[word]`` and raises
            ``KeyError`` for unknown words). If it exposes ``vector_size``
            that dimension is used; otherwise 300 is assumed.
        words: iterable of tokens.

    Returns:
        A 1-D numpy array holding the mean vector of all in-vocabulary words,
        or an all-zero vector when no word was found (including empty input).
    """
    dim = getattr(wv, 'vector_size', 300)
    total = np.zeros(dim)
    count = 0
    for word in words:
        try:
            total += wv[word]
        except KeyError:
            # Out-of-vocabulary token: skip it. Any other error is a real
            # problem and is allowed to propagate instead of being hidden.
            continue
        count += 1
    if not count:
        return total
    return total / count

In [12]:
def first_features(wv, words, num):
    """Concatenate the embeddings of the first `num` words into one flat vector.

    Args:
        wv: mapping from word to vector (supports ``wv[word]`` and raises
            ``KeyError`` for unknown words). If it exposes ``vector_size``
            that dimension is used; otherwise 300 is assumed.
        words: sequence of tokens; may be shorter than `num`.
        num: number of leading word positions to encode.

    Returns:
        A 1-D numpy array of length ``num * dim``. Positions beyond the end of
        `words`, and positions whose word is out of vocabulary, stay all-zero.
    """
    dim = getattr(wv, 'vector_size', 300)
    features = np.zeros((num, dim))
    # zip() stops at the shorter input, so short reviews are zero-padded
    # without relying on an IndexError being swallowed.
    for i, word in zip(range(num), words):
        try:
            features[i] = wv[word]
        except KeyError:
            # Out-of-vocabulary token: leave its row as zeros.
            pass
    return features.flatten()

In [13]:
# One averaged pretrained embedding per review, laid out as numeric columns
# next to the class label (the text-derived columns are dropped).
avg_vectors = [avg_w2v(wv, tokens) for tokens in data_set['review_split']]
avg_values = pd.DataFrame(np.array(avg_vectors))
w2v_model_avg = pd.concat([data_set, avg_values], axis=1).drop(
    columns=['review_split', 'review_body', 'length']
)

In [14]:
# Split each class 80/20 separately so train and test stay balanced. A fixed
# seed makes the partition reproducible across kernel restarts.
SPLIT_SEED = 42
train1, test1 = train_test_split(w2v_model_avg.query("rating_class == 1"), test_size=0.2, random_state=SPLIT_SEED)
train2, test2 = train_test_split(w2v_model_avg.query("rating_class == 2"), test_size=0.2, random_state=SPLIT_SEED)
train3, test3 = train_test_split(w2v_model_avg.query("rating_class == 3"), test_size=0.2, random_state=SPLIT_SEED)

avg_train = pd.concat([train1, train2, train3]).reset_index(drop=True)
avg_test = pd.concat([test1, test2, test3]).reset_index(drop=True)

In [15]:
from sklearn.linear_model import Perceptron
from sklearn import metrics

# Column 0 of the train/test frames is the class label; every remaining
# column is an embedding feature.
w2v_perceptron = Perceptron()
w2v_perceptron.fit(avg_train.iloc[:, 1:], avg_train['rating_class'])

prediction = w2v_perceptron.predict(avg_test.iloc[:, 1:])
w2v_results = pd.DataFrame(
    zip(avg_test.iloc[:, 0], prediction),
    columns=['Label', 'Prediction'],
)

# Keep the report as a dict for the final summary and print the text version.
w2v_perceptron_report = metrics.classification_report(
    w2v_results['Label'], w2v_results['Prediction'], output_dict=True
)
print(metrics.classification_report(w2v_results['Label'], w2v_results['Prediction']))

              precision    recall  f1-score   support

           1       0.84      0.26      0.40      4000
           2       0.46      0.81      0.59      4000
           3       0.72      0.66      0.69      4000

    accuracy                           0.58     12000
   macro avg       0.67      0.58      0.56     12000
weighted avg       0.67      0.58      0.56     12000



In [16]:
# w2v SVM: linear support-vector classifier on the averaged embeddings.
from sklearn import svm

# Column 0 of the train/test frames is the class label; every remaining
# column is an embedding feature.
w2v_machine = svm.LinearSVC()
w2v_machine.fit(avg_train.iloc[:, 1:], avg_train['rating_class'])

prediction = w2v_machine.predict(avg_test.iloc[:, 1:])
w2v_results = pd.DataFrame(
    zip(avg_test.iloc[:, 0], prediction),
    columns=['Label', 'Prediction'],
)

# Keep the report as a dict for the final summary and print the text version.
w2v_svm_report = metrics.classification_report(
    w2v_results['Label'], w2v_results['Prediction'], output_dict=True
)
print(metrics.classification_report(w2v_results['Label'], w2v_results['Prediction']))

              precision    recall  f1-score   support

           1       0.65      0.70      0.67      4000
           2       0.58      0.53      0.55      4000
           3       0.71      0.73      0.72      4000

    accuracy                           0.65     12000
   macro avg       0.65      0.65      0.65     12000
weighted avg       0.65      0.65      0.65     12000



### 4. Feedforward Neural Networks

In [17]:
import torch
from torch.utils import data
from sklearn import metrics

In [18]:
# train1, test1 = train_test_split(w2v_model_avg.query("rating_class == 1"), test_size=0.2)
# train2, test2 = train_test_split(w2v_model_avg.query("rating_class == 2"), test_size=0.2)
# train3, test3 = train_test_split(w2v_model_avg.query("rating_class == 3"), test_size=0.2)

# avg_train = pd.concat([train1, train2, train3]).reset_index(drop=True)
# avg_test = pd.concat([test1, test2, test3]).reset_index(drop=True)

In [19]:
# Concatenated pretrained embeddings of the first 10 tokens of every review,
# laid out as numeric columns next to the class label.
feature_vectors = [first_features(wv, tokens, 10) for tokens in data_set['review_split']]
feature_values = pd.DataFrame(np.array(feature_vectors))
w2v_model_first10 = pd.concat([data_set, feature_values], axis=1).drop(
    columns=['review_split', 'review_body', 'length']
)

In [20]:
# Split each class 80/20 separately so train and test stay balanced. A fixed
# seed makes the partition reproducible across kernel restarts.
SPLIT_SEED = 42
train1, test1 = train_test_split(w2v_model_first10.query("rating_class == 1"), test_size=0.2, random_state=SPLIT_SEED)
train2, test2 = train_test_split(w2v_model_first10.query("rating_class == 2"), test_size=0.2, random_state=SPLIT_SEED)
train3, test3 = train_test_split(w2v_model_first10.query("rating_class == 3"), test_size=0.2, random_state=SPLIT_SEED)

first10_train = pd.concat([train1, train2, train3]).reset_index(drop=True)
first10_test = pd.concat([test1, test2, test3]).reset_index(drop=True)

In [21]:
# Concatenated pretrained embeddings of the first 20 tokens of every review
# (the sequence input for the recurrent models), next to the class label.
feature_vectors = [first_features(wv, tokens, 20) for tokens in data_set['review_split']]
feature_values = pd.DataFrame(np.array(feature_vectors))
rnn_dataset = pd.concat([data_set, feature_values], axis=1).drop(
    columns=['review_split', 'review_body', 'length']
)

In [22]:
# Split each class 80/20 separately so train and test stay balanced. A fixed
# seed makes the partition reproducible across kernel restarts.
SPLIT_SEED = 42
train1, test1 = train_test_split(rnn_dataset.query("rating_class == 1"), test_size=0.2, random_state=SPLIT_SEED)
train2, test2 = train_test_split(rnn_dataset.query("rating_class == 2"), test_size=0.2, random_state=SPLIT_SEED)
train3, test3 = train_test_split(rnn_dataset.query("rating_class == 3"), test_size=0.2, random_state=SPLIT_SEED)

rnn_train = pd.concat([train1, train2, train3]).reset_index(drop=True)
rnn_test = pd.concat([test1, test2, test3]).reset_index(drop=True)

In [23]:
# rnn_train.query("@rnn_train[3001] != 0.0").shape

In [24]:
# Drop the per-class split frames and the source frame; the combined
# train/test frames built in the cells above are kept.
del train1, train2, train3, test1, test2, test3, data_set

In [25]:
def train(model, learning_rate, epochs, device, train_loader, test_loader, epsilon=0.0001):
    """Train `model` with Adam and cross-entropy loss, with early stopping.

    After every epoch the model is evaluated with `predict` on `test_loader`;
    training stops as soon as the accuracy fails to beat the best accuracy seen
    so far by at least `epsilon`.

    Args:
        model: torch module mapping a feature batch to class scores.
        learning_rate: Adam learning rate.
        epochs: maximum number of epochs.
        device: device the model and batches are moved to.
        train_loader: iterable of (features, target) batches for training.
        test_loader: iterable of batches used for the per-epoch accuracy check.
        epsilon: minimum accuracy improvement required to keep training.

    Returns:
        The same model object, in the state it had when training stopped
        (this is the last epoch's state, not necessarily the best one).
    """
    # Honour the requested device instead of unconditionally calling .cuda(),
    # which fails on machines without a GPU.
    model.to(device)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    best_accuracy = 0
    for j in range(epochs):
        model.train()
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()

        # NOTE(review): the stopping criterion is computed on `test_loader`;
        # a separate validation split would avoid tuning on the test data.
        new_accuracy = predict(model, device, test_loader)['accuracy']
        if new_accuracy - epsilon < best_accuracy:
            break
        best_accuracy = max(best_accuracy, new_accuracy)
        if j % 10 == 0:
            print(f"Epoch: {j}/{epochs}, Accuracy: {best_accuracy}")
    return model
            

In [26]:
def predict(model, device, test_loader, verbose=False):
    """Evaluate `model` on `test_loader` and return a classification report.

    Args:
        model: torch module mapping a feature batch to class scores.
        device: device the model and the feature batches are moved to.
        test_loader: iterable of (features, one-hot target) batches.
        verbose: when True, also print the text form of the report.

    Returns:
        The dict produced by ``metrics.classification_report(...,
        output_dict=True)`` for labels 1..3.
    """
    pred = []
    actual = []
    # Honour the requested device instead of unconditionally calling .cuda(),
    # which fails on machines without a GPU.
    model.to(device)
    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            output = model(x.to(device))

            # argmax gives a 0-based class index; +1 restores labels 1..3.
            # Lists are extended per batch rather than re-concatenating a
            # growing array on every iteration.
            pred.extend((np.argmax(output.cpu().numpy(), axis=1) + 1).tolist())
            actual.extend((np.argmax(y.cpu().numpy(), axis=1) + 1).tolist())

    if verbose:
        print(metrics.classification_report(actual, pred))
    return metrics.classification_report(actual, pred, output_dict=True)


> (a) Average word2vec vectors

In [27]:
class MultiLayerPerceptron(torch.nn.Module):
    """Feed-forward classifier with two hidden layers.

    Layout: Linear(D_in, h1) -> Dropout(dp1) -> ReLU -> Linear(h1, h2) -> ReLU
    -> Linear(h2, D_out). The three linear weight matrices are re-initialized
    with Xavier-uniform values; biases keep their default initialization.
    """

    def __init__(self, D_in, h1, h2, D_out, dp1=0.5):
        super().__init__()
        self.l1 = torch.nn.Linear(D_in, h1, dtype=torch.float32)
        self.d1 = torch.nn.Dropout(p=dp1)
        self.a1 = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(h1, h2, dtype=torch.float32)
        self.a2 = torch.nn.ReLU()
        self.l3 = torch.nn.Linear(h2, D_out, dtype=torch.float32)
        self.weight_init()

    def weight_init(self):
        """Apply Xavier-uniform initialization to every linear layer's weights."""
        for linear in (self.l1, self.l2, self.l3):
            torch.nn.init.xavier_uniform_(linear.weight)

    def forward(self, x):
        """Run `x` through the layers in order and return the raw class scores."""
        for stage in (self.l1, self.d1, self.a1, self.l2, self.a2, self.l3):
            x = stage(x)
        return x

In [28]:
class W2V_Dataset(data.Dataset):
    """Dataset built from a 2-D array whose first column is the class label.

    Column 0 must contain 1.0, 2.0 or 3.0 and is converted to a one-hot row;
    the remaining columns are exposed unchanged as the feature tensor.
    """

    def __init__(self, data):
        key = {1.0: [1.0, 0.0, 0.0], 2.0: [0.0, 1.0, 0.0], 3.0: [0.0, 0.0, 1.0]}
        labels, features = data[:, 0], data[:, 1:]
        one_hot = np.array(list(map(key.__getitem__, labels)))
        self.x = torch.from_numpy(features)
        self.y = torch.from_numpy(one_hot)

    def __len__(self):
        """Number of samples."""
        return len(self.y)

    def __getitem__(self, index):
        """Return the (features, one-hot label) pair at `index`."""
        return self.x[index], self.y[index]

In [29]:
# Wrap the averaged-embedding train/test frames as float32 datasets.
x_train = W2V_Dataset(avg_train.to_numpy(dtype=np.float32))
x_test = W2V_Dataset(avg_test.to_numpy(dtype=np.float32))

# Shuffled mini-batches of 32 for training; fixed-order batches of 4000 for evaluation.
train_loader = data.DataLoader(dataset=x_train, batch_size=32, shuffle=True)
test_loader = data.DataLoader(dataset=x_test, batch_size=4000, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = torch.load('mlp_model_avg.pth')
# 300 input features -> hidden layers of 100 and 10 units -> 3 classes.
model = MultiLayerPerceptron(300,100,10,3)
model = train(model, learning_rate=0.001, epochs=500, device=device, train_loader=train_loader, test_loader=test_loader, epsilon=0.00001)
# Final evaluation; the dict report is reused in the accuracy summary at the end.
ff_report = predict(model, device, test_loader, verbose=True)
# 14 minutes for 500 epochs
# torch.save(model, 'long_mlp_model_avg.pth')


Epoch: 0/500, Accuracy: 0.6290833333333333
              precision    recall  f1-score   support

           1       0.65      0.68      0.67      4000
           2       0.60      0.43      0.50      4000
           3       0.64      0.80      0.71      4000

    accuracy                           0.64     12000
   macro avg       0.63      0.64      0.63     12000
weighted avg       0.63      0.64      0.63     12000



In [30]:
# """ Load mlp_model_avg """
# x_train = W2V_Dataset(avg_train.to_numpy(dtype=np.float32))
# x_test = W2V_Dataset(avg_test.to_numpy(dtype=np.float32))

# train_loader = data.DataLoader(dataset=x_train, batch_size=50, shuffle=True)
# test_loader = data.DataLoader(dataset=x_test, batch_size=4000, shuffle=False)

# model = torch.load('mlp_model_avg.pth')
# report = predict(model, device, test_loader)

> (b) First 10 word2vec vectors

In [31]:
# Wrap the first-10-token train/test frames as float32 datasets.
x_train = W2V_Dataset(first10_train.to_numpy(dtype=np.float32))
x_test = W2V_Dataset(first10_test.to_numpy(dtype=np.float32))

# Shuffled mini-batches of 32 for training; fixed-order batches of 4000 for evaluation.
train_loader = data.DataLoader(dataset=x_train, batch_size=32, shuffle=True)
test_loader = data.DataLoader(dataset=x_test, batch_size=4000, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model = torch.load('mlp_first10.pth')
# 3000 input features -> hidden layers of 100 and 10 units -> 3 classes.
model = MultiLayerPerceptron(3000,100,10,3)
model = train(model, learning_rate=0.001, epochs=500, device=device, train_loader=train_loader, test_loader=test_loader, epsilon=0.0001)
# Final evaluation; the dict report is reused in the accuracy summary at the end.
f10_report = predict(model, device, test_loader, verbose=True)

Epoch: 0/500, Accuracy: 0.5513333333333333
              precision    recall  f1-score   support

           1       0.58      0.52      0.55      4000
           2       0.50      0.49      0.49      4000
           3       0.61      0.69      0.65      4000

    accuracy                           0.56     12000
   macro avg       0.56      0.56      0.56     12000
weighted avg       0.56      0.56      0.56     12000



In [32]:
# torch.save(model, 'long_mlp_first10.pth')
# model = torch.load('mlp_first10.pth')

In [33]:
# """ Load mlp_first10 """
# x_train = W2V_Dataset(first10_train.to_numpy(dtype=np.float32))
# x_test = W2V_Dataset(first10_test.to_numpy(dtype=np.float32))

# train_loader = data.DataLoader(dataset=x_train, batch_size=1000, shuffle=True)
# test_loader = data.DataLoader(dataset=x_test, batch_size=4000, shuffle=False)

# model = torch.load('mlp_first10.pth')
# report = predict(model, device, test_loader)


### 5. Recurrent Neural Networks

In [45]:
# https://pytorch.org/docs/stable/generated/torch.nn.RNN.html
# num_layers?
class RNN(torch.nn.Module):
    """Sequence classifier: torch.nn.RNN -> Dropout -> Linear.

    Expects batch-first input of shape (batch, sequence, D_in) and returns one
    row of `output_size` class scores per sequence, computed from the RNN
    output at the last time step.
    """

    def __init__(self, D_in, hidden_size, output_size, dp=0.65):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(D_in, hidden_size, batch_first=True)
        self.d = torch.nn.Dropout(p=dp)
        self.l2 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return class scores for a batch of sequences."""
        # No initial hidden state is passed, so the recurrent layer uses its default.
        output, _ = self.rnn(x)
        output = self.d(output)  # dropout layer
        output = output[:, -1, :]  # keep only the last time step of every sequence
        return self.l2(output)

    def init_hidden(self, x):
        """Return an all-zero initial hidden state matching the batch `x`.

        The layer count is read from the recurrent layer itself (the class
        never defined `self.num_layers`), and the tensor is created on the
        same device as `x` rather than on a global `device`.
        """
        return torch.zeros(self.rnn.num_layers, x.size(0), self.hidden_size, device=x.device)

In [46]:
def train_rnn(model, learning_rate, epochs, device, train_loader, test_loader, batch_size, name, epsilon=0.00001):
    """Train a recurrent classifier with Adam, checkpointing on training loss.

    Each flat feature batch is reshaped to (batch, sequence, 300) before being
    fed to the model. After every epoch the summed per-batch training loss is
    compared with the best value so far: on improvement the whole model is
    saved to `name`; after more than 6 epochs without improvement training stops.

    Args:
        model: torch module taking (batch, sequence, 300) input.
        learning_rate: Adam learning rate.
        epochs: maximum number of epochs.
        device: device the model and batches are moved to.
        train_loader: iterable of (flat features, target) training batches.
        test_loader: loader used for the per-epoch accuracy report.
        batch_size: kept for backward compatibility; the actual size of each
            batch is used, so a short final batch no longer breaks the reshape.
        name: file path the best checkpoint is written to via ``torch.save``.
        epsilon: unused; kept for interface compatibility.

    Returns:
        The model in its final state (the best state is the one saved to `name`).
    """
    # Honour the requested device instead of unconditionally calling .cuda(),
    # which fails on machines without a GPU.
    model.to(device)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    stagnate = 0
    # Infinity rather than an arbitrary large number, so the first epoch always
    # counts as an improvement and produces a checkpoint.
    best_running_loss = float('inf')
    # predict_rnn takes the evaluation batch size as an argument; read it from
    # the loader and fall back to the previously hard-coded 4000.
    eval_batch_size = getattr(test_loader, 'batch_size', None) or 4000
    for j in range(epochs):
        model.train()
        running_loss = 0
        for x, y in train_loader:
            x = x.reshape(x.size(0), -1, 300).to(device)
            y = y.to(device)

            output = model(x)
            loss = loss_fn(output, y)

            running_loss += loss.item() / x.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        accuracy = predict_rnn(model, device, test_loader, eval_batch_size)['accuracy']
        if running_loss < best_running_loss:
            best_running_loss = running_loss
            stagnate = 0
            torch.save(model, name)
        else:
            stagnate += 1

        if stagnate > 6:
            break

        if j % 10 == 0:
            print(f"Epoch: {j}/{epochs}, Accuracy: {accuracy} Running Loss: {running_loss}")
    return model

In [47]:
def predict_rnn(model, device, test_loader, batch_size, verbose=False):
    """Evaluate a recurrent classifier and return a classification report.

    Each flat feature batch is reshaped to (batch, sequence, 300) before being
    fed to the model.

    Args:
        model: torch module taking (batch, sequence, 300) input.
        device: device the model and the feature batches are moved to.
        test_loader: iterable of (flat features, one-hot target) batches.
        batch_size: kept for backward compatibility; the actual size of each
            batch is used, so a short final batch no longer breaks the reshape.
        verbose: when True, also print the text form of the report.

    Returns:
        The dict produced by ``metrics.classification_report(...,
        output_dict=True)`` for labels 1..3.
    """
    pred = []
    actual = []
    # Honour the requested device instead of unconditionally calling .cuda(),
    # which fails on machines without a GPU.
    model.to(device)
    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            x = x.reshape(x.size(0), -1, 300).to(device)

            output = model(x)

            # argmax gives a 0-based class index; +1 restores labels 1..3.
            pred.extend((np.argmax(output.cpu().numpy(), axis=1) + 1).tolist())
            actual.extend((np.argmax(y.cpu().numpy(), axis=1) + 1).tolist())

    if verbose:
        print(metrics.classification_report(actual, pred))
    return metrics.classification_report(actual, pred, output_dict=True)

In [64]:
# Batch sizes for the training and evaluation loaders.
batch_size = 32
test_batch = 4000
# Wrap the first-20-token train/test frames as float32 datasets.
x_train = W2V_Dataset(rnn_train.to_numpy(dtype=np.float32))
x_test = W2V_Dataset(rnn_test.to_numpy(dtype=np.float32))

train_loader = data.DataLoader(dataset=x_train, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=x_test, batch_size=test_batch, shuffle=False)

# 300-dimensional inputs, hidden size 20, 3 output classes.
# NOTE: `device` is not defined in this cell; it comes from an earlier cell.
rnn_model = RNN(300,20,3)
# train_rnn writes its best checkpoint (by training loss) to "best_rnn.pth".
rnn_model = train_rnn(rnn_model, learning_rate=0.0005, epochs=500, device=device, train_loader=train_loader, test_loader=test_loader, batch_size=batch_size, name="best_rnn.pth")
# Evaluate the model as returned by train_rnn (its final state).
rnn_report = predict_rnn(rnn_model, device, test_loader, batch_size=test_batch, verbose=True)

# torch.save(rnn_model, 'long_simple_rnn.pth')

Epoch: 0/500, Accuracy: 0.49425 Running Loss: 50.383006627249415
Epoch: 10/500, Accuracy: 0.5541666666666667 Running Loss: 43.271946218092125
Epoch: 20/500, Accuracy: 0.5751666666666667 Running Loss: 41.81841569132757
Epoch: 30/500, Accuracy: 0.5964166666666667 Running Loss: 40.89222027138749
Epoch: 40/500, Accuracy: 0.5883333333333334 Running Loss: 40.16052095900022
Epoch: 50/500, Accuracy: 0.6045 Running Loss: 39.63026826425266
Epoch: 60/500, Accuracy: 0.5978333333333333 Running Loss: 39.24327415881817
Epoch: 70/500, Accuracy: 0.6045 Running Loss: 38.73418400674973
Epoch: 80/500, Accuracy: 0.6031666666666666 Running Loss: 38.5470349576085
Epoch: 90/500, Accuracy: 0.6070833333333333 Running Loss: 38.28171376479395
              precision    recall  f1-score   support

           1       0.60      0.63      0.62      4000
           2       0.52      0.51      0.52      4000
           3       0.69      0.67      0.68      4000

    accuracy                           0.61     12000
   

In [65]:
# Reload the checkpoint that train_rnn saved whenever the epoch's training
# loss improved, and re-evaluate it; this overwrites `rnn_report`.
# NOTE(review): the file holds a whole pickled model object -- only load
# checkpoints produced by this notebook.
rnn_model = torch.load("best_rnn.pth")
rnn_report = predict_rnn(rnn_model, device, test_loader, batch_size=test_batch, verbose=True)

              precision    recall  f1-score   support

           1       0.61      0.62      0.61      4000
           2       0.52      0.55      0.54      4000
           3       0.70      0.65      0.68      4000

    accuracy                           0.61     12000
   macro avg       0.61      0.61      0.61     12000
weighted avg       0.61      0.61      0.61     12000



> (a) Simple RNN

> (b) Gated Recurrent Unit Cell

In [50]:
class GRU(torch.nn.Module):
    """Sequence classifier: torch.nn.GRU -> Dropout -> Linear.

    Expects batch-first input of shape (batch, sequence, D_in) and returns one
    row of `output_size` class scores per sequence, computed from the GRU
    output at the last time step.
    """

    def __init__(self, D_in, hidden_size, output_size, dp=0.65):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.gru = torch.nn.GRU(D_in, hidden_size, batch_first=True)
        self.d = torch.nn.Dropout(p=dp)
        self.l2 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return class scores for a batch of sequences."""
        # No initial hidden state is passed, so the recurrent layer uses its default.
        output, _ = self.gru(x)
        output = self.d(output)
        output = output[:, -1, :]  # keep only the last time step of every sequence
        return self.l2(output)

    def init_hidden(self, x):
        """Return an all-zero initial hidden state matching the batch `x`.

        The layer count is read from the recurrent layer itself (the class
        never defined `self.num_layers`), and the tensor is created on the
        same device as `x` rather than on a global `device`.
        """
        return torch.zeros(self.gru.num_layers, x.size(0), self.hidden_size, device=x.device)

In [51]:
# Batch sizes for the training and evaluation loaders.
batch_size = 32
test_batch = 4000
# Wrap the first-20-token train/test frames as float32 datasets.
x_train = W2V_Dataset(rnn_train.to_numpy(dtype=np.float32))
x_test = W2V_Dataset(rnn_test.to_numpy(dtype=np.float32))

train_loader = data.DataLoader(dataset=x_train, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=x_test, batch_size=test_batch, shuffle=False)

# gru_model = torch.load('long_gru.pth')
# 300-dimensional inputs, hidden size 20, 3 output classes.
# NOTE: `device` is not defined in this cell; it comes from an earlier cell.
gru_model = GRU(300,20,3)
# train_rnn writes its best checkpoint (by training loss) to "best_gru.pth".
gru_model = train_rnn(gru_model, learning_rate=0.01, epochs=500, device=device, train_loader=train_loader, test_loader=test_loader, batch_size=batch_size, name="best_gru.pth")
# Evaluate the model as returned by train_rnn (its final state).
gru_report = predict_rnn(gru_model, device, test_loader, batch_size=test_batch, verbose=True)

# torch.save(gru_model, 'long_gru.pth')

Epoch: 0/500, Accuracy: 0.6044166666666667 Running Loss: 43.18628474615332
Epoch: 10/500, Accuracy: 0.6159166666666667 Running Loss: 36.86315435426533
Epoch: 20/500, Accuracy: 0.6248333333333334 Running Loss: 36.19553748914076
              precision    recall  f1-score   support

           1       0.62      0.62      0.62      4000
           2       0.53      0.58      0.55      4000
           3       0.72      0.65      0.68      4000

    accuracy                           0.62     12000
   macro avg       0.62      0.62      0.62     12000
weighted avg       0.62      0.62      0.62     12000



In [52]:
# Reload the checkpoint that train_rnn saved whenever the epoch's training
# loss improved, and re-evaluate it; this overwrites `gru_report`.
# NOTE(review): the file holds a whole pickled model object -- only load
# checkpoints produced by this notebook.
gru_model = torch.load('best_gru.pth')
gru_report = predict_rnn(gru_model, device, test_loader, batch_size=test_batch, verbose=True)

              precision    recall  f1-score   support

           1       0.65      0.53      0.59      4000
           2       0.52      0.60      0.56      4000
           3       0.69      0.71      0.70      4000

    accuracy                           0.61     12000
   macro avg       0.62      0.61      0.61     12000
weighted avg       0.62      0.61      0.61     12000



> (c) LSTM Unit Cell

In [53]:
class LSTM(torch.nn.Module):
    """Sequence classifier: torch.nn.LSTM -> Dropout -> Linear.

    Expects batch-first input of shape (batch, sequence, D_in) and returns one
    row of `output_size` class scores per sequence, computed from the LSTM
    output at the last time step.
    """

    def __init__(self, D_in, hidden_size, output_size, dp=0.65):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(D_in, hidden_size, batch_first=True)
        self.d = torch.nn.Dropout(p=dp)
        self.l2 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return class scores for a batch of sequences."""
        # No initial state is passed, so the recurrent layer uses its default.
        output, _ = self.lstm(x)
        output = self.d(output)
        output = output[:, -1, :]  # keep only the last time step of every sequence
        return self.l2(output)

    def init_hidden(self, x):
        """Return a single all-zero state tensor matching the batch `x`.

        The layer count is read from the recurrent layer itself (the class
        never defined `self.num_layers`), and the tensor is created on the
        same device as `x` rather than on a global `device`.
        NOTE(review): an LSTM's initial state is a (hidden, cell) pair; this
        returns one tensor, as before, so a caller would need to supply it twice.
        """
        return torch.zeros(self.lstm.num_layers, x.size(0), self.hidden_size, device=x.device)

In [54]:
# Batch sizes for the training and evaluation loaders.
batch_size = 32
test_batch = 4000
# Wrap the first-20-token train/test frames as float32 datasets.
x_train = W2V_Dataset(rnn_train.to_numpy(dtype=np.float32))
x_test = W2V_Dataset(rnn_test.to_numpy(dtype=np.float32))

train_loader = data.DataLoader(dataset=x_train, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=x_test, batch_size=test_batch, shuffle=False)

# lstm_model = torch.load('long_lstm.pth')
# 300-dimensional inputs, hidden size 20, 3 output classes.
# NOTE: `device` is not defined in this cell; it comes from an earlier cell.
lstm_model = LSTM(300,20,3)
# train_rnn writes its best checkpoint (by training loss) to "best_lstm.pth".
lstm_model = train_rnn(lstm_model, learning_rate=0.01, epochs=500, device=device, train_loader=train_loader, test_loader=test_loader, batch_size=batch_size, name="best_lstm.pth")
# Evaluate the model as returned by train_rnn (its final state).
# NOTE(review): unlike the RNN and GRU sections, "best_lstm.pth" is not
# reloaded afterwards, so `lstm_report` reflects the final model state.
lstm_report = predict_rnn(lstm_model, device, test_loader, batch_size=test_batch, verbose=True)

# torch.save(lstm_model, 'long_lstm.pth')

Epoch: 0/500, Accuracy: 0.5665833333333333 Running Loss: 47.0043888219173
Epoch: 10/500, Accuracy: 0.6195 Running Loss: 36.580284813449
Epoch: 20/500, Accuracy: 0.60425 Running Loss: 35.27969024093756
Epoch: 30/500, Accuracy: 0.6093333333333333 Running Loss: 35.22206338035841
Epoch: 40/500, Accuracy: 0.6101666666666666 Running Loss: 36.151444945751734
              precision    recall  f1-score   support

           1       0.64      0.56      0.60      4000
           2       0.51      0.58      0.54      4000
           3       0.70      0.69      0.69      4000

    accuracy                           0.61     12000
   macro avg       0.62      0.61      0.61     12000
weighted avg       0.62      0.61      0.61     12000



In [66]:
# Summary table: accuracy (rounded to 4 decimals) taken from each model's
# classification-report dict. `rnn_report` and `gru_report` were last assigned
# from the reloaded checkpoints; `lstm_report` comes from the model returned
# by train_rnn.
print(f"-"*10, "Accuracy", "-"*10)
print(f"   Perceptron: {round(w2v_perceptron_report['accuracy'],4)}")
print(f"          SVM: {round(w2v_svm_report['accuracy'],4)}")
print(f"    FNN (avg): {round(ff_report['accuracy'],4)}")
print(f"FNN (first10): {round(f10_report['accuracy'],4)}")
print(f"   Simple RNN: {round(rnn_report['accuracy'],4)}")
print(f"          GRU: {round(gru_report['accuracy'],4)}")
print(f"         LSTM: {round(lstm_report['accuracy'],4)}")
print(f"-"*30)

---------- Accuracy ----------
   Perceptron: 0.5771
          SVM: 0.6527
    FNN (avg): 0.6374
FNN (first10): 0.5644
   Simple RNN: 0.6066
          GRU: 0.6143
         LSTM: 0.6105
------------------------------
