In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

import spacy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

from live_plot import LivePlot

In [2]:
device = 'cuda'

In [3]:
df = pd.read_csv('./sampled_df.csv')

In [4]:
texts = np.array(df['reviewText'])
labels = np.array(df['overall']).reshape(-1, 1)

In [5]:
for text in texts:
  if type(text) != str:
    print(text)
    print('warning')

In [6]:
# df = df.dropna()

In [7]:
nlp = spacy.load('en_core_web_lg')

def prepare_text(text):
  doc = nlp(text)
  vectors =  torch.tensor(np.array([token.vector for token in doc]), dtype=torch.float32)
  return vectors

In [8]:
texts_matrices = [prepare_text(text) for text in texts]

In [9]:
X = pad_sequence(texts_matrices, batch_first=True)
y = torch.tensor(labels, dtype=torch.float32)

In [10]:
# changing a shape of X because pytorch's conv1d takes the second dimension as channels (and we want word features to be channels and slide by ngram words crops)

X = X.movedim(1, 2)

In [11]:
X.shape

torch.Size([10000, 300, 358])

In [12]:
dev_size = 0.15
test_size = 0.15

bord1 = int(len(X) * (1 - (dev_size + test_size)))
bord2 = int(len(X) * (1 - test_size))

X_train, X_dev, X_test = X[:bord1].to(device), X[bord1:bord2].to(device), X[bord2:]
y_train, y_dev, y_test = y[:bord1].to(device), y[bord1:bord2].to(device), y[bord2:]

In [13]:
class CustomDataset(Dataset):
    def __init__(self, input_data, labels):
        self.input_data = input_data
        self.labels = labels

    def __len__(self):
        return len(self.input_data)

    def __getitem__(self, idx):
        return self.input_data[idx], self.labels[idx]

In [14]:
train_ds = CustomDataset(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=False, drop_last=True)

In [15]:
input_channels = X_train.shape[1]
input_len = X_train.shape[2]
output_size = 1

In [16]:
from collections.abc import Iterable

class Multiple1DConv(nn.Module):
  def __init__(self, input_channels: int, out_channels: int = 64, kernel_sizes: Iterable[int] = (3, 4, 5)):
    super().__init__()

    self.multiple_1d_convs = nn.ModuleList()

    for kernel_size in kernel_sizes:

      self.multiple_1d_convs.append(
        nn.Sequential(
          nn.Conv1d(input_channels, out_channels,
                kernel_size=kernel_size, padding=(kernel_size // 2)),
          nn.AdaptiveMaxPool1d(1)
        )
      )

  def forward(self, inputs):
    outputs = torch.concat([conv_1d_max(inputs) for conv_1d_max in self.multiple_1d_convs], dim=1)
    return outputs

In [17]:
class Mulpiple1DBinaryClass(nn.Module):
  def __init__(self, input_channels, output_size, kernel_sizes=(3, 4, 5), out_channels=64):
    super().__init__()
    
    self.multiple_cnn_max_linear_relu_sigmoid = nn.Sequential(
      Multiple1DConv(input_channels, out_channels=out_channels, kernel_sizes=kernel_sizes),

      nn.Flatten(),

      nn.Linear(out_channels * len(kernel_sizes), 256),
      nn.ReLU(),
      nn.Linear(256, 256),
      nn.ReLU(),
      nn.Linear(256, output_size),
      nn.Sigmoid()
    )


  def forward(self, x):
    return self.multiple_cnn_max_linear_relu_sigmoid(x)


In [22]:
model = Mulpiple1DBinaryClass(input_channels, output_size).to(device)

train_loss_histroty = []
dev_loss_history = []

# torch.nn.utils.parameters_to_vector(model.parameters()).numel()

In [133]:
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00000012, weight_decay=2)

In [131]:
plt.close()
%matplotlib qt
live_plot = LivePlot(slice_prop=0.02)

In [141]:
for epoch in range(500):
  print(f'Epoch {epoch+1}')
  optimizer.zero_grad()

  for (X_batch, y_batch) in train_dl:
    batch_outputs = model(X_batch)
    loss_batch = loss_fn(batch_outputs, y_batch)
    loss_batch.backward()
    optimizer.step()

  with torch.no_grad():
    # measure train and dev loss at the end of epoch
    train_outputs = model(X_train)
    train_loss = loss_fn(train_outputs, y_train)
    train_loss_histroty.append(train_loss.item())

    dev_outputs = model(X_dev)
    dev_loss = loss_fn(dev_outputs, y_dev)
    dev_loss_history.append(dev_loss.item())

    print(f'Train loss: {train_loss}; Dev loss: {dev_loss}')
    live_plot.update(train_loss_histroty, dev_loss_history)

Epoch 1
Train loss: 0.12130666524171829; Dev loss: 0.26089558005332947
Epoch 2
Train loss: 0.12130500376224518; Dev loss: 0.26087871193885803
Epoch 3
Train loss: 0.12130481004714966; Dev loss: 0.2609001696109772
Epoch 4
Train loss: 0.121303029358387; Dev loss: 0.26087236404418945
Epoch 5
Train loss: 0.12130201607942581; Dev loss: 0.26089900732040405
Epoch 6
Train loss: 0.1213005781173706; Dev loss: 0.2608743906021118
Epoch 7
Train loss: 0.12129942327737808; Dev loss: 0.26089465618133545
Epoch 8
Train loss: 0.12129797041416168; Dev loss: 0.2608785927295685
Epoch 9
Train loss: 0.12129751592874527; Dev loss: 0.2608935236930847
Epoch 10
Train loss: 0.12129434198141098; Dev loss: 0.2608785331249237
Epoch 11
Train loss: 0.12129401415586472; Dev loss: 0.2608911991119385
Epoch 12
Train loss: 0.12129195779561996; Dev loss: 0.26088371872901917
Epoch 13
Train loss: 0.12129085510969162; Dev loss: 0.2608869969844818
Epoch 14
Train loss: 0.12128940969705582; Dev loss: 0.26088646054267883
Epoch 15
Tr

In [None]:
plt.close()
%matplotlib inline

plt.plot(train_loss_histroty)
plt.plot(dev_loss_history)

In [103]:
torch.save(model.state_dict(), './models/best_model_mc_cnn_arch1.pt')

In [93]:
def test(X_test, y_test):
  with torch.no_grad():
    X_test = X_test.to(device)
    y_test = y_test.to(device)

    print('ACCRURACIES:')
    accuracies = []

    Xs = [X_train, X_dev, X_test]
    ys = [y_train, y_dev, y_test]
    comments = ['1) Train', '2) Dev', '3) Test']

    for x_i, y_i, comment in zip(Xs, ys, comments):
      output = model(x_i).to('cpu')
      labels_pred = torch.tensor([round(val.item()) for val in output], device='cpu')


      y_true = y_i.clone().to('cpu').reshape(-1)

      accuracy = (labels_pred == y_true).sum().item() / y_i.shape[0]
      accuracies.append(accuracy)

      print(comment)
      print((labels_pred == y_true).sum().item(), '/', len(y_true))
      print(round(accuracy*100, 3), '%')

### Performace history (one in the top is last mesure)

In [142]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6874 / 7000
98.2 %
2) Dev
1342 / 1500
89.467 %
3) Test
1334 / 1500
88.933 %


In [140]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6872 / 7000
98.171 %
2) Dev
1339 / 1500
89.267 %
3) Test
1334 / 1500
88.933 %


In [116]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6868 / 7000
98.114 %
2) Dev
1339 / 1500
89.267 %
3) Test
1336 / 1500
89.067 %


In [112]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6863 / 7000
98.043 %
2) Dev
1340 / 1500
89.333 %
3) Test
1338 / 1500
89.2 %


In [110]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6858 / 7000
97.971 %
2) Dev
1339 / 1500
89.267 %
3) Test
1333 / 1500
88.867 %


In [108]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6852 / 7000
97.886 %
2) Dev
1339 / 1500
89.267 %
3) Test
1334 / 1500
88.933 %


In [101]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6852 / 7000
97.886 %
2) Dev
1338 / 1500
89.2 %
3) Test
1333 / 1500
88.867 %


In [99]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6840 / 7000
97.714 %
2) Dev
1337 / 1500
89.133 %
3) Test
1329 / 1500
88.6 %


In [94]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6820 / 7000
97.429 %
2) Dev
1336 / 1500
89.067 %
3) Test
1330 / 1500
88.667 %


In [88]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6811 / 7000
0.973
2) Dev
1337 / 1500
0.8913333333333333
3) Test
1334 / 1500
0.8893333333333333


In [86]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6792 / 7000
0.9702857142857143
2) Dev
1334 / 1500
0.8893333333333333
3) Test
1330 / 1500
0.8866666666666667


In [82]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6782 / 7000
0.9688571428571429
2) Dev
1334 / 1500
0.8893333333333333
3) Test
1329 / 1500
0.886


In [79]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6759 / 7000
0.9655714285714285
2) Dev
1333 / 1500
0.8886666666666667
3) Test
1328 / 1500
0.8853333333333333


In [77]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6752 / 7000
0.9645714285714285
2) Dev
1333 / 1500
0.8886666666666667
3) Test
1329 / 1500
0.886


In [74]:
test(X_test, y_test)

ACCRURACIES:
1) Train
6726 / 7000
0.9608571428571429
2) Dev
1328 / 1500
0.8853333333333333
3) Test
1326 / 1500
0.884
