In [1]:
import torch
import torch.nn as nn
import pickle
from sklearn.metrics import accuracy_score
import numpy as np
from modules.TextCleaner import Cleaner
from modules.TextPreparation import TextPreparation

In [2]:
# Get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cuda device


In [3]:
# reload train and test sets back in

with open('data/train_trans', 'rb') as handle:
    train = pickle.load(handle)
with open('data/test_trans', 'rb') as handle:
    test = pickle.load(handle)
with open('data/embeddings', 'rb') as handle:
    embeddings = pickle.load(handle)
with open('data/test_dataset', 'rb') as handle:
    df_end_test = pickle.load(handle)

In [13]:
## not used as using pytorch built in encoder ##

class EncoderLayer(nn.Module):
    def __init__(self, tokens_len, emb_size, num_heads):
        super(EncoderLayer, self).__init__()
        self.emb_size = emb_size
        self.tokens_len = tokens_len
        self.num_heads = num_heads
        self.mha = nn.MultiheadAttention(self.emb_size, 
                                        self.num_heads, 
                                        dropout=0.1, 
                                        batch_first=True
                                        )
        self.fc1 = nn.Linear(self.emb_size, self.emb_size * 4)
        self.fc2 = nn.Linear(self.emb_size * 4, self.emb_size)
        self.relu = nn.ReLU()
        self.layernorm = nn.LayerNorm(self.emb_size)
        self.dropout = nn.Dropout(0.1)
    

    def forward(self, x):
        
        x = self.layernorm(self.mha(x, x, x)[0] + x)
        x = self.layernorm(self.fc2(self.relu(self.fc1(x))) + x)

        return x


class Transformer(EncoderLayer):
    def __init__(self, tokens_len, emb_size, num_heads):
        super().__init__(tokens_len, emb_size, num_heads)
        self.emb_size = emb_size
        self.tokens_len = tokens_len
        self.num_heads = num_heads
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_size, 
                                                        nhead=self.num_heads, 
                                                        dim_feedforward=self.emb_size * 6,
                                                        dropout=0.1,
                                                        batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        self.fc1 = nn.Linear(self.emb_size, 1)
        self.sigmoid = nn.Sigmoid()

    
    def forward(self, x):

        x = self.dropout(x)
        x = self.transformer_encoder(x)
        #x = torch.sum(x, axis=1) / self.tokens_len
        x = self.fc1(x[:, 0, :])
        x = self.sigmoid(x)

        return x

In [14]:
# train transformer

torch.manual_seed(200206323)
emb_size = train.shape[-1] - 1
tokens_len = train.shape[1]
max_epochs = 5
epoch_print_num = 1
batch_size = 400
num_heads = 9
criterion = nn.BCELoss()
textPrepare = TextPreparation()
positions = textPrepare.create_pos_encodings(emb_size).float()
transformer = Transformer(  emb_size=emb_size, 
                            tokens_len=tokens_len, 
                            num_heads=num_heads,
                        ).to(device)
optimizer = torch.optim.Adam(transformer.parameters(), lr=0.002, weight_decay=0.0000001)
trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
running_loss = 0
for epoch in range(max_epochs):
    for batch in trainloader:
        y_batch = batch[:, 0, -1].to(device)
        if len(y_batch) != batch_size:
            continue
        x_batch = batch[:, :, 0:-1] + positions[0, 0:tokens_len, :]
        x_batch = x_batch.to(device)
        optimizer.zero_grad()
        estimate = torch.squeeze(transformer.forward(x_batch.clone().detach().requires_grad_(True).float()))
        loss = criterion(estimate, y_batch.clone().detach().requires_grad_(True).float())
        loss.backward()
        optimizer.step()
        loss = loss.item()
        running_loss += loss
    if epoch % epoch_print_num == 0 and epoch > 0:
        ave_loss = round(running_loss / (epoch_print_num * (train.shape[0] / batch_size)), 6)
        print(f'Average loss per text at epoch number {epoch} : {ave_loss}')
        running_loss = 0

Average loss per text at epoch number 1 : 0.733791
Average loss per text at epoch number 2 : 0.226435
Average loss per text at epoch number 3 : 0.205577
Average loss per text at epoch number 4 : 0.193431


In [15]:
# test on test set
testloader = torch.utils.data.DataLoader(test, batch_size=test.shape[0], shuffle=False)
with torch.no_grad():
    for batch in testloader:
        x_batch = batch[:, :, 0:-1] + positions[0, 0:tokens_len, :]
        x_batch = x_batch.to(device)
        y_batch = batch[:, 0, -1]
        estimate = np.array(transformer.eval().forward(x_batch.clone().detach().float()).cpu())
accuracy = round(accuracy_score(y_batch, np.around(estimate, 0)), 2)
print(accuracy)

0.92


In [20]:
# select a sentence from the dataset

sentence = df_end_test.iloc[500, 0]
print(sentence)

AH-mazing pizza!
I hosted a corporate meeting a while back in April 2016 and ordered pizza from Payless Pizza 2  I was dubious until the first bite! Now that was a great pizza.! The people attending the meeting took some back to their hotels and asked to have the leftovers for lunch the next day! 
Since that time every time a member of the company visits town they ask for "that awesome pizza" Courteous staff, well priced, fast and DELICIOUS!   The best thing is the consistency of their product.  Its always tastes the same which is also Ah-mazing! ;-)


In [21]:
# test on custom sentences
x = sentence

cleaner = Cleaner()
x_cleaned = cleaner.clean_text(x)
print(f'length of sentence: {len(x_cleaned)}')
x_vectorised = np.reshape(textPrepare.vectorise_texts(x_cleaned, embeddings, tokens_len), (1, tokens_len, emb_size))
sampleloader = torch.utils.data.DataLoader(x_vectorised, batch_size=1, shuffle=False)
with torch.no_grad():
    for batch in sampleloader:
        batch = batch.to(device)
        #x1 = torch.sum(batch, axis=1)
        #estimate1 = np.array(neuralnet.eval().forward(x1.clone().detach().float()))[0][0]
        estimate2 = np.array(transformer.eval().forward(batch.clone().detach().float()).cpu())[0][0]
        if estimate2 >= 0.5:
            print(f'Classified as GOOD review. Certainty: {round(max([estimate2, 1 - estimate2]), 4)}')
        else:
            print(f'Classified as BAD review. Certainty: {round(max([estimate2, 1 - estimate2]), 4)}')

length of sentence: 96
Classified as GOOD review. Certainty: 0.9883999824523926
