In [None]:
import pathlib
import pickle
import string
import sys
import re

import numpy as np
import pandas as pd

In [None]:
import torch
import torch.nn.functional as F

In [None]:
# Make the notebook's working directory importable so that `src.*` resolves.
# `sys.path` entries must be plain strings: the import machinery ignores any
# other type, so the Path object is converted before being added.
current_dir = pathlib.Path('.').resolve()
if str(current_dir) not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(str(current_dir))

In [None]:
from src.dataset import Dataset

In [None]:
# Build the project Dataset from the three artefacts stored under `output/`
# (presumably written by an earlier preprocessing step -- verify).
dataset = Dataset(
    vocabulary='output/vocabulary.pkl',
    tags='output/tags.pkl',
    dataset='output/processed.csv.gz',
)

# Sequence length option; the same value is later read back as
# `dataset.max_length` when the model is created.
dataset.set_options(max_length=250)

In [None]:
# DataLoader settings: shuffled mini-batches of 64 samples, loaded by
# four worker processes.
params = dict(batch_size=64, shuffle=True, num_workers=4)

# Batch iterator over `dataset` consumed by the training loop.
data_gen = torch.utils.data.DataLoader(dataset, **params)

In [None]:
class ScaledEmbedding(torch.nn.Embedding):
    """
    Embedding layer whose weights are drawn from a zero-mean normal
    distribution with standard deviation ``1 / embedding_dim``.

    When a ``padding_idx`` is configured, the corresponding row is
    zeroed after the random initialisation.
    """

    def reset_parameters(self):
        """
        (Re-)initialise the embedding matrix in place.
        """
        std = 1.0 / self.embedding_dim
        weights = self.weight.data
        weights.normal_(0, std)

        pad = self.padding_idx
        if pad is None:
            return
        # Keep the padding token mapped to the zero vector.
        weights[pad].fill_(0)

In [None]:
class ConvModel(torch.nn.Module):
    """
    1-D convolutional classifier over fixed-length sequences of word ids.

    Pipeline: embedding -> dropout -> Conv1d (64 filters, kernel 5,
    length-preserving padding) -> ReLU -> dropout -> MaxPool1d(2)
    -> three fully connected layers -> softmax over the class axis.

    Parameters
    ----------
    embedding_dim : int
        Size of each word embedding (also the Conv1d input channel count).
    vocab_size : int
        Number of rows in the embedding table.
    seq_len : int
        Length of every input sequence. It determines the input width of
        the first fully connected layer, so inputs of a different length
        raise a shape error instead of being silently reshaped.
    num_classes : int
        Width of the output layer. Defaults to 1000, the value that was
        previously hard-coded.
    """

    _NUM_FILTERS = 64
    _KERNEL_SIZE = 5
    _POOL_SIZE = 2

    def __init__(self,
                 embedding_dim=128,
                 vocab_size=10000,
                 seq_len=250,
                 num_classes=1000):
        super(ConvModel, self).__init__()

        self._embedding_dim = embedding_dim
        self._vocab_size = vocab_size
        self._seq_len = seq_len
        self._num_classes = num_classes

        # The convolution keeps the sequence length (padding = kernel // 2)
        # and the pooling halves it (floor division), so the flattened
        # feature size follows directly from `seq_len`.
        self._flat_features = self._NUM_FILTERS * (self._seq_len // self._POOL_SIZE)

        self.embeddings = ScaledEmbedding(self._vocab_size, self._embedding_dim)
        self.conv = torch.nn.Conv1d(self._embedding_dim,
                                    self._NUM_FILTERS,
                                    self._KERNEL_SIZE,
                                    padding=self._KERNEL_SIZE // 2)
        self.mp = torch.nn.MaxPool1d(self._POOL_SIZE)
        self.fc1 = torch.nn.Linear(self._flat_features, 2048)
        self.fc2 = torch.nn.Linear(2048, 1024)
        self.fc3 = torch.nn.Linear(1024, self._num_classes)

    def forward(self, words_id):
        """
        Compute class probabilities for a batch of word-id sequences.

        `words_id` is indexed as (batch, position); the result has one row
        per sample and `num_classes` columns, each row summing to 1.
        """
        # Conv1d wants channels first: (batch, position, emb) -> (batch, emb, position).
        x = self.embeddings(words_id).permute(0, 2, 1)

        # `training=self.training` makes dropout a no-op under model.eval();
        # the functional form is otherwise always active.
        x = F.dropout(x, 0.2, training=self.training)
        x = F.relu(self.conv(x))
        x = F.dropout(x, 0.2, training=self.training)
        x = self.mp(x)

        # Flatten per sample; keeping the batch axis explicit prevents a
        # wrong-length input from being folded into a different batch size.
        x = x.view(x.size(0), -1)

        # NOTE(review): there is no activation between the fully connected
        # layers -- confirm this is intentional.
        x = self.fc1(x)
        x = F.dropout(x, 0.4, training=self.training)
        x = self.fc2(x)
        x = F.dropout(x, 0.4, training=self.training)
        x = self.fc3(x)

        # Explicit class axis; the implicit-dim form is deprecated.
        return F.softmax(x, dim=1)

In [None]:
# Network with 32-dimensional embeddings; vocabulary size and sequence
# length are taken from the dataset so the two stay in sync.
model = ConvModel(
    embedding_dim=32,
    vocab_size=len(dataset._embedder._vocabulary),
    seq_len=dataset.max_length,
)

# Adam over all model parameters, learning rate 1e-3, no weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=0)

# Multi-label margin criterion used by the training loop.
loss_fn = torch.nn.MultiLabelMarginLoss()

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Number of full passes over the data loader made by the training loop.
max_epochs = 100

In [None]:
# Train `model` for `max_epochs` passes over `data_gen`, printing the mean
# batch loss after every epoch.

# The model must live on the same device as the batches. Module.to() moves
# the parameters in place, so the optimizer created earlier keeps
# referring to the same parameter objects.
model.to(device)
# Enable training-mode behaviour for layers that distinguish train/eval.
model.train()

# Loop over epochs
for epoch in range(max_epochs):
    losses = []
    print(f'Epoch {epoch}')
    for i, (local_batch, local_labels) in enumerate(data_gen):
        if i % 1000 == 0:
            print(f'Batch {i}')

        # Move and cast in one step. Casting with `.type(torch.LongTensor)`
        # would send the tensors back to the CPU.
        local_batch = local_batch.to(device=device, dtype=torch.long)
        local_labels = local_labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        predictions = model(local_batch)
        loss = loss_fn(predictions, local_labels)
        loss.backward()
        optimizer.step()

        # Store a plain float: keeping the tensor would retain each batch's
        # autograd graph and hand graph-attached tensors to np.mean.
        losses.append(loss.item())

    # An empty loader yields no losses; report NaN instead of averaging [].
    mean_loss = np.mean(losses) if losses else float('nan')
    print('[%d/%d] Loss: %.3f' % (epoch+1, max_epochs, mean_loss))