In [1]:
# cd /content/drive/MyDrive/commit_folder/chungwadae/torch_nlp

In [2]:
# import basic libarary
import pandas
import pickle
import math
from tqdm import tqdm

# import torch module
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import DataLoader, random_split

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
computer = 'docker'

In [5]:
class PositionalEncoding(nn.Module):
    def __init__(self, dimension, vocab_size, dropout):
        super().__init__()
        self.dropout = nn.Dropout(dropout)

        pe = torch.zeros(vocab_size, dimension)
        position = torch.arange(0, vocab_size, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, dimension, 2).float()*(-math.log(10000.0)/dimension)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe',pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class TransoformerClassifierNetwork(nn.Module):
    def __init__(self, nhead, dim_feedforward, num_layers, dropout, vocab_size, dimension):
        super().__init__()

        self.emb = nn.Embedding(vocab_size, dimension)

        self.pos_encoder = PositionalEncoding(
            dimension = dimension,
            dropout = dropout,
            vocab_size = vocab_size
        )

        encoder_layer = TransformerEncoderLayer(
            d_model = dimension,
            nhead = nhead,
            dim_feedforward = dim_feedforward,
            dropout = dropout
        )

        self.transformer_encoder = TransformerEncoder(
            encoder_layer,
            num_layers = num_layers
        )

        self.linear = nn.Linear(dimension, 3)
        self.dimension = dimension

    def forward(self, input):
        x = self.emb(input)
        # x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.linear(x)
        output = nn.Softmax()(x)
        return output


In [27]:
class PositionalEncoding(nn.Module):
    def __init__(self, dimension, max_len, dropout):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.pos_emb = nn.Embedding(max_len, dimension).float()
        self.max_len = max_len
    def forward(self, x):
        positions = torch.arange(start=0, end=self.max_len, dtype=torch.int)
        x = x + self.pos_emb(positions)
        return self.dropout(x)

class TransoformerClassifierNetwork(nn.Module):
    def __init__(self, nhead, dim_feedforward, num_layers, dropout, vocab_size, dimension, max_len):
        super().__init__()

        self.emb = nn.Embedding(vocab_size, dimension)

        self.pos_encoder = PositionalEncoding(
            dimension = dimension,
            dropout = dropout,
            max_len = max_len
        )

        encoder_layer = TransformerEncoderLayer(
            d_model = dimension,
            nhead = nhead,
            dim_feedforward = dim_feedforward,
            dropout = dropout
        )

        self.transformer_encoder = TransformerEncoder(
            encoder_layer,
            num_layers = num_layers
        )

        self.linear = nn.Linear(dimension, 3)
        self.dimension = dimension

    def forward(self, input):
        input = input.float()
        x = self.emb(input)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.linear(x)
        output = nn.Softmax()(x)
        return output

In [28]:
if computer == 'colab':
  train_data_path = 'data/train_data.pkl'
  train_label_path = 'data/train_label.pkl'
  test_data_path = 'data/test_data.pkl'
elif computer == 'docker':
  train_data_path = '/root/share/data/train_data.pkl'
  train_label_path = '/root/share/data/train_label.pkl'
  test_data_path = '/root/share/data/test_data.pkl'

with open(train_data_path, 'rb') as f:
  train_data = pickle.load(f)

with open(train_label_path, 'rb') as f:
  train_label = pickle.load(f)

with open(test_data_path, 'rb') as f:
  test_data = pickle.load(f)

In [29]:
# train hyperparameters
EPOCHS = 50
BATCH_SIZE = 128

In [30]:
train, valid = random_split([[train_data[i], train_label[i]] for i in range(len(train_label))], [len(train_label)-2000,2000])
train_dataloader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(valid, batch_size=BATCH_SIZE, shuffle=False)

In [31]:
model = TransoformerClassifierNetwork(
    nhead = 4,
    dim_feedforward = 50,
    num_layers = 1,
    dropout = 0.2,
    vocab_size = 42000,
    dimension = 128,
    max_len = 300
).to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [32]:
for epoch in range(EPOCHS):
  epoch_loss = 0
  epoch_correct = 0
  epoch_count = 0
  for i, data in tqdm(enumerate(train_dataloader, 1), total=len(train_dataloader),position=0, leave=True):
    inputs, labels = data
    inputs = inputs.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    epoch_correct += (predicted==labels).sum().item()
    loss = loss_function(outputs, labels)
    loss.backward()
    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()

    epoch_loss += loss.item()
    epoch_count += inputs.size(0)
    
  with torch.no_grad():
    test_epoch_loss = 0
    test_epoch_correct = 0
    test_epoch_count = 0

    for i, data in enumerate(test_dataloader, 1):
      inputs, labels = data
      inputs = inputs.to(device)
      labels = labels.to(device)
      outputs = model(inputs)
      _, predicted = torch.max(outputs.data, 1)
      test_epoch_correct += (predicted==labels).sum().item()
      loss = loss_function(outputs, labels)
      test_epoch_loss += loss.item()
      test_epoch_count += inputs.size(0)
  print(f'train : epoch={epoch}, loss={epoch_loss:.4f}, accuracy={epoch_correct/epoch_count:.4f}')
  print(f'valid : loss={test_epoch_loss:.4f}, accuracy={test_epoch_correct/test_epoch_count:.4f}')

  0%|          | 0/297 [00:00<?, ?it/s]


RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)