In [5]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/50/10/aeefced99c8a59d828a92cc11d213e2743212d3641c87c82d61b035a7d5c/transformers-2.3.0-py3-none-any.whl (447kB)
[K     |████████████████████████████████| 450kB 4.7MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/1f/8e/ed5364a06a9ba720fddd9820155cc57300d28f5f43a6fd7b7e817177e642/sacremoses-0.0.35.tar.gz (859kB)
[K     |████████████████████████████████| 860kB 23.2MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 36.4MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.35-cp36-none-any.whl size=883999 sha256=6d8460350dab8051

In [1]:
# Mount Google Drive at /content/drive. The data paths used later are the
# relative 'drive/My Drive/...' form, so this presumably relies on the
# working directory being /content (the Colab default) - confirm if moved.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
# Report the GPU attached to this runtime; the training code below moves
# the model and every batch to CUDA.
!nvidia-smi

Sat Dec 28 01:14:33 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.44       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# Training hyper-parameters shared by the cells below.
# epochs: passes over the training split; batch_size: examples per batch.
epochs, batch_size = 10, 32
# Step size handed to the optimizer.
learning_rate = 1e-5
# NOTE(review): not referenced in the visible code; presumably the token
# length used when the .npy inputs were built - confirm.
max_words = 100



In [6]:
import torch
from transformers import BertModel
from torch.utils.data import dataset
from torch.utils.data import dataloader
import torch.nn as nn
import numpy as np
import torch.optim as optim


class Dataset(dataset.Dataset):
    """Map-style dataset that pairs each story with its label.

    Both ``stories`` and ``labels`` are kept as given (any indexable
    container); the dataset length is taken from ``labels``.
    """

    def __init__(self, stories, labels):
        super().__init__()
        self.stories, self.labels = stories, labels

    def __len__(self):
        """Number of examples, i.e. ``len(labels)``."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(story, label)`` pair at ``index`` as ``torch.long`` tensors."""
        return (
            torch.tensor(self.stories[index], dtype=torch.long),
            torch.tensor(self.labels[index], dtype=torch.long),
        )


class Model(nn.Module):
    """BERT encoder followed by a mean-pooled, two-class softmax head.

    ``forward`` returns class *probabilities* (each row sums to 1), not raw
    logits.

    NOTE(review): this notebook trains with ``nn.CrossEntropyLoss``, which
    applies log-softmax itself, so the loss effectively sees a double
    softmax. Returning raw logits would be the usual fix, but ``test()``
    compares the class-1 output across rows, so that change has to be made
    together with the callers - left as-is here.
    """

    def __init__(self):
        super().__init__()
        self.bert_layer = BertModel.from_pretrained('bert-large-cased')
        # 1024 input features: must match the encoder's hidden width.
        self.linear = nn.Linear(1024, 2)
        # Explicit dim: the implicit-dim form is deprecated and warns on every
        # call. For the 2-D (batch, classes) input used here it resolved to
        # dim=1, so the outputs are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, story):
        """Return per-class probabilities for a batch of token-id sequences.

        Args:
            story: integer tensor of token ids, one row per example.

        Returns:
            Float tensor with two columns; each row sums to 1.
        """
        bert_output = self.bert_layer(story)
        # Element 0 holds the per-token hidden states; element 1 (the pooled
        # output) would be an alternative input to the linear layer.
        token_states = bert_output[0]
        # Average over the token axis. No attention mask is passed, so padded
        # positions (if the inputs are padded - TODO confirm) count too.
        pooled = torch.mean(token_states, dim=1, dtype=torch.float32)
        return self.softmax(self.linear(pooled))


def get_accuracy_from_logits(logits, label):
    """Return the fraction of examples whose arg-max class equals the label.

    Args:
        logits: 2-D tensor of per-class scores, one row per example.
        label: tensor (or sequence) of integer class ids.

    Returns:
        Accuracy as a Python float in [0, 1]; 0.0 when ``label`` is empty
        (previously this raised ZeroDivisionError).
    """
    total = len(label)
    if total == 0:
        return 0.0
    # Only the first len(label) rows are scored, as in the original loop.
    pred_output = torch.argmax(logits, dim=1)[:total]
    target = torch.as_tensor(label, device=pred_output.device).reshape(-1)
    # One vectorised comparison instead of one tensor comparison per example.
    correct = (pred_output == target).sum().item()
    return float(correct) / total


def get_accuracy(pred, label):
    """Return the fraction of positions where ``pred`` equals ``label``.

    Args:
        pred: tensor (or sequence) of predicted integer class ids.
        label: tensor (or sequence) of reference class ids.

    Returns:
        Accuracy as a Python float in [0, 1]; 0.0 when ``label`` is empty
        (previously this raised ZeroDivisionError).
    """
    total = len(label)
    if total == 0:
        return 0.0
    target = torch.as_tensor(label).reshape(-1)
    # Only the first len(label) predictions are scored, as in the original loop.
    guesses = torch.as_tensor(pred, device=target.device).reshape(-1)[:total]
    # One vectorised comparison instead of one tensor comparison per example.
    correct = (guesses == target).sum().item()
    return float(correct) / total


def valid(model, criterion, val_loader):
    """Evaluate ``model`` on ``val_loader`` and print loss and accuracy.

    Args:
        model: module mapping a batch of sequences to per-class scores.
        criterion: loss callable taking ``(scores, labels)``.
        val_loader: iterable of ``(seq, label)`` batches.

    Returns:
        1-D ``torch.long`` tensor with the arg-max prediction for every
        example, in loader order, on the model's device.
    """
    model.eval()

    # Follow the model's device instead of assuming CUDA is present.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The previous code summed per-batch losses and divided by the number of
    # samples, which is only correct for a sum-reduced loss. With the default
    # mean reduction each batch loss must be weighted by its batch size.
    sum_reduced = getattr(criterion, 'reduction', 'mean') == 'sum'

    with torch.no_grad():
        loss_sum = 0.0
        total = 0
        pred_chunks = []
        truth_chunks = []
        for seq, label in val_loader:
            seq, label = seq.to(device), label.to(device)

            logits = model(seq)

            batch_len = len(label)
            total += batch_len

            batch_loss = criterion(logits, label).item()
            loss_sum += batch_loss if sum_reduced else batch_loss * batch_len

            pred_chunks.append(torch.argmax(logits, dim=1))
            truth_chunks.append(label)

        # Concatenate once at the end rather than growing tensors per batch.
        if pred_chunks:
            pred = torch.cat(pred_chunks)
            truth = torch.cat(truth_chunks)
        else:
            pred = torch.tensor([], dtype=torch.long, device=device)
            truth = torch.tensor([], dtype=torch.long, device=device)

        # An empty loader reports zeros instead of dividing by zero.
        acc = get_accuracy(pred, truth) if total else 0.0
        loss = loss_sum / total if total else 0.0
        print("Validation complete. Loss : {} Accuracy : {}\n".format(loss, acc))

        return pred


def train(model, criterion, opti, train_loader, val_loader, num_epochs=None):
    """Train ``model`` and run validation after every epoch.

    Args:
        model: module mapping a batch of sequences to per-class scores.
        criterion: loss callable taking ``(scores, labels)``.
        opti: optimizer over the model's parameters.
        train_loader: iterable of ``(seq, label)`` training batches.
        val_loader: iterable of ``(seq, label)`` batches passed to ``valid``.
        num_epochs: number of passes over ``train_loader``. Defaults to the
            notebook-level ``epochs`` setting when omitted.
    """
    if num_epochs is None:
        num_epochs = epochs

    # Follow the model's device instead of assuming CUDA is present.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for epoch in range(num_epochs):

        # valid() leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        for it, (seq, label) in enumerate(train_loader):
            # clear gradients left over from the previous step
            opti.zero_grad()
            seq, label = seq.to(device), label.to(device)

            logits = model(seq)
            loss = criterion(logits, label)

            # backpropagation and parameter update
            loss.backward()
            opti.step()

            # Accuracy is for logging only; detach so it holds no graph.
            acc = get_accuracy_from_logits(logits.detach(), label)
            print("Iteration {} of  epoch {} complete. Loss : {} Accuracy : {}".format(
                it+1, epoch+1, loss.item(), acc))

        valid(model, criterion, val_loader)


def test(model, test_loader):
    pred = []
    model.eval()
    with torch.no_grad():
        for it, (seq, label) in enumerate(test_loader):
            seq, label = seq.cuda(), label.cuda()

            logits = model(seq)

            length = len(label) // 2

            for i in range(length):
                if logits[i][1] > logits[i + 1][1]:
                    pred.append(1)
                else:
                    pred.append(2)

    return pred


if __name__ == '__main__':
    # Every input and output of this run lives under this Drive folder.
    data_dir = 'drive/My Drive/sct/data/'

    stories = np.load(data_dir + 'train_data.npy')
    labels = np.load(data_dir + 'train_labels.npy')

    test_stories = np.load(data_dir + 'test_data.npy')
    # Placeholder labels so the test split fits the same Dataset class.
    # np.int was removed from NumPy (1.24); use an explicit integer dtype.
    test_labels = np.zeros((len(test_stories),), dtype=np.int64)

    stories_from_train = np.load(data_dir + 'val_from_train_data.npy')
    labels_from_train = np.load(data_dir + 'val_from_train_labels.npy')

    # 80/20 split, in file order, into training and held-out parts.
    length = len(stories)
    train_len = int(length * 0.8)

    train_stories = stories[:train_len]
    val_stories = stories[train_len:]

    train_labels = labels[:train_len]
    val_labels = labels[train_len:]

    train_data = Dataset(train_stories, train_labels)
    val_data = Dataset(val_stories, val_labels)
    test_data = Dataset(test_stories, test_labels)
    val_from_train_data = Dataset(stories_from_train, labels_from_train)

    train_loader = dataloader.DataLoader(
        train_data, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = dataloader.DataLoader(
        val_data, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=False)
    # test() pairs consecutive rows, so this loader must stay unshuffled.
    test_loader = dataloader.DataLoader(
        test_data, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=False)
    val_from_train_loader = dataloader.DataLoader(
        val_from_train_data, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=False)

    print('initializing model')
    model = Model()
    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    opti = optim.Adam(model.parameters(), lr=learning_rate)
    print('start training')
    train(model, criterion, opti, train_loader, val_loader)

    print('validation on the official validation set')
    # NOTE(review): this evaluates on all of `stories`, 80% of which the model
    # was just trained on, so the printed accuracy is optimistic. The held-out
    # figure is the per-epoch one computed on val_loader.
    validation_data = Dataset(stories, labels)
    validation_loader = dataloader.DataLoader(
        validation_data, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=False)

    validation_pred = valid(model, criterion, validation_loader)

    # One predicted class id per line, in dataset order.
    with open(data_dir + 'validation_results.txt', 'w') as f:
        f.writelines(str(p) + '\n' for p in validation_pred.tolist())

    print('validation on data from the official training set')
    val_from_train_pred = valid(model, criterion, val_from_train_loader)

    print('generating test result')
    pred = test(model, test_loader)

    # One choice (1 or 2) per line, in test order.
    with open(data_dir + 'test_results.txt', 'w') as f:
        f.writelines(str(p) + '\n' for p in pred)

    print('saving model')
    # NOTE(review): this pickles the whole module, so loading it requires the
    # same class definitions to be importable. Saving model.state_dict() is
    # more portable, but would change what consumers of model.pkl receive.
    torch.save(model, data_dir + 'model.pkl')


initializing model
start training




Iteration 1 of  epoch 1 complete. Loss : 0.7078193426132202 Accuracy : 0.3125
Iteration 2 of  epoch 1 complete. Loss : 0.7451775074005127 Accuracy : 0.40625
Iteration 3 of  epoch 1 complete. Loss : 0.7104355096817017 Accuracy : 0.5
Iteration 4 of  epoch 1 complete. Loss : 0.7074080109596252 Accuracy : 0.5
Iteration 5 of  epoch 1 complete. Loss : 0.6810455322265625 Accuracy : 0.59375
Iteration 6 of  epoch 1 complete. Loss : 0.6752543449401855 Accuracy : 0.625
Iteration 7 of  epoch 1 complete. Loss : 0.7022028565406799 Accuracy : 0.46875
Iteration 8 of  epoch 1 complete. Loss : 0.7050313353538513 Accuracy : 0.4375
Iteration 9 of  epoch 1 complete. Loss : 0.7007884979248047 Accuracy : 0.34375
Iteration 10 of  epoch 1 complete. Loss : 0.6896082758903503 Accuracy : 0.6875
Iteration 11 of  epoch 1 complete. Loss : 0.6734763979911804 Accuracy : 0.625
Iteration 12 of  epoch 1 complete. Loss : 0.6398913860321045 Accuracy : 0.6875
Iteration 13 of  epoch 1 complete. Loss : 0.6227214932441711 Accu

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
