In [1]:
!gdown 13d-tjvmDbKKrMfCvsJzaQ7pgHqQykJYB
!gdown 1rWTsDh0wEFIQFiSkWxBqVOa_V_J5EhBq

Downloading...
From: https://drive.google.com/uc?id=13d-tjvmDbKKrMfCvsJzaQ7pgHqQykJYB
To: /content/full_data.json
100% 2.50M/2.50M [00:00<00:00, 35.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1rWTsDh0wEFIQFiSkWxBqVOa_V_J5EhBq
To: /content/labels.torch
100% 31.4k/31.4k [00:00<00:00, 44.4MB/s]


In [6]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import json
from transformers import AutoTokenizer, BertModel
import tqdm
import torch.nn as nn
import math
import random
    
class OurDataset(Dataset):
    def __init__(self, data_file, labels_file):
        self.full_data = json.load(open(data_file))
        self.labels = torch.load(labels_file)

        self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        self.model = BertModel.from_pretrained("bert-base-uncased")

    def __len__(self):
        return len(self.full_data)

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.full_data[idx], return_tensors="pt")
        outputs = self.model(**inputs)
        last_hidden_states = outputs.last_hidden_state
        return last_hidden_states, self.labels[idx]

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class TransformerClassifier(nn.Module):

    def __init__(
        self,
        d_model=768,
        n_classes=3,
        nhead=4,
        dim_feedforward=512,
        num_layers=6,
        dropout=0.1,
        activation="relu",
        classifier_dropout=0.1,
    ):

        super().__init__()

        self.pos_encoder = PositionalEncoding(
            d_model=d_model,
            dropout=dropout,
            max_len=5000,
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
        )
        self.head = nn.Sequential(
            nn.Linear(d_model, 256),
            nn.ReLU(),
            nn.Linear(256, n_classes),
            nn.Softmax(dim=1)
        )

        self.d_model = d_model
        self.dropout = nn.Dropout(p=classifier_dropout)

    def forward(self, x):
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.dropout(x)
        x = self.head(x)

        return x

In [7]:
ds = OurDataset("full_data.json", "labels.torch")
torch.manual_seed(29592)  # set the seed for reproducibility
# device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device='cpu'

In [8]:
def train_model(model):
  correct = 0
  loss_fn = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=0.00001, momentum =0.9)
  for i in range(int(ds.__len__())):
    idx = random.randint(0, ds.__len__()-1)
    x, y = ds.__getitem__(idx)
    y = y.long()
    x = x.to(device)
    y = y.to(device)
    output = model(x)
    correct += output.argmax().eq(y).sum()
    loss = loss_fn(output, y)
    if i%100==0:
      print(i, correct, loss.item())
    loss.backward()
    optimizer.step()
  acc = correct/int((ds.__len__()))
  print(acc)

In [9]:
def eval(model):
  model.eval()
  correct = 0
  with torch.no_grad():
    for i in range(int(ds.__len__())):
      idx = random.randint(0, ds.__len__()-1)
      x, y = ds.__getitem__(idx)
      y = y.long()
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      correct += output.argmax().eq(y).sum()
      if i%100==0:
        print(correct)
  acc = correct/int((ds.__len__()))
  return acc

In [10]:
import time
def time_model_evaluation(model):
    s = time.time()
    loss = eval(model)
    elapsed = time.time() - s
    print('''accuracy: {0:.3f}\nelapsed time (seconds): {1:.1f}'''.format(loss, elapsed))

import os
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [11]:
t = TransformerClassifier()
t.to(device)
print_size_of_model(t)

Size (MB): 91.849724




In [12]:
train_model(t)

0 tensor(1) 1.0375488996505737
100 tensor(72) 1.5511915683746338
200 tensor(138) 0.5514447093009949
300 tensor(204) 0.5514447093009949
400 tensor(270) 0.5514447093009949
500 tensor(333) 0.5514447093009949
600 tensor(400) 0.5514447093009949
700 tensor(465) 1.5514447689056396
800 tensor(527) 0.5514447093009949
900 tensor(588) 1.5514447689056396
1000 tensor(651) 0.5514447093009949
1100 tensor(712) 0.5514447093009949
1200 tensor(774) 0.5514447093009949
1300 tensor(841) 0.5514447093009949
1400 tensor(909) 0.5514447093009949
1500 tensor(970) 1.5514447689056396
1600 tensor(1042) 0.5514447093009949
1700 tensor(1102) 0.5514447093009949
1800 tensor(1158) 0.5514447093009949
1900 tensor(1225) 0.5514447093009949
2000 tensor(1291) 1.5514447689056396
2100 tensor(1357) 0.5514447093009949
2200 tensor(1417) 0.5514447093009949
2300 tensor(1482) 0.5514447093009949
2400 tensor(1540) 0.5514447093009949
2500 tensor(1597) 0.5514447093009949
2600 tensor(1652) 0.5514447093009949
2700 tensor(1717) 0.551444709300

In [13]:
import torch.quantization

quantized_model = torch.quantization.quantize_dynamic(
    t, {nn.Linear}, dtype=torch.qint8
)

In [14]:
print_size_of_model(quantized_model)

Size (MB): 77.112473


In [15]:
torch.set_num_threads(1)
time_model_evaluation(t)

tensor(1)
tensor(59)
tensor(114)
tensor(184)
tensor(244)
tensor(311)
tensor(376)
tensor(437)
tensor(503)
tensor(577)
tensor(643)
tensor(699)
tensor(772)
tensor(837)
tensor(905)
tensor(966)
tensor(1027)
tensor(1089)
tensor(1150)
tensor(1224)
tensor(1283)
tensor(1347)
tensor(1410)
tensor(1478)
tensor(1540)
tensor(1601)
tensor(1658)
tensor(1722)
tensor(1787)
tensor(1854)
tensor(1907)
tensor(1974)
tensor(2043)
tensor(2112)
tensor(2172)
tensor(2234)
tensor(2293)
tensor(2352)
tensor(2407)
tensor(2474)
tensor(2542)
tensor(2603)
tensor(2665)
tensor(2734)
tensor(2806)
tensor(2876)
tensor(2938)
tensor(2995)
tensor(3068)
tensor(3132)
tensor(3195)
tensor(3262)
tensor(3329)
tensor(3396)
tensor(3464)
tensor(3527)
tensor(3591)
tensor(3654)
tensor(3721)
tensor(3784)
tensor(3852)
tensor(3914)
tensor(3985)
tensor(4050)
tensor(4114)
tensor(4185)
tensor(4243)
tensor(4299)
tensor(4365)
tensor(4423)
tensor(4484)
tensor(4537)
tensor(4607)
tensor(4665)
tensor(4737)
tensor(4807)
tensor(4872)
accuracy: 0.641
el

In [16]:
time_model_evaluation(quantized_model)

tensor(1)
tensor(72)
tensor(136)
tensor(197)
tensor(260)
tensor(327)
tensor(399)
tensor(456)
tensor(523)
tensor(583)
tensor(650)
tensor(717)
tensor(783)
tensor(850)
tensor(915)
tensor(976)
tensor(1044)
tensor(1111)
tensor(1180)
tensor(1249)
tensor(1315)
tensor(1388)
tensor(1453)
tensor(1520)
tensor(1586)
tensor(1655)
tensor(1712)
tensor(1779)
tensor(1844)
tensor(1912)
tensor(1975)
tensor(2035)
tensor(2107)
tensor(2172)
tensor(2242)
tensor(2306)
tensor(2377)
tensor(2441)
tensor(2506)
tensor(2558)
tensor(2619)
tensor(2685)
tensor(2747)
tensor(2809)
tensor(2885)
tensor(2953)
tensor(3025)
tensor(3087)
tensor(3149)
tensor(3217)
tensor(3287)
tensor(3351)
tensor(3409)
tensor(3470)
tensor(3528)
tensor(3594)
tensor(3648)
tensor(3710)
tensor(3783)
tensor(3853)
tensor(3922)
tensor(3989)
tensor(4058)
tensor(4121)
tensor(4183)
tensor(4247)
tensor(4304)
tensor(4368)
tensor(4424)
tensor(4491)
tensor(4557)
tensor(4629)
tensor(4706)
tensor(4766)
tensor(4834)
tensor(4899)
tensor(4963)
accuracy: 0.652
el