In [None]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import math
import copy
import time

torch.set_printoptions(precision=8)



root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

In [None]:
class FordDataset(Dataset):
  def __init__(self, split="train"):
    self.root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"
    self.data = torch.tensor(np.loadtxt(self.root_url + "FordA_TRAIN.tsv", delimiter="\t"), dtype=torch.float32) if split=="train" else torch.tensor(np.loadtxt(self.root_url + "FordA_TEST.tsv", delimiter="\t"), dtype=torch.float32)
    self.labels = self.data[:, 0] # get first element from each example
    self.sequences = self.data[:, 1:] # get all elements after first element
    self.labels[self.labels == -1] = 0 # change all -1 labels to 0
    self.num_classes = len(torch.unique(self.labels)) # count the number of unique labels

  def __len__(self):
      return self.data.shape[0]

  def __getitem__(self, idx):
    sequence = torch.reshape(self.sequences[idx], (-1, 1)) # dim: seq_len x num_features
    label = torch.reshape(self.labels[idx], (-1, )) # dim: 1 x 1

    return sequence, label

train_dataset = FordDataset("train")
test_dataset = FordDataset("test")

In [None]:
embed_size=256 # size of the embeddings
num_heads=4 # number of attention heads
ff_dim=4 # dimension of the feedforward layer in the encoder
num_transformer_blocks=4 # number of encoder blocks
mlp_units=[128] # the size of the feedforward layer used to make predictions
mlp_dropout=0.4 # dropout in the feedforward layer
dropout=0.25 # dropout in the encoder

In [None]:
def clones(module, N):
    "Produce N independent but identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [None]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, drop_last = True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True, drop_last = True)
len(train_dataloader)

56

In [None]:
x,y = next(iter(train_dataloader))
xp = x

print(xp.shape)
print(xp[0][0:3])

torch.Size([64, 500, 1])
tensor([[0.70227087],
        [0.58590674],
        [0.27769911]])


In [None]:
class PtMultiheadAttention(nn.Module):
  def __init__(self, head_size, num_heads, dropout=0.1):
    super(PtMultiheadAttention, self).__init__()
    assert head_size % num_heads == 0

    self.d_k = head_size // num_heads
    self.weight_matrices = clones(nn.Linear(head_size, head_size), 4)
    self.attn = None
    if dropout > 0:
      self.dropout = nn.Dropout(dropout)

  def _attention(self, query, key, value, mask=None, dropout=None):
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
    # if mask is not None:
    #   scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = scores.softmax(dim=-1)
    # if dropout is not None:
    #   p_attn = self.dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

  def forward(self, query, key, value):

    # get q, k and v
    query, key, value = [
      weights(inputs)
      for weights, inputs in zip(self.weight_matrices, (query, key, value))
    ]

    # calculate attention
    x, self.attn = self._attention(query, key, value)

    return self.weight_matrices[-1](x)

In [None]:
class PytorchEncoder(nn.Module):
  def __init__(self, inputs, embed_size, num_heads, ff_dim, dropout=0):
    super(PytorchEncoder, self).__init__()
    # attention
    self.embedding = nn.Linear(in_features=inputs.shape[-1], out_features=embed_size)
    self.attention = PtMultiheadAttention(embed_size, num_heads, dropout=0.0)
    self.linear1 = nn.Linear(embed_size, 1)
    self.dropout1 = nn.Dropout(dropout)
    self.layer_norm1 = nn.LayerNorm(normalized_shape=inputs.shape[-1], eps=1e-6)

    # feedforward
    self.conv1 = nn.Conv1d(in_channels=inputs.shape[-1], out_channels=ff_dim, kernel_size=1)
    self.relu1 = nn.ReLU()
    self.dropout2 = nn.Dropout(dropout)
    self.conv2 = nn.Conv1d(in_channels=ff_dim, out_channels=inputs.shape[-1], kernel_size=1)
    self.layer_norm2 = nn.LayerNorm(normalized_shape=inputs.shape[1], eps=1e-6)


  def forward(self, src):
    x = self.embedding(src)
    x = self.attention(x, x, x)[0]
    x = self.linear1(x)
    x = self.dropout1(x)
    x = self.layer_norm1(x)

    res = x + src
    res = res.reshape(res.shape[0], res.shape[2], res.shape[1])

    x = self.conv1(res)
    x = self.relu1(x)
    x = self.dropout2(x)
    x = self.conv2(x)
    x = self.layer_norm2(x)
    x = x + res

    return x.reshape(x.shape[0], x.shape[-1], x.shape[1])

In [None]:
from collections import OrderedDict

class EncoderClassifier(nn.Module):
  def __init__(self, inputs, embed_size, num_heads, ff_dim, dropout=0, num_blocks=4):
    super(EncoderClassifier, self).__init__()
    encoder_layer = PytorchEncoder(inputs=inputs, embed_size=embed_size, num_heads=num_heads, ff_dim=ff_dim, dropout=dropout)
    encoders = OrderedDict()
    for idx in range(num_blocks):
      encoders[f"encoder{idx}"] = encoder_layer
    self.encoder_block = nn.Sequential(encoders)
    self.avg = nn.AvgPool1d(kernel_size=1)
    self.dense1 = nn.Linear(500, mlp_units[0])
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(dropout)
    self.dense2 = nn.Linear(mlp_units[0], 2)
    self.softmax = nn.Softmax()

  def forward(self, x):
    x = self.encoder_block(x)
    x = torch.squeeze(self.avg(x), 2)
    x = self.dense1(x)
    x = self.relu1(x)
    x = self.dropout1(x)
    x = self.dense2(x)
    x = self.softmax(x)
    return x

In [None]:
cuda0 = torch.device('cuda:0')
model = EncoderClassifier(inputs=xp, embed_size=embed_size, num_heads=num_heads, ff_dim=ff_dim, dropout=dropout)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
def train_one_epoch(epoch_index):
    running_loss = 0.
    last_loss = 0.
    correct = 0
    iterations = 0
    for i, data in enumerate(train_dataloader):
      inputs, labels = data
      optimizer.zero_grad()
      if torch.cuda.is_available():
        inputs = inputs.cuda()
        labels = labels.cuda()
      outputs = model(inputs)
      loss = criterion(outputs, labels.to(torch.long).reshape(-1))
      loss.backward()
      optimizer.step()
      running_loss += loss.item()

      predictions = torch.argmax(outputs, axis=1)
      correct_labels = labels.squeeze()

      correct += (predictions == correct_labels).int().sum()/len(labels) * 100
      iterations += 1
    last_loss = running_loss / len(train_dataloader)
    acc = (correct / iterations)

    return last_loss, acc

In [None]:
epochs = 150

for epoch in range(epochs):
    print('EPOCH {}:'.format(epoch + 1))

    model.train(True)
    avg_loss, acc = train_one_epoch(epoch)

    print(avg_loss)
    print(acc)

EPOCH 1:
0.7229920487318721
tensor(50.55803680, device='cuda:0')
EPOCH 2:
0.7068426012992859
tensor(52.48326111, device='cuda:0')
EPOCH 3:
0.7000276669859886
tensor(53.01339340, device='cuda:0')
EPOCH 4:
0.6900274146880422
tensor(54.52009201, device='cuda:0')
EPOCH 5:
0.6917337636862483
tensor(54.63169861, device='cuda:0')
EPOCH 6:
0.6820127410548074
tensor(56.72433472, device='cuda:0')
EPOCH 7:
0.676254462982927
tensor(58.00781631, device='cuda:0')
EPOCH 8:
0.6754728566323008
tensor(57.44977951, device='cuda:0')
EPOCH 9:
0.6680856421589851
tensor(58.67745972, device='cuda:0')
EPOCH 10:
0.6648365631699562
tensor(60.10044861, device='cuda:0')
EPOCH 11:
0.6519430843847138
tensor(62.44419861, device='cuda:0')
EPOCH 12:
0.6433623688561576
tensor(63.44866180, device='cuda:0')
EPOCH 13:
0.6400936064975602
tensor(63.67187881, device='cuda:0')
EPOCH 14:
0.6280620396137238
tensor(66.04353333, device='cuda:0')
EPOCH 15:
0.6242060544235366
tensor(65.95982361, device='cuda:0')
EPOCH 16:
0.61463713

In [None]:
acc = 0
iteration = 0
for data in test_dataloader:
  iteration += 1
  inputs, labels = data
  if torch.cuda.is_available():
    inputs = inputs.cuda()
    labels = labels.cuda()
  outputs = model(inputs)
  predictions = torch.argmax(outputs, axis=1)
  correct_labels = labels.squeeze().int()

  acc += (predictions == correct_labels).int().sum()/len(labels) * 100
print(acc/iteration)

tensor(79.68750000, device='cuda:0')


In [None]:
from torchvision.models import ResNet18_Weights, resnet18
weights = ResNet18_Weights.IMAGENET1K_V1
model = resnet18(weights=weights)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 114MB/s]


In [None]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
type(weights.transforms()(img).unsqueeze(0))

torch.Tensor

In [None]:
from pathlib import Path

from PIL.Image import Image
from PIL.Image import open as pil_open

def load_img_from_path(file_path: Path) -> Image:
    """Loads an image from a file path."""
    with open(file_path, "rb+") as f:
        img = pil_open(f)
        # TODO: fix type hinting for load method
        img.load()  # type: ignore
    return img

In [None]:
img = load_img_from_path(Path("/content/bird.JPEG"))

In [None]:
with open(Path("/content/bird.JPEG"), "rb") as f:
  print(f)

ValueError: bad mode 'rb'

In [None]:
/content/bird.JPEG