### Import Libraries

In [None]:
import os

from google.colab import drive

# Mount Google Drive inside the Colab VM so the project folder becomes reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Work from the project folder so relative paths (e.g. ./data/MNIST/) resolve there.
os.chdir("/content/drive/MyDrive/Courses/CSCI566-DLA/Shashank/")

Mounted at /content/drive


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.datasets import FashionMNIST
from torchvision.datasets import CIFAR10
from torchvision.datasets import CIFAR100
from torchvision.datasets import SVHN
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader
from torch.utils.data.dataset import TensorDataset
from sklearn.metrics import accuracy_score, f1_score
import sys
import os
import time
import logging

In [None]:

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
def overlay_y_on_x(x, y, num_labels):
    """Return a copy of [x] whose first num_labels columns hold a one-hot code of [y].

    The leading num_labels entries of every row are zeroed and the entry at
    column y is then set to the largest value found anywhere in x.
    y may be a single class index (applied to every row) or one index per row.
    The input tensor itself is left untouched.
    """
    peak = x.max()
    row_ids = range(x.shape[0])
    overlaid = x.clone()
    overlaid[:, :num_labels] *= 0.0
    overlaid[row_ids, y] = peak
    return overlaid

In [None]:
class Convolutional_layer(nn.Module):
  """Convolution + ReLU block trained on its own with a goodness-threshold loss.

  Every sample is L2-normalised over all of its elements and reshaped to
  (n_c, n_h, n_w) before the convolution, so the layer accepts flat inputs
  as well as inputs that already have that shape.
  """

  def __init__(self, input_size: tuple, output_size: int, kernel_size: int, padding: int):
    """
      input_size: (n_h, n_w, n_c) of one input sample
      output_size: number of output channels of the convolution
      kernel_size: side length of the square convolution kernel
      padding: zero padding applied by the convolution
    """
    super().__init__()
    self.input_size = input_size
    self.patch_size = kernel_size
    self.output_size = output_size

    _, _, n_c = input_size
    # The weights are created on the notebook-level `device`.
    self.patch_embedding = nn.Conv2d(n_c, output_size, kernel_size=kernel_size, stride=1, padding=padding, device=device)

    # Attribute name kept for compatibility; the activation is a plain ReLU.
    self.lrelu = nn.ReLU()

  def __normalize(self, x):
    """Scale every sample to (approximately) unit L2 norm over all of its elements."""
    x_shape = x.shape
    x = x.reshape(x_shape[0], -1)
    x = x / (x.norm(2, 1, keepdim=True) + 1e-4)  # epsilon guards against all-zero samples
    return x.reshape(x_shape)

  def __reshape(self, x):
    """Reshape a batch to (batch, n_c, n_h, n_w), the layout nn.Conv2d expects."""
    n_h, n_w, n_c = self.input_size
    return x.reshape(x.size(0), n_c, n_h, n_w)

  def forward(self, x):
    """Normalise, reshape, convolve and apply ReLU.

    Returns a tensor of shape (batch, output_size, out_h, out_w).
    """
    x = self.__normalize(x)
    x = self.__reshape(x)
    x = self.patch_embedding(x)
    return self.lrelu(x)

  def train_ff(self, train_loader, epoch_range, batch_size, lr=0.03, threshold=2.0):
    """Train this layer on (positive, negative, label) batches, then encode the data.

    train_loader: iterable yielding (x_pos, x_neg, label) batches
    epoch_range: iterable with one item per training epoch (e.g. range or tqdm)
    batch_size: batch size of the returned DataLoader
    lr: learning rate for Adam
    threshold: goodness value that positives are pushed above and negatives below

    Returns a shuffling DataLoader over (layer(x_pos), layer(x_neg), label)
    computed with the trained weights, ready to feed the next layer.
    """
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    # Inputs are moved to wherever this layer's weights live.
    layer_device = self.patch_embedding.weight.device

    for _ in epoch_range:
      for x_pos, x_neg, _ in train_loader:
        x_pos = x_pos.to(layer_device)
        x_neg = x_neg.to(layer_device)
        # Goodness = mean squared activation over the channel axis, i.e. one
        # value per spatial location.
        # NOTE(review): Net.predict_ff scores flattened activations with one value
        # per sample; confirm that per-location goodness is what is wanted here.
        g_pos = self.forward(x_pos).pow(2).mean(dim=1)
        g_neg = self.forward(x_neg).pow(2).mean(dim=1)

        # softplus(z) == log(1 + exp(z)), but it neither overflows nor yields
        # NaN gradients when z is large.
        loss = nn.functional.softplus(torch.cat([
            threshold - g_pos,
            g_neg - threshold
        ])).mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Encode the whole dataset with the trained weights; no autograd graph is needed.
    next_pos, next_neg, next_label = [], [], []
    with torch.no_grad():
      for x_pos, x_neg, label in train_loader:
        next_pos.append(self.forward(x_pos.to(layer_device)))
        next_neg.append(self.forward(x_neg.to(layer_device)))
        next_label.append(label)

    return DataLoader(TensorDataset(
        torch.cat(next_pos, dim=0),
        torch.cat(next_neg, dim=0),
        torch.cat(next_label, dim=0)
    ), batch_size=batch_size, shuffle=True)
    


In [None]:
class Flatten_layer(nn.Flatten):
  """nn.Flatten with a train_ff method so it can sit in a layer-wise trained stack."""

  def train_ff(self,train_loader, epoch_range, batch_size, lr=0.03, threshold=15.0):
    """Flatten every batch of the loader; this layer has nothing to train.

    epoch_range, lr and threshold are accepted so the call signature matches
    the other layers; they are not used.

    Returns a shuffling DataLoader over (flat_pos, flat_neg, label).
    """
    pos_chunks, neg_chunks, label_chunks = [], [], []
    for pos_batch, neg_batch, label_batch in train_loader:
      pos_chunks.append(self.forward(pos_batch.to(device)))
      neg_chunks.append(self.forward(neg_batch.to(device)))
      label_chunks.append(label_batch)  # labels are passed through unchanged

    flattened = TensorDataset(
      torch.cat(pos_chunks, dim=0),
      torch.cat(neg_chunks, dim=0),
      torch.cat(label_chunks, dim=0),
    )
    return DataLoader(flattened, batch_size=batch_size, shuffle=True)
    

class MaxPool_layer(nn.MaxPool2d):
  """nn.MaxPool2d with a train_ff method so it can sit in a layer-wise trained stack."""

  def train_ff(self,train_loader, epoch_range, batch_size, lr=0.03, threshold=2.0):
    """Max-pool every batch of the loader; this layer has nothing to train.

    epoch_range, lr and threshold are accepted so the call signature matches
    the other layers; they are not used.

    Returns a shuffling DataLoader over (pooled_pos, pooled_neg, label).
    """
    pos_chunks, neg_chunks, label_chunks = [], [], []
    for pos_batch, neg_batch, label_batch in train_loader:
      pos_chunks.append(self.forward(pos_batch.to(device)))
      neg_chunks.append(self.forward(neg_batch.to(device)))
      label_chunks.append(label_batch)  # labels are passed through unchanged

    pooled = TensorDataset(
      torch.cat(pos_chunks, dim=0),
      torch.cat(neg_chunks, dim=0),
      torch.cat(label_chunks, dim=0),
    )
    return DataLoader(pooled, batch_size=batch_size, shuffle=True)
    

In [None]:
class Linear_layer(nn.Linear):
  """Fully connected layer + ReLU trained on its own with a goodness-threshold loss."""

  def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
    super().__init__(in_features, out_features, bias, device, dtype)
    self.relu = torch.nn.ReLU()

  def forward(self, x):
    """Apply the affine map and ReLU to the L2-normalised input.

    Each row of x is divided by its L2 norm (plus a small epsilon), so only
    the direction of the incoming activity vector reaches the weights.
    """
    x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
    # F.linear adds the bias only when the layer has one, so bias=False works.
    return self.relu(nn.functional.linear(x_direction, self.weight, self.bias))

  def train_ff(self, train_loader, epoch_range, batch_size, lr=0.03, threshold=2.0):
    """Train this layer on (positive, negative, label) batches, then encode the data.

    train_loader: iterable yielding (x_pos, x_neg, label) batches
    epoch_range: iterable with one item per training epoch (e.g. range or tqdm)
    batch_size: batch size of the returned DataLoader
    lr: learning rate for Adam
    threshold: goodness value that positives are pushed above and negatives below

    Returns a shuffling DataLoader over (layer(x_pos), layer(x_neg), label)
    computed with the trained weights, ready to feed the next layer.
    """
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    # Move inputs to wherever this layer's weights live, which may differ from
    # a notebook-level default when the layer was built with an explicit device.
    layer_device = self.weight.device

    for _ in epoch_range:
      for x_pos, x_neg, _ in train_loader:
        x_pos = x_pos.to(layer_device)
        x_neg = x_neg.to(layer_device)
        # Goodness = mean squared activation of each sample.
        g_pos = self.forward(x_pos).pow(2).mean(1)
        g_neg = self.forward(x_neg).pow(2).mean(1)

        # softplus(z) == log(1 + exp(z)), but it neither overflows nor yields
        # NaN gradients when z is large.
        loss = nn.functional.softplus(torch.cat([
            -g_pos + threshold,
            g_neg - threshold])).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Encode the whole dataset with the trained weights; no autograd graph is needed.
    next_pos, next_neg, next_label = [], [], []
    with torch.no_grad():
      for x_pos, x_neg, label in train_loader:
        next_pos.append(self.forward(x_pos.to(layer_device)))
        next_neg.append(self.forward(x_neg.to(layer_device)))
        next_label.append(label)

    return DataLoader(TensorDataset(
        torch.cat(next_pos, dim=0),
        torch.cat(next_neg, dim=0),
        torch.cat(next_label, dim=0)
    ), batch_size=batch_size, shuffle=True)

In [None]:
class Net(nn.Module):
  """Stack of layers trained greedily, one layer at a time, via each layer's train_ff."""

  def __init__(self, layers, num_labels):
    """
      layers: sequence of nn.Module layers, applied in order
      num_labels: number of classes tried by predict_ff
    """
    super().__init__()
    # ModuleList registers the layers, so parameters(), state_dict() and .to()
    # can see them; iteration and indexing behave like the plain list.
    self.layers = layers if isinstance(layers, nn.ModuleList) else nn.ModuleList(layers)
    self.num_labels = num_labels

  def train_ff(self, train_loader, epochs=1000, **kwargs):
    """Train the layers in order, feeding each one the previous layer's outputs.

    train_loader: DataLoader of (x_pos, x_neg, label) batches for the first layer
    epochs: number of epochs handed to every layer
    kwargs: forwarded unchanged to every layer's train_ff (e.g. lr, threshold)
    """
    cur_train_loader = train_loader
    batch_size = train_loader.batch_size
    for i, layer in enumerate(self.layers):

      # With few batches the progress bar wraps the loader, otherwise the epochs.
      is_large_batch = len(cur_train_loader) >= 5
      print(f"Training layer: {i+1} ... tqdm: {'loader' if not is_large_batch else 'epoch'}")

      if is_large_batch:
        epoch_range = tqdm(range(epochs))
      else:
        cur_train_loader = tqdm(cur_train_loader)
        epoch_range = range(epochs)
      cur_train_loader = layer.train_ff(cur_train_loader, epoch_range=epoch_range, batch_size=batch_size, **kwargs)

  def forward(self, x):
    """Apply every layer in order and return the last layer's output."""
    for layer in self.layers:
      x = layer(x)
    return x

  def _predict_batch(self, x):
    """Return, for every row of x, the label whose overlay gives the highest goodness."""
    goodness_per_label = []
    for label in range(self.num_labels):
      h = overlay_y_on_x(x, label, self.num_labels)
      goodness = []
      for layer in self.layers:
        h = layer(h)
        # Only 2-D (batch, features) activations are scored. For the 8-layer
        # network built in this notebook these are exactly the layers the
        # previous hard-coded rule (skip indices 0, 1, 3 and 4) selected.
        if h.dim() == 2:
          goodness.append(h.pow(2).mean(1))
      if not goodness:
        raise ValueError("no layer produced a 2-D (batch, features) activation to score")
      goodness_per_label.append(sum(goodness).unsqueeze(1))
    return torch.cat(goodness_per_label, 1).argmax(1)

  def predict_ff(self, data_loader):
    """Classify every batch by trying each label and keeping the one with most goodness.

    data_loader: iterable of (x, label) batches
    Returns (predictions, labels) as CPU tensors.
    """
    preds = []
    labels = []
    with torch.no_grad():  # inference only
      for x, label in data_loader:
        x = x.to(device)
        preds.append(self._predict_batch(x))
        labels.append(label)

    preds = torch.cat(preds, 0)
    labels = torch.cat(labels, 0)
    return preds.cpu(), labels.cpu()

  def predict_bp(self, data_loader):
    """Classify every batch with a plain forward pass (argmax over the output columns).

    data_loader: iterable of (x, label) batches
    Returns (predictions, labels) as CPU tensors.
    """
    preds = []
    labels = []
    with torch.no_grad():  # inference only
      for batch_x, label in data_loader:
        batch_x = batch_x.to(device)
        preds.append(self.forward(batch_x).argmax(1))
        labels.append(label)
    preds = torch.cat(preds, 0)
    labels = torch.cat(labels, 0)
    return preds.cpu(), labels.cpu()

In [None]:
def MNIST_dataset():
  """Return the (train, test) MNIST datasets stored under ./data/MNIST/.

  Each image is converted to a tensor, normalised with mean 0.1307 and
  std 0.3081, and flattened to a 1-D vector. The files are downloaded when
  they are not already present.
  """
  root = './data/MNIST/'
  pipeline = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

  def load_split(is_train):
    # Both splits share the same root folder and preprocessing pipeline.
    return MNIST(root, train=is_train, download=True, transform=pipeline)

  train_data = load_split(True)
  test_data = load_split(False)
  return train_data, test_data

# Load both MNIST splits and print their summaries.
mnist_splits = MNIST_dataset()
mnist_train_data, mnist_test_data = mnist_splits
print(*mnist_splits)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/MNIST/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
               Lambda()
           ) Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/MNIST/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
               Lambda()
           )


In [None]:
def create_ff_train_dataset(train_loader, num_labels):
  """Build the (positive, negative, label) dataset used for layer-wise training.

  train_loader: iterable of (input, label) batches; inputs are 2-D (batch, features)
    and labels are integer class indices in [0, num_labels)
  num_labels: number of classes; must be at least 2

  A positive sample is the input with its true label overlaid, a negative
  sample is the same input with a label that is guaranteed to be wrong.
  Returns a TensorDataset of (positives, negatives, labels).
  Raises ValueError when num_labels < 2, because no wrong label exists then.
  """
  if num_labels < 2:
    raise ValueError("num_labels must be at least 2 to build negative samples")

  pos_set = []
  neg_set = []
  label_set = []
  for inputs, label in tqdm(train_loader):
    # Adding a random offset in [1, num_labels) modulo num_labels never lands
    # on the true label. Shuffling the batch labels instead would leave roughly
    # one negative in num_labels identical to its positive.
    offset = torch.randint(1, num_labels, label.shape, device=label.device)
    wrong_label = (label + offset) % num_labels
    pos_set.append(overlay_y_on_x(inputs, label, num_labels))
    neg_set.append(overlay_y_on_x(inputs, wrong_label, num_labels))
    label_set.append(label)
  pos_set = torch.cat(pos_set, 0)
  neg_set = torch.cat(neg_set, 0)
  label_set = torch.cat(label_set, 0)
  return TensorDataset(pos_set, neg_set, label_set)

def create_ff_val_dataset(val_loader):
  """Gather every (input, label) batch of a loader into one in-memory TensorDataset."""
  input_chunks, label_chunks = [], []
  for batch_inputs, batch_labels in tqdm(val_loader):
    input_chunks += [batch_inputs]
    label_chunks += [batch_labels]

  # Concatenate along the batch axis so the result holds one row per sample.
  all_inputs = torch.cat(input_chunks, 0)
  all_labels = torch.cat(label_chunks, 0)
  return TensorDataset(all_inputs, all_labels)

In [None]:
# Read the training split in order, in batches of 1024, and overlay labels for the 10 classes.
mnist_train_loader = DataLoader(mnist_train_data, batch_size=1024, shuffle=False)
train_dataset = create_ff_train_dataset(mnist_train_loader, 10)

100%|██████████| 59/59 [00:14<00:00,  3.97it/s]


In [None]:
# Read the test split in order, in batches of 1024, and keep it in memory as one dataset.
mnist_test_loader = DataLoader(mnist_test_data, batch_size=1024, shuffle=False)
test_dataset = create_ff_val_dataset(mnist_test_loader)

100%|██████████| 10/10 [00:02<00:00,  4.08it/s]


In [None]:
# Two conv/pool stages followed by two fully connected layers.
# The per-sample activation shape after each layer is noted on the right.
conv_stage_1 = [
    Convolutional_layer((28, 28, 1), 6, kernel_size = 5, padding=2),  # 1x28x28 -> 6x28x28
    MaxPool_layer(kernel_size=2, stride=2),                           # -> 6x14x14
    Flatten_layer(),                                                  # -> 1176
]
conv_stage_2 = [
    Convolutional_layer((14,14,6),16, kernel_size=5, padding =0),     # -> 16x10x10
    MaxPool_layer(kernel_size=2, stride=2),                           # -> 16x5x5
    Flatten_layer(),                                                  # -> 400
]
dense_stage = [
    Linear_layer(400, 2000, device=device),
    Linear_layer(2000, 2000, device=device),
]
layers = conv_stage_1 + conv_stage_2 + dense_stage
net = Net(layers, 10)

In [None]:
# Train every layer in turn for 1000 epochs on batches of 512 samples.
ff_train_loader = DataLoader(train_dataset, batch_size=512, shuffle=False)
net.train_ff(ff_train_loader, epochs=1000, lr=0.02)

Training layer: 1 ... tqdm: epoch


100%|██████████| 1000/1000 [15:40<00:00,  1.06it/s]


Training layer: 2 ... tqdm: epoch


  0%|          | 0/1000 [00:00<?, ?it/s]

Training layer: 3 ... tqdm: epoch



  0%|          | 0/1000 [00:00<?, ?it/s]


Training layer: 4 ... tqdm: epoch


  0%|          | 0/1000 [00:00<?, ?it/s]
100%|██████████| 1000/1000 [14:23<00:00,  1.16it/s]


Training layer: 5 ... tqdm: epoch


  0%|          | 0/1000 [00:00<?, ?it/s]

Training layer: 6 ... tqdm: epoch



  0%|          | 0/1000 [00:00<?, ?it/s]


Training layer: 7 ... tqdm: epoch


  0%|          | 0/1000 [00:00<?, ?it/s]
100%|██████████| 1000/1000 [14:10<00:00,  1.18it/s]


Training layer: 8 ... tqdm: epoch


100%|██████████| 1000/1000 [14:12<00:00,  1.17it/s]


In [None]:
# Predict the label of every test image, in order, in batches of 512.
ff_test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)
pred, true = net.predict_ff(ff_test_loader)

In [None]:
# Fraction of test images classified correctly.
# NOTE: the recorded run scored 0.1075, roughly what guessing among 10 classes gives.
accuracy_score(y_true=true, y_pred=pred)

0.1075

In [None]:
# Per-class F1 (average=None returns one score per label instead of a single mean).
f1_score(y_true=true, y_pred=pred, average=None)

array([0.        , 0.21314554, 0.03621399, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.01830664])