In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
#from google.colab import drive 
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torch.autograd import Variable

from pathlib import Path
import random
import cv2
import math
from tqdm import trange
from time import sleep
import datetime
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
use_gpu = torch.cuda.is_available()

In [None]:
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

In [2]:
torch.cuda.memory_cached()

0

In [None]:
torch.cuda.max_memory_cached()

In [3]:
def read_image(path):
    im = cv2.imread(str(path))
    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

def crop(im, r, c, target_r, target_c): return im[r:r+target_r, c:c+target_c]

def center_crop(im, min_sz=None):
    #""" Returns a center crop of an image"""
    r,c,*_ = im.shape
    if min_sz is None: min_sz = min(r,c)
    start_r = math.ceil((r-min_sz)/2)
    start_c = math.ceil((c-min_sz)/2)
    return crop(im, start_r, start_c, min_sz, min_sz)

def random_crop(x, target_r, target_c):
    #""" Returns a random crop"""
    r,c,*_ = x.shape
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(rand_r*(r - target_r)).astype(int)
    start_c = np.floor(rand_c*(c - target_c)).astype(int)
    return crop(x, start_r, start_c, target_r, target_c)

def rotate_cv(im, deg, mode=cv2.BORDER_REFLECT, interpolation=cv2.INTER_AREA):
    #""" Rotates an image by deg degrees"""
    r,c,*_ = im.shape
    M = cv2.getRotationMatrix2D((c/2,r/2),deg,1)
    return cv2.warpAffine(im,M,(c,r), borderMode=mode, 
                          flags=cv2.WARP_FILL_OUTLIERS+interpolation)
    
def normalize(im):
    #"""Normalizes images with Imagenet stats."""
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return (im/255.0 - imagenet_stats[0])/imagenet_stats[1]

def apply_transforms(x, sz=(42, 42), zoom=1.05):
    #""" Applies a random crop, rotation"""
    sz1 = int(zoom*sz[0])
    sz2 = int(zoom*sz[1])
    x = cv2.resize(x, (sz1, sz2))
    x = rotate_cv(x, np.random.uniform(-10,10))
    x = random_crop(x, sz[1], sz[0])
    if np.random.rand() >= .5:
         x = np.fliplr(x).copy()
    return x

def denormalize(img):
  imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
  return img*imagenet_stats[1] + imagenet_stats[0]

In [4]:
#PATH = Path('CUB_200_2011')
PATH = Path('MixedDataset1/')
labels = pd.read_csv(PATH/"image_class_labels.txt", header=None, sep=" ")
labels.columns = ["id", "label"]
print("Labels:")
print(labels.head(2))
print("Data Stats:")
print(labels.describe())
classes = pd.read_csv(PATH/"classes.txt", header=None, sep=" ")
classes.columns = ["id", "class"]

Labels:
   id  label
0   1      1
1   2      1
Data Stats:
                 id         label
count  14264.000000  14264.000000
mean    7132.500000    124.978617
std     4117.806455     74.287425
min        1.000000      1.000000
25%     3566.750000     62.000000
50%     7132.500000    122.000000
75%    10698.250000    182.000000
max    14264.000000    276.000000


In [5]:
#https://debuggercafe.com/image-augmentation-using-pytorch-and-albumentations/
import torchvision.transforms as transforms
transform_train = torchvision.transforms.Compose(
            [   transforms.ToPILImage(),
                transforms.Resize([42, 42]),
                transforms.RandomCrop(42),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(degrees=(90,270)),
                transforms.ColorJitter(), #Randomly change the brightness, contrast and saturation of an image.
                transforms.RandomGrayscale(p=0.1),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])])


transform_test=torchvision.transforms.Compose(
            [transforms.ToPILImage(),
             transforms.Resize([84, 84]),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])])
        

In [6]:
train_test = pd.read_csv(PATH/"train_test_split.txt", header=None, sep=" ")
train_test.columns = ["id", "is_train"]

#images = list of images and names of classes 
images = pd.read_csv(PATH/"images.txt", header=None, sep=" ")
images.columns = ["id", "name"]
class CUB(Dataset):
    def __init__(self, files_path, labels, train_test, image_name, train=True, 
                 transform=False):  
        self.files_path = files_path
        self.labels = labels
        self.transform = transform
        self.train_test = train_test
        self.image_name = image_name      
        if train:
          mask = self.train_test.is_train.values == 1       
        else:
          mask = self.train_test.is_train.values == 0      
        self.filenames = self.image_name.iloc[mask]
        self.labels = self.labels[mask]
        self.num_files = self.labels.shape[0]
    def __len__(self):
        return self.num_files
    def __getitem__(self, index):
        y = self.labels.iloc[index,1] - 1
        
        file_name = self.filenames.iloc[index, 1]
        path = self.files_path/'images'/file_name
        x = read_image(path)
        if self.transform:
             x = apply_transforms(x)
             #x=transform_train(x)
        else:
            x = cv2.resize(x, (42,42))
            #x = transform_test(x)
        x = normalize(x)
        x =  np.rollaxis(x, 2) # To meet torch's input specification(c*H*W) 
        return x,y

In [7]:
train_dataset = CUB(PATH, labels, train_test, images, train= True, transform= True)
valid_dataset = CUB(PATH, labels, train_test, images, train= False, transform= False)
train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=len(valid_dataset), num_workers=0)
print(torch.cuda.memory_allocated(device=None))

0


In [8]:
for i, data in enumerate(valid_loader, 0):
#  print(i)
#    # get the inputs; data is a list of [inputs, labels]
  testx, testy = data

for i, data in enumerate(train_loader, 0):
#    # get the inputs; data is a list of [inputs, labels]
#  print(i)
  trainx, trainy = data


#testx = torch.load(PATH/'testx.pt')
#trainx = torch.load(PATH/'trainx.pt')
#testy = np.load(PATH/'testy.npy')
#trainy = np.load(PATH/'trainy.npy')

In [9]:
trainy = trainy.numpy()
testy = testy.numpy()
testx=testx.float()
trainx=trainx.float()


In [None]:
torch.cuda.memory_cached()

In [None]:
#dataloader_iterator = iter(valid_loader)
#testx, testy = next(dataloader_iterator)

#dataloader_iterator = iter(train_loader)
#trainx, trainy = next(dataloader_iterator)

#trainy = trainy.numpy()
#testy = testy.numpy()
#testx=testx.float()
#trainx=trainx.float()

In [10]:
testx.shape, testy.shape, trainx.shape, trainy.shape, len(np.unique(trainy)), len(np.unique(testy))

(torch.Size([6974, 3, 42, 42]),
 (6974,),
 torch.Size([7290, 3, 42, 42]),
 (7290,),
 140,
 136)

In [11]:
class Net(nn.Module):
    """
    Image2Vector CNN which takes image of dimension (42x42x3) and return column vector length 224
    """
    def sub_block(self, in_channels, out_channels, kernel_size=3):
        block = torch.nn.Sequential(
                    torch.nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=out_channels, padding=1),
                    torch.nn.BatchNorm2d(out_channels),
                    torch.nn.ReLU(),
                    torch.nn.MaxPool2d(kernel_size=2)
                )
        return block
    
    def __init__(self):
        super(Net, self).__init__()
        self.convnet1 = self.sub_block(3,28)
        self.convnet2 = self.sub_block(28,64)
        self.convnet3 = self.sub_block(64,128)
        self.convnet4 = self.sub_block(128,224)
        self.convnet5 = self.sub_block(224,312)

    def forward(self, x):
        x = self.convnet1(x)
        x = self.convnet2(x)
        x = self.convnet3(x)
        x = self.convnet4(x)
        x = self.convnet5(x)
        x = torch.flatten(x, start_dim=1)
        return x

In [12]:
class PrototypicalNet(nn.Module):
    def __init__(self, use_gpu=False):
        super(PrototypicalNet, self).__init__()
        self.f=Net()
        #self.f=resnet18() ### ResNet 
        self.gpu = use_gpu
        if self.gpu:
            self.f = self.f.cuda()
    
    def forward(self, datax, datay, Ns,Nc, Nq, total_classes):
        """
        Implementation of one episode in Prototypical Net
        datax: Training images
        datay: Corresponding labels of datax
        Nc: Number  of classes per episode
        Ns: Number of support data per class
        Nq:  Number of query data per class
        total_classes: Total classes in training set
        """
        k = total_classes.shape[0]
        K = np.random.choice(total_classes, Nc, replace=False)
        Query_x = torch.Tensor()
        if(self.gpu):
            Query_x = Query_x.cuda()
        Query_y = []
        Query_y_count = []
        centroid_per_class  = {}
        class_label = {}
        label_encoding = 0
        for cls in K:
            S_cls, Q_cls = self.random_sample_cls(datax, datay, Ns, Nq, cls)
            centroid_per_class[cls] = self.get_centroid(S_cls, Ns)
            class_label[cls] = label_encoding
            label_encoding += 1
            Query_x = torch.cat((Query_x, Q_cls), 0) # Joining all the query set together
            Query_y += [cls]
            Query_y_count += [Q_cls.shape[0]]
        Query_y, Query_y_labels = self.get_query_y(Query_y, Query_y_count, class_label)
        Query_x = self.get_query_x(Query_x, centroid_per_class, Query_y_labels)
        return Query_x, Query_y
    
    def random_sample_cls(self, datax, datay, Ns, Nq, cls):
        """
        Randomly samples Ns examples as support set and Nq as Query set
        """
        data = datax[(datay == cls).nonzero()]
        perm = torch.randperm(data.shape[0])
        idx = perm[:Ns]
        S_cls = data[idx]
        idx = perm[Ns : Ns+Nq]
        Q_cls = data[idx]
        if self.gpu:
            S_cls = S_cls.cuda()
            Q_cls = Q_cls.cuda()
        return S_cls, Q_cls  #return support set and Query set
    
    def get_centroid(self, S_cls, Nc):
        """
        Returns a centroid vector of support set for a class
        """
        #use f *** !!!
        return torch.sum(self.f(S_cls), 0).unsqueeze(1).transpose(0,1) / Nc
    
    def get_query_y(self, Qy, Qyc, class_label):
        """
        Returns labeled representation of classes of Query set and a list of labels.
        """
        labels = []
        m = len(Qy)
        for i in range(m):
            labels += [Qy[i]] * Qyc[i]
        labels = np.array(labels).reshape(len(labels), 1)
        label_encoder = LabelEncoder()
        Query_y = torch.Tensor(label_encoder.fit_transform(labels).astype(int)).long()
        if self.gpu:
            Query_y = Query_y.cuda()
        Query_y_labels = np.unique(labels)
        return Query_y, Query_y_labels
    
    def get_centroid_matrix(self, centroid_per_class, Query_y_labels):
        """
        Returns the centroid matrix where each column is a centroid of a class.
        """
        centroid_matrix = torch.Tensor()
        if(self.gpu):
            centroid_matrix = centroid_matrix.cuda()
        for label in Query_y_labels:
            centroid_matrix = torch.cat((centroid_matrix, centroid_per_class[label]))
        if self.gpu:
            centroid_matrix = centroid_matrix.cuda()
        return centroid_matrix
    
    def get_query_x(self, Query_x, centroid_per_class, Query_y_labels):
        """
        Returns distance matrix from each Query image to each centroid.
        """
        centroid_matrix = self.get_centroid_matrix(centroid_per_class, Query_y_labels)
        Query_x = self.f(Query_x)  #use f *****
        m = Query_x.size(0)
        n = centroid_matrix.size(0)
        # The below expressions expand both the matrices such that they become compatible to each other in order to caclulate L2 distance.
        centroid_matrix = centroid_matrix.expand(m, centroid_matrix.size(0), centroid_matrix.size(1)) # Expanding centroid matrix to "m".
        Query_matrix = Query_x.expand(n, Query_x.size(0), Query_x.size(1)).transpose(0,1) # Expanding Query matrix "n" times
        Qx = torch.pairwise_distance(centroid_matrix.transpose(1,2), Query_matrix.transpose(1,2))
        return Qx

In [None]:
protonet = PrototypicalNet(use_gpu=use_gpu)#use_gpu=use_gpu)

In [13]:
protonet = PrototypicalNet(use_gpu=use_gpu)#use_gpu=use_gpu)
optimizer = optim.SGD(protonet.parameters(), lr = 0.01, momentum=0.99)
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)


In [41]:
print(torch.cuda.memory_allocated(device=None))

223773184


In [42]:
torch.cuda.empty_cache()

In [43]:
torch.cuda.max_memory_cached()

1447034880

In [44]:
torch.cuda.memory_cached()

385875968

In [18]:
def train_step(datax, datay, Ns,Nc, Nq):
    optimizer.zero_grad() #if need this for pretrained model ???
    Qx, Qy= protonet(datax, datay, Ns, Nc, Nq, np.unique(datay))
    pred = torch.log_softmax(Qx, dim=-1)
    loss = F.nll_loss(pred, Qy)
    loss.backward()
    optimizer.step()
    #lr_scheduler.step()
    acc = torch.mean((torch.argmax(pred, 1) == Qy).float())
    return loss, acc

In [19]:
import os.path as osp
save_path='save/proto_mix'
def save_model(name):
    torch.save(protonet.state_dict(), osp.join(save_path, name + '.pth'))

In [20]:
num_episode = 8000 #3000+
frame_size = 1000

In [None]:
#https://blog.paperspace.com/pytorch-memory-multi-gpu-debugging/

In [51]:
trlog = {}
    #trlog['args'] = vars(args)
trlog['train_loss'] = []
#trlog['val_loss'] = []
trlog['train_acc'] = []
#trlog['val_acc'] = []
trlog['max_acc'] = 0.0
    
frame_loss = 0
frame_acc = 0
losses = []

#model=protonet
#load='./save/proto_mix/max-acc.pth'  
#model.load_state_dict(torch.load(load))

for i in range(num_episode):
    
    if use_gpu:
        trainx = trainx.cuda() 
    loss, acc = train_step(trainx, trainy, 5, 20, 15)
    
    frame_loss += loss.data
    frame_acc += acc.data
    losses.append(loss)
    
    if( (i+1) % frame_size == 0):
        print(datetime.datetime.now(),"Frame Number:", ((i+1) // frame_size), 'Frame Loss: ', frame_loss.data.cpu().numpy().tolist()/ frame_size, 'Frame Accuracy:', (frame_acc.data.cpu().numpy().tolist() * 100) / frame_size)
        if frame_acc > trlog['max_acc']:
              trlog['max_acc'] = frame_acc
        save_model('max-acc')
            
        frame_loss = 0
        frame_acc = 0
    
                

    #trlog['train_loss'].append(tl)
    #trlog['train_acc'].append(ta)
        #trlog['val_loss'].append(vl)
        #trlog['val_acc'].append(va)

    torch.save(trlog, osp.join(save_path, 'trlog'))

    save_model('epoch-last')
        
    save_frame=1000
    if i % save_frame == 0:
        save_model('epoch-{}'.format(i))
    
    del acc, loss
    torch.cuda.empty_cache()

2020-05-07 19:05:57.843223 Frame Number: 1 Frame Loss:  0.24372451782226562 Frame Accuracy: 92.82457275390625


KeyboardInterrupt: 

In [53]:
torch.cuda.empty_cache()

In [54]:
torch.cuda.memory_cached()

385875968

In [34]:
def test_step(datax, datay, Ns,Nc, Nq):
    #model=protonet
    with torch.no_grad():
        Qx, Qy= protonet(datax, datay, Ns, Nc, Nq, np.unique(datay))
        pred = torch.log_softmax(Qx, dim=-1)
        loss = F.nll_loss(pred, Qy)
        acc = torch.mean((torch.argmax(pred, 1) == Qy).float())
        return loss, acc

In [65]:
num_test_episode = 1000 #2000+
frame_size = 100

In [68]:
avg_loss = 0
avg_acc = 0
#protonet.eval()
test_losses = []
#model=protonet
#load='./save/proto_mix/max-acc.pth'  
#model.load_state_dict(torch.load(load))

for i in range(num_test_episode):
    #if use_gpu:
        #testx = testx.cuda() 
    loss, acc = test_step(testx, testy, 5, 5, 15)
    frame_loss += loss.data
    frame_acc += acc.data
    
    if( (i+1) % frame_size == 0):
        print(datetime.datetime.now(),"Frame Number:", ((i+1) // frame_size), 'Frame Loss: ', frame_loss.data.cpu().numpy().tolist()/ frame_size, 'Frame Accuracy:', (frame_acc.data.cpu().numpy().tolist() * 100) / frame_size)
    avg_loss += loss.data
    avg_acc += acc.data
    #test_losses.append(loss)
    #del acc, loss
    torch.cuda.empty_cache()
    
print('Avg Loss: ', avg_loss.data.cpu().numpy().tolist() / num_test_episode , 'Avg Accuracy:', (avg_acc.data.cpu().numpy().tolist() * 100) / num_test_episode)


2020-05-07 19:14:53.137457 Frame Number: 1 Frame Loss:  1.767197265625 Frame Accuracy: 409.73577880859375
2020-05-07 19:14:56.026379 Frame Number: 2 Frame Loss:  3.01162109375 Frame Accuracy: 469.5752258300781
2020-05-07 19:14:58.911997 Frame Number: 3 Frame Loss:  4.1874560546875 Frame Accuracy: 531.5953979492188
2020-05-07 19:15:01.789832 Frame Number: 4 Frame Loss:  5.373756713867188 Frame Accuracy: 593.8726806640625
2020-05-07 19:15:04.534697 Frame Number: 5 Frame Loss:  6.604613037109375 Frame Accuracy: 654.0866088867188
2020-05-07 19:15:07.355679 Frame Number: 6 Frame Loss:  7.8074407958984375 Frame Accuracy: 715.1099243164062
2020-05-07 19:15:10.176715 Frame Number: 7 Frame Loss:  8.964473266601562 Frame Accuracy: 777.2021484375
2020-05-07 19:15:13.053482 Frame Number: 8 Frame Loss:  10.1263134765625 Frame Accuracy: 839.2717895507812
2020-05-07 19:15:15.941803 Frame Number: 9 Frame Loss:  11.322021484375 Frame Accuracy: 899.3596801757812
2020-05-07 19:15:18.771217 Frame Number: 

In [None]:
#test2000: 61.228% test acc with 70.5% training acc

#test2000: 60.7   train 80%

In [None]:
#resnet : 55%

In [None]:
plt.figure(figsize=(10,5))
plt.title(" Losses")
plt.plot(losses,label="Train")
plt.plot(test_losses,label="Test")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
#TODO resnet34

In [None]:
small net: D=64 , acc: 50%
Big net:resize-42, D=224, acc: 58%
        with color-based+rotation augmentation: ..., take longer to train
TDDO: resize:84