In [None]:
# Shell escape: print the GPU status reported by the nvidia-smi tool.
!nvidia-smi

In [None]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="0"

### Importing Required Libraries

In [None]:
!pip install torch==1.4.0

In [None]:
!pip install torchvision==0.5.0

In [None]:
# %reload_ext autoreload
# %autoreload 2
# %matplotlib inline

import pandas as pd
import numpy as np
import torch
from pathlib import Path
from torch.utils.data import Dataset, DataLoader, random_split, sampler
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, models
import random
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import os
import math
import xml.etree.ElementTree as ET

In [None]:
import torch
import torchvision
from tqdm import tqdm
#from torchsummary import summary

In [None]:
SEED = 222

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for repeatable runs.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run,
    # which defeats the deterministic flag above, so it is switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [None]:
# Display the torch version that is actually imported in this kernel.
torch.__version__

In [None]:
def crop_image(breed, dog, data_dir):
    """Crop one dataset image to its first annotated bounding box.

    Args:
        breed: breed folder name, used under both ``images/Images/`` and
            ``annotations/Annotation/``.
        dog: annotation file name (the image is ``dog + '.jpg'``).
        data_dir: dataset root; it is concatenated directly, so it must end
            with a path separator.

    Returns:
        The image array restricted to rows ``ymin:ymax`` and columns
        ``xmin:xmax`` of the first ``object`` entry in the annotation.
    """
    img = plt.imread(data_dir + 'images/Images/' + breed + '/' + dog + '.jpg')
    tree = ET.parse(data_dir + 'annotations/Annotation/' + breed + '/' + dog)

    # Locate the box element once instead of re-walking the tree per coordinate.
    bndbox = tree.getroot().findall('object')[0].find('bndbox')
    xmin, xmax, ymin, ymax = (
        int(bndbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax')
    )

    # Index only the two spatial axes: identical for (H, W, C) arrays and it
    # no longer raises on 2-D (single-channel) arrays.
    return img[ymin:ymax, xmin:xmax]

In [None]:
data_dir = '/kaggle/input/stanford-dogs-dataset/'
breed_list = os.listdir(data_dir + 'images/Images/')

plt.figure(figsize=(20, 20))

# Four random dogs: left column shows the full image with its annotated box,
# right column shows the crop produced by crop_image.
for i in range(4):

    plt.subplot(4, 2, 2 * i + 1)

    breed = np.random.choice(breed_list)
    dog = np.random.choice(os.listdir(data_dir + 'annotations/Annotation/' + breed))
    img = plt.imread(data_dir + 'images/Images/' + breed + '/' + dog + '.jpg')
    plt.imshow(img)

    tree = ET.parse(data_dir + 'annotations/Annotation/' + breed + '/' + dog)
    bndbox = tree.getroot().findall('object')[0].find('bndbox')
    xmin = int(bndbox.find('xmin').text)
    xmax = int(bndbox.find('xmax').text)
    ymin = int(bndbox.find('ymin').text)
    ymax = int(bndbox.find('ymax').text)

    # Closed rectangle: visit the four corners and return to the first one.
    plt.plot([xmin, xmax, xmax, xmin, xmin], [ymin, ymin, ymax, ymax, ymin])

    crop_img = crop_image(breed, dog, data_dir)
    print(crop_img.shape)
    plt.subplot(4, 2, 2 * i + 2)
    plt.imshow(crop_img)

In [None]:
# One output folder per breed for the cropped training images.
# exist_ok=True keeps the cell re-runnable and also fills in breed folders
# that are missing from a partially created 'cropped_data' tree.
os.makedirs('cropped_data', exist_ok=True)

for breed in breed_list:
    os.makedirs(os.path.join('cropped_data', breed), exist_ok=True)

print('Created {} folders to store cropped images of the different breeds.'.format(len(os.listdir('cropped_data'))))

In [None]:
# One output folder per breed for the cropped test images.
# exist_ok=True keeps the cell re-runnable and also fills in breed folders
# that are missing from a partially created 'cropped_data_test' tree.
os.makedirs('cropped_data_test', exist_ok=True)

for breed in breed_list:
    os.makedirs(os.path.join('cropped_data_test', breed), exist_ok=True)

print('Created {} folders to store cropped images of the different breeds.'.format(len(os.listdir('cropped_data_test'))))

In [None]:
import scipy.io
test_mat = scipy.io.loadmat('/kaggle/input/test-list2/test_list.mat')

# Each row of "file_list" wraps one name two levels deep; unwrap it and
# collect the names in a set for fast membership tests.
test_files = {row[0][0] for row in test_mat["file_list"]}

In [None]:
# Display the collected test-file names.
# NOTE(review): this echoes the entire set; consider showing only a sample.
test_files

In [None]:
# Crop every annotated image to its first bounding box and save it under
# 'cropped_data_test' when it is listed in test_files, else 'cropped_data'.
test_count = 0
for breed in tqdm(os.listdir('cropped_data')):
    for file in os.listdir(data_dir + 'annotations/Annotation/' + breed):

        tree = ET.parse(data_dir + 'annotations/Annotation/' + breed + '/' + file)
        # Locate the box element once instead of re-walking the tree per coordinate.
        bndbox = tree.getroot().findall('object')[0].find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        # The context manager releases the source file handle once the
        # cropped RGB copy has been made.
        with Image.open(data_dir + 'images/Images/' + breed + '/' + file + '.jpg') as source:
            img = source.crop((xmin, ymin, xmax, ymax)).convert('RGB')

        if breed + "/" + file + ".jpg" in test_files:
            test_count += 1
            img.save('cropped_data_test/' + breed + '/' + file + '.jpg')
        else:
            img.save('cropped_data/' + breed + '/' + file + '.jpg')

In [None]:
# test_count

In [None]:
# Total number of files across all breed folders of the cropped test set.
file_name = "cropped_data_test/"
img_count = sum(
    len(os.listdir(file_name + folder)) for folder in os.listdir(file_name)
)

print('No. of Images: {}'.format(img_count))

In [None]:
# Data-loading constants.
# batch_size is passed to both DataLoaders created further down.
batch_size = 128
# image_size is only referenced by commented-out code in this notebook.
image_size = 224

# image_transforms = {
    
#     'train':torchvision.transforms.Compose([
#             torchvision.transforms.Resize(size=(image_size, image_size)),
#             torchvision.transforms.RandomHorizontalFlip(),
#             torchvision.transforms.ToTensor(),
#             torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
#                                              std=(0.229, 0.224, 0.225))
#     ]),
#     'val':torchvision.transforms.Compose([
#             torchvision.transforms.Resize(size=(image_size, image_size)),
#             torchvision.transforms.ToTensor(),
#             torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
#                                              std=(0.229, 0.224, 0.225))
#         ])
# }

In [None]:
from torchvision import transforms

In [None]:
# Pre-processing pipelines keyed by split name.
image_transforms = dict(
    # Training: random augmentation, then a fixed 224x224 centre crop.
    train=transforms.Compose([
        transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
        transforms.RandomRotation(15),
        # NOTE(review): ColorJitter() is called without arguments; with
        # torchvision's defaults this presumably leaves the image
        # unchanged - confirm whether jitter strengths were intended.
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    # Validation: deterministic resize and centre crop, no augmentation.
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

In [None]:
# transform_pipe = torchvision.transforms.Compose([
#     torchvision.transforms.ToPILImage(), # Convert np array to PILImage
    
#     # Resize image to 224 x 224 as required by most vision models
#     torchvision.transforms.Resize(
#         size=(224, 224)
#     ),
    
#     # Convert PIL image to tensor with image values in [0, 1]
#     torchvision.transforms.ToTensor(),
    
#     torchvision.transforms.Normalize(
#         mean=[0.485, 0.456, 0.406],
#         std=[0.229, 0.224, 0.225]
#     )
# ])

In [None]:
# Build the datasets from the cropped folders (one sub-folder per class).
# No transform is passed here; it is assigned in a later cell.
train_dataset = datasets.ImageFolder(root='cropped_data')
test_dataset = datasets.ImageFolder(root='cropped_data_test')

In [None]:
# Display the class lists of both datasets side by side for comparison.
test_dataset.classes, train_dataset.classes

In [None]:
# Inspect the transform currently attached to the training dataset.
train_dataset.transform

In [None]:
# Attach the augmenting pipeline to the training set and the plain
# (non-augmenting) pipeline to the test set.
train_dataset.transform = image_transforms['train']
test_dataset.transform = image_transforms['val']

In [None]:
# Display both dataset summaries.
train_dataset, test_dataset

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not depend on sample order, so keep it fixed: the metrics
# are unchanged and the loader no longer draws from the random generator.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Pull one batch to sanity-check tensor shapes.
# The batch is bound to batch_features / batch_labels rather than `features`,
# because the model cells use the global name `features` as an integer
# channel count and re-running this cell would silently overwrite it.
trainiter = iter(train_loader)
batch_features, batch_labels = next(trainiter)
print(batch_features.shape, batch_labels.shape)

In [None]:
from torch import FloatTensor

def new_parameter(*size):
    """Create a trainable float parameter of the given size.

    The values are initialised in place with Xavier-normal initialisation.

    Args:
        *size: dimensions of the parameter tensor.

    Returns:
        An ``nn.Parameter`` with ``requires_grad=True``.
    """
    out = nn.Parameter(FloatTensor(*size), requires_grad=True)
    torch.nn.init.xavier_normal_(out)
    return out


class Attention(nn.Module):
    """Softmax attention weights over dimension 1 of a 3-D input.

    Holds a single learned projection vector of shape ``(attention_size, 1)``.
    """

    def __init__(self, attention_size):
        """
        Args:
            attention_size: size of the last dimension of the inputs that
                will be passed to ``forward``.
        """
        super(Attention, self).__init__()
        self.attention = new_parameter(attention_size, 1)

    def forward(self, x_in):
        """Compute one normalised weight per position along dimension 1.

        Args:
            x_in: tensor of shape ``(batch, length, attention_size)``.

        Returns:
            Tensor of shape ``(batch, length, 1)`` whose values sum to 1
            along dimension 1 for every batch element.
        """
        # (batch, length, 1) -> (batch, length). Only the trailing axis is
        # squeezed: a bare squeeze() would also drop a batch axis of size 1
        # and make the softmax over dim=1 fail.
        attention_score = torch.matmul(x_in, self.attention).squeeze(-1)
        attention_score = F.softmax(attention_score, dim=1)

        return attention_score.unsqueeze(-1)

In [None]:
# Globals read by CNN_Resnet below:
# `features` sizes the attention modules and the classifier input;
# `fmap_size ** 2` is the flattened spatial size the model's forward reshapes to.
features = 2048
fmap_size = 7

In [None]:
# resnet = models.resnet50()
# resnet.load_state_dict(torch.load('imagenet_models/resnet50-19c8e357.pth'), strict=False)

# # freezing parameters
# for param in resnet.parameters():
#     param.requires_grad = False

# layers = list(models.resnet50().children())[:-2]
# resnet = nn.Sequential(*layers).cuda()

# x = resnet(torch.randn(1, 3, image_size, image_size).cuda())
# x.shape

# N=2

# attention_row = Attention(features).cuda()
# attention_col = Attention(features).cuda()


# x = resnet(torch.randn(N, 3, image_size, image_size).cuda())

# x = x.view(N, fmap_size ** 2, features)
# print(x.shape)


# x = torch.bmm(torch.transpose(x, 1, 2), x)/ (fmap_size ** 2) 
# x = torch.sqrt(x + 1e-5)

# print(x.shape)

# y = attention_row(x)

# z = attention_col(x.permute(0, 2, 1))

# scored_y = x * y

# # now, sum across dim 1 to get the expected feature vector
# condensed_y = torch.sum(scored_y, dim=1)

# scored_z = x * z

# # now, sum across dim 1 to get the expected feature vector
# condensed_z = torch.sum(scored_z, dim=1)

# condensed_x = condensed_y * condensed_z

# print(condensed_y.shape, condensed_z.shape, condensed_x.shape)
# print(condensed_y, condensed_z, condensed_x)
# '''
# x = x * attn_int
# x = torch.sum(x, dim=1)
# print(x.shape)
# '''


In [None]:
# Scratch check: joining two (2, 2048) tensors along dim 1 doubles the width.
x = torch.randn(2, 2048)

torch.cat((x, x), dim=1).shape

In [None]:
class CNN_Resnet(nn.Module):
    """ResNet-50 backbone with bilinear pooling, attention pooling and a classifier.

    The backbone (ResNet-50 without its last two children) produces a feature
    map. Its channel-by-channel second-order matrix is pooled twice with
    learned attention weights, and the two pooled vectors are concatenated
    and classified into 120 classes.
    """

    def __init__(self, fine_tune=False):
        """
        Args:
            fine_tune: when False (default) all backbone parameters are
                frozen; when True they are trainable.
        """
        super(CNN_Resnet, self).__init__()

        resnet = models.resnet50(pretrained=True)
        #resnet.load_state_dict(torch.load('imagenet_models/resnet50-19c8e357.pth'))

        # Channel count of the backbone output, read from the stock classifier
        # head instead of a notebook-level global that other cells may rebind.
        feature_dim = resnet.fc.in_features

        self.attn_row = Attention(feature_dim)
        self.attn_col = Attention(feature_dim)

        # Freeze or unfreeze the whole backbone in one pass.
        for param in resnet.parameters():
            param.requires_grad = bool(fine_tune)

        # Drop the last two children so the spatial feature map is kept.
        # Device placement is left to the caller (e.g. ``CNN_Resnet().cuda()``)
        # so the model can also be built on a machine without a GPU.
        layers = list(resnet.children())[:-2]
        self.features = nn.Sequential(*layers)

        self.fc = nn.Linear(feature_dim * 2, 120)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()  # kept for compatibility; not used in forward

        # Initialize the fc layers.
        nn.init.xavier_normal_(self.fc.weight.data)

        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, x):
        """Return class scores of shape ``(batch, 120)`` for an image batch.

        Args:
            x: image batch accepted by the ResNet-50 backbone.
        """
        # (batch, channels, H, W) -> (batch, channels, H*W); sizes come from
        # the tensor itself rather than from hard-coded globals.
        fmap = self.features(x).flatten(start_dim=2)

        # Channel-by-channel second-order statistics averaged over positions,
        # followed by an element-wise square root (epsilon avoids sqrt(0)).
        gram = torch.bmm(fmap, torch.transpose(fmap, 1, 2)) / fmap.size(2)
        gram = torch.sqrt(gram + 1e-5)

        # Two independently parameterised attention weightings, each of
        # shape (batch, channels, 1).
        row_weights = self.attn_row(gram)
        col_weights = self.attn_col(gram.permute(0, 2, 1))

        # Weight the rows and sum across dim 1 to get one vector per branch.
        condensed_y = torch.sum(gram * row_weights, dim=1)
        condensed_z = torch.sum(gram * col_weights, dim=1)

        condensed_x = torch.cat([condensed_y, condensed_z], dim=1)

        return self.fc(self.dropout(condensed_x))

In [None]:
## additive noise 

In [None]:
# model = CNN_Resnet().cuda()

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.0001)


# lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
# lr_finder.range_test(train_loader, end_lr=1, num_iter=100)
# lr_finder.plot()

In [None]:
# Build the classifier with its default arguments and move it to the GPU.
model = CNN_Resnet().cuda()

# Cross-entropy loss on the raw model outputs; Adam is handed every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Keep the label mappings on the model: class name -> index and its inverse.
model.class_to_idx = train_dataset.class_to_idx
model.idx_to_class = dict(
    zip(model.class_to_idx.values(), model.class_to_idx.keys())
)

list(model.idx_to_class.items())

In [None]:
def train(model,
          criterion,
          optimizer,
          train_loader,
          val_loader,
          save_location,
          early_stop=3,
          n_epochs=20,
          print_every=1):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    After every epoch the model is evaluated on ``val_loader``. Whenever the
    validation accuracy improves, the state dict is saved to ``save_location``.
    If it fails to improve for ``early_stop`` consecutive epochs, the best
    checkpoint is reloaded and training stops. If all ``n_epochs`` complete,
    the returned model keeps its final-epoch weights; the best weights are
    then only available in the checkpoint file.

    Args:
        model: ``nn.Module`` to train; batches are moved to its device.
        criterion: loss called as ``criterion(output, label)``.
        optimizer: optimizer stepping the model parameters.
        train_loader: loader yielding ``(data, label)`` training batches.
        val_loader: loader yielding ``(data, label)`` validation batches.
        save_location: path of the checkpoint file.
        early_stop: number of non-improving epochs tolerated before stopping.
        n_epochs: maximum number of epochs.
        print_every: print the epoch summary every this many epochs.

    Returns:
        ``(model, history)`` where ``history`` is a DataFrame with columns
        ``train_loss``, ``valid_loss``, ``train_acc``, ``valid_acc`` (one row
        per completed epoch). ``model.epochs`` and ``model.optimizer`` are
        set as side effects.
    """
    columns = ['train_loss', 'valid_loss', 'train_acc', 'valid_acc']

    # Follow the model's device instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint and defines best_epoch; otherwise an early stop with 0%
    # validation accuracy would hit an unbound name and a missing file.
    valid_acc_max = float('-inf')
    best_epoch = None
    stop_count = 0
    history = []
    model.epochs = 0

    for epoch in range(n_epochs):

        train_loss = 0.0
        valid_loss = 0.0
        train_correct = 0
        valid_correct = 0

        model.train()

        # ii is the 1-based index of the current batch.
        for ii, (data, label) in enumerate(train_loader, start=1):

            data, label = data.to(device), label.to(device)
            output = model(data)

            loss = criterion(output, label)
            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; scale back to a batch total.
            train_loss += loss.item() * data.size(0)

            # Index of the highest score per row is the predicted class.
            pred = output.argmax(dim=1)
            train_correct += pred.eq(label.view_as(pred)).sum().item()

            if ii % 10 == 0:
                # ii already counts the finished batch, so no extra +1 here
                # (the last report can no longer exceed 100%).
                print(f'Epoch: {epoch}\t{100 * ii / len(train_loader):.2f}% complete.')

        model.epochs += 1

        model.eval()
        with torch.no_grad():

            for data, label in val_loader:

                data, label = data.to(device), label.to(device)
                output = model(data)
                loss = criterion(output, label)
                valid_loss += loss.item() * data.size(0)

                pred = output.argmax(dim=1)
                valid_correct += pred.eq(label.view_as(pred)).sum().item()

        # Per-sample averages over the whole datasets.
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(val_loader.dataset)

        train_acc = train_correct / len(train_loader.dataset)
        valid_acc = valid_correct / len(val_loader.dataset)

        history.append([train_loss, valid_loss, train_acc, valid_acc])

        if (epoch + 1) % print_every == 0:

            print(f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}')
            print(f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%')

        if valid_acc > valid_acc_max:

            torch.save({
                'state_dict': model.state_dict()
                #'idx_to_class': model.idx_to_class
            }, save_location)

            stop_count = 0
            valid_acc_max = valid_acc
            best_epoch = epoch

        else:

            stop_count += 1

            # Early stop: restore the best checkpoint before returning.
            if stop_count >= early_stop:

                print(f'\nEarly Stopping Total epochs: {epoch}. Best epoch: {best_epoch} with best val acc: {100 * valid_acc_max:.2f}%')
                model.load_state_dict(torch.load(save_location)['state_dict'])
                model.optimizer = optimizer
                history = pd.DataFrame(history, columns=columns)
                return model, history

    model.optimizer = optimizer

    history = pd.DataFrame(history, columns=columns)

    return model, history

In [None]:
#summary(model, input_size=(3, image_size, image_size))

In [None]:
# Run training for at most 10 epochs, stopping after 3 epochs without
# improvement.
# NOTE(review): test_loader is passed as the validation loader, so early
# stopping and checkpoint selection are driven by the test split - confirm
# that no separate validation split was intended.
model, history = train(
    model,
    criterion,
    optimizer,
    train_loader,
    test_loader,
    save_location='/kaggle/working/dog_bcnn_resnet50_row_col_agg_conc.pt',
    early_stop=3,
    n_epochs=10,
    print_every=1)

In [None]:
class CNN_Resnet(nn.Module):
    """Feature-extraction variant of the attention-pooled ResNet-50 model.

    NOTE: this definition reuses the name ``CNN_Resnet`` and therefore
    replaces any earlier class of that name in the kernel. It declares the
    same sub-modules (including ``fc``) so that a trained checkpoint loads
    with matching keys, but ``forward`` returns the pooled descriptor
    instead of class scores.
    """

    def __init__(self, fine_tune=False):
        """
        Args:
            fine_tune: when False (default) all backbone parameters are
                frozen; when True they are trainable.
        """
        super(CNN_Resnet, self).__init__()

        resnet = models.resnet50(pretrained=True)
        #resnet.load_state_dict(torch.load('imagenet_models/resnet50-19c8e357.pth'))

        # Channel count of the backbone output, read from the stock classifier
        # head instead of a notebook-level global that other cells may rebind.
        feature_dim = resnet.fc.in_features

        self.attn_row = Attention(feature_dim)
        self.attn_col = Attention(feature_dim)

        # Freeze or unfreeze the whole backbone in one pass.
        for param in resnet.parameters():
            param.requires_grad = bool(fine_tune)

        # Drop the last two children so the spatial feature map is kept.
        # Device placement is left to the caller (e.g. ``CNN_Resnet().cuda()``).
        layers = list(resnet.children())[:-2]
        self.features = nn.Sequential(*layers)

        # Not used by forward; kept so the module set matches the classifier.
        self.fc = nn.Linear(feature_dim * 2, 120)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

        # Initialize the fc layers.
        nn.init.xavier_normal_(self.fc.weight.data)

        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, x):
        """Return the pooled descriptor of shape ``(batch, 2 * channels)``.

        Args:
            x: image batch accepted by the ResNet-50 backbone.
        """
        # (batch, channels, H, W) -> (batch, channels, H*W); sizes come from
        # the tensor itself rather than from hard-coded globals.
        fmap = self.features(x).flatten(start_dim=2)

        # Channel-by-channel second-order statistics averaged over positions,
        # followed by an element-wise square root (epsilon avoids sqrt(0)).
        gram = torch.bmm(fmap, torch.transpose(fmap, 1, 2)) / fmap.size(2)
        gram = torch.sqrt(gram + 1e-5)

        row_weights = self.attn_row(gram)
        col_weights = self.attn_col(gram.permute(0, 2, 1))

        # Weight the rows and sum across dim 1 to get one vector per branch.
        condensed_y = torch.sum(gram * row_weights, dim=1)
        condensed_z = torch.sum(gram * col_weights, dim=1)

        # The dropout/fc step was previously computed and thrown away; it is
        # skipped because only the descriptor is returned.
        return torch.cat([condensed_y, condensed_z], dim=1)

In [None]:
# Rebuild the model on the GPU and load the weights saved during training.
# NOTE(review): the model is not switched to eval mode in this cell.
model = CNN_Resnet(fine_tune=False).cuda()
model.load_state_dict(torch.load('/kaggle/working/dog_bcnn_resnet50_row_col_agg_conc.pt')['state_dict'])

In [None]:
def get_deep_features(img_path):
    """Return the global ``model``'s output for one image as a NumPy array.

    Args:
        img_path: path of the image file to describe.

    Returns:
        1-D NumPy array: the first row of ``model``'s output for the image.
    """
    # Inference only: put the model in eval mode and skip autograd.
    model.eval()
    device = next(model.parameters()).device

    # Close the file handle promptly and force three channels so that
    # greyscale or RGBA files also pass the normalisation step.
    with Image.open(img_path) as img:
        arr_prep = image_transforms["val"](img.convert('RGB'))
    arr_prep = arr_prep.unsqueeze(0).to(device)

    # Feed two copies of the image: this keeps the call working even if the
    # attention module squeezes away a singleton batch dimension.
    arr_prep = arr_prep.repeat(2, 1, 1, 1)

    with torch.no_grad():
        feat = model(arr_prep)

    return feat[0].cpu().detach().numpy()


In [None]:
# Example cropped-image paths (notes only; as bare code these lines raised
# NameError when the cell was run):
# cropped_data/n02094258-Norwich_terrier/n02094258_103.jpg
#
# cropped_data/n02086240-Shih-Tzu/n02086240_1078.jpg
#
# cropped_data/n02093991-Irish_terrier/n02093991_2437.jpg

In [None]:
from scipy.spatial.distance import cosine as cs
from scipy.spatial.distance import euclidean as eu

In [None]:
# Euclidean distance between two feature vectors. ravel() flattens (1, D)
# arrays because SciPy's distance functions expect 1-D input.
# NOTE(review): in the visible notebook x2/x3 are only assigned in a later
# cell, so this cell fails on a fresh top-to-bottom run.
eu(x2.ravel(), x3.ravel())

In [None]:
# Euclidean distance between two feature vectors. ravel() flattens (1, D)
# arrays because SciPy's distance functions expect 1-D input.
# NOTE(review): in the visible notebook x1/x3 are only assigned in a later
# cell, so this cell fails on a fresh top-to-bottom run.
eu(x1.ravel(), x3.ravel())

In [None]:
# Show both versions: only a cell's last expression is displayed, so the
# torch version on its own line was silently dropped.
torch.__version__, torchvision.__version__

In [None]:
# resnet = models.resnet50(pretrained=True)


# #resnet.load_state_dict(torch.load('/kaggle/input/pretrained-pytorch-models/resnet50-19c8e357.pth'))
# layers = list(models.resnet50().children())[:-1]
# resnet = nn.Sequential(*layers).cuda()
# resnet.eval()

In [None]:
resnet = models.resnet50(pretrained=True)


#resnet.load_state_dict(torch.load('/kaggle/input/pretrained-pytorch-models/resnet50-19c8e357.pth'))
# Take the layers from the pretrained instance. Building them from a fresh
# models.resnet50() call would discard the downloaded weights and extract
# features with a randomly initialised network.
layers = list(resnet.children())[:-1]
resnet = nn.Sequential(*layers)
resnet.eval()

In [None]:

# def get_deep_features(img_path):
    
#     resnet.eval()
    
#     with torch.no_grad():
        
#         img = Image.open(img_path)
#         arr_prep = image_transforms["val"](img)
#         arr_prep = arr_prep.unsqueeze(axis=0).cuda()
#         feat = resnet(arr_prep)
#         return feat.view(1, -1).cpu().detach().numpy()

In [None]:
def get_deep_features(img_path):
    """Return the global ``resnet`` extractor's features for one image.

    NOTE: this reuses the name ``get_deep_features`` and replaces any
    earlier function of that name in the kernel.

    Args:
        img_path: path of the image file to describe.

    Returns:
        NumPy array of shape ``(1, D)``: the flattened extractor output.
    """
    resnet.eval()

    with torch.no_grad():

        # Close the file handle promptly and force three channels so that
        # greyscale or RGBA files also pass the normalisation step.
        with Image.open(img_path) as img:
            arr_prep = image_transforms["val"](img.convert('RGB'))

        # Add the batch dimension expected by the network.
        feat = resnet(arr_prep.unsqueeze(0))
        return feat.view(1, -1).numpy()

In [None]:


# Features for three cropped images: one Norwich terrier (x1) and two
# Eskimo dogs (x2, x3), judging by the folder names in the paths.
x1 = get_deep_features('/kaggle/working/cropped_data/n02094258-Norwich_terrier/n02094258_103.jpg')
x2 = get_deep_features('/kaggle/working/cropped_data/n02109961-Eskimo_dog/n02109961_12719.jpg')
x3 = get_deep_features('/kaggle/working/cropped_data/n02109961-Eskimo_dog/n02109961_1017.jpg')

In [None]:
# Cosine similarity (1 - cosine distance). ravel() flattens the (1, D)
# feature arrays because SciPy's distance functions expect 1-D input.
1 - cs(x1.ravel(), x3.ravel())

In [None]:
# Cosine similarity (1 - cosine distance). ravel() flattens the (1, D)
# feature arrays because SciPy's distance functions expect 1-D input.
1 - cs(x2.ravel(), x3.ravel())