## Install dependencies

In [None]:
# Environment setup: install or upgrade the third-party packages this notebook uses.
# NOTE(review): only opencv-python-headless is pinned to an exact version; the other
# packages resolve to whatever is newest at install time — consider pinning them too.
!pip install -q -U albumentations                 
!pip install -q opencv-python-headless==4.1.2.30   
# --force-reinstall --no-deps: reinstall the kaggle package itself without touching its dependencies.
!pip install -q --upgrade --force-reinstall --no-deps kaggle
!pip install -q  --upgrade wandb 
!pip install -q --upgrade timm

### configs

In [None]:
# Run-level configuration. PROJECT_NAME / MODEL_TYPE / ARCHITECTURE_NAME are combined into
# the W&B run name and id and into the checkpoint file names further down.
NAME = "Rifat" #your name here
PROJECT_NAME = "SimCLR"
MODEL_TYPE = "Unsupervised"
# Name of the backbone that is actually built in the model cell. The previous value
# ("tf_efficientnet__b0") had a doubled underscore and therefore did not match it.
ARCHITECTURE_NAME = "tf_efficientnet_b0"


## **Download the Kaggle Dataset and Create the DataFrame**

In [None]:
import os
import pandas as pd
from imutils import paths
from google.colab import files
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split


In [None]:
# Open the Colab upload widget; the Kaggle API token file (kaggle.json) is expected here,
# because the shell line at the end of this cell moves a file with exactly that name.
uploaded = files.upload()

# Echo every uploaded file together with the length of its content.
for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
# Then move kaggle.json into the folder where the API expects to find it.
# chmod 600 leaves the token readable and writable by the owner only.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

**Dataset Download and unzip**

In [None]:
!kaggle datasets download -d trolukovich/apparel-images-dataset
!mkdir dataset
!unzip -q apparel-images-dataset.zip -d ./dataset

**Dataframe Create**

In [None]:
# Build a one-column DataFrame of class labels, one row per image found on disk.
# The label of an image is the name of the folder that directly contains it.
directory="/content/dataset/"
imagePaths = sorted(paths.list_images(directory))
data = []  # not used in this cell; kept so the notebook's module-level names are unchanged
labels = [imagePath.split(os.path.sep)[-2] for imagePath in imagePaths]

df = pd.DataFrame(labels,columns=['labels'])
df['labels'] = df['labels'].astype('category')

# Distinct class names, in the order pandas assigns to the categories.
# NOTE(review): `df` built here holds labels only; the next cell replaces it with a CSV
# that carries the 'ImagePath' column the dataset class reads.
inverse_mapping = list(df['labels'].cat.categories)

**Dataframe Shuffle and Split**

In [None]:
# Fixed seed so that the shuffle and the train/test split are identical after a kernel restart.
SEED = 42

# NOTE(review): this CSV is not produced by any visible cell; it has to exist at this path
# already. Later cells read its 'ImagePath' and 'label' columns.
df = shuffle(pd.read_csv("/content/df_apparel_multiclass.csv"), random_state=SEED)
# Hold out 2% of the rows for the t-SNE visualisation at the end of the notebook.
train_dataframe , test_dataframe = train_test_split(df,test_size = 0.02, random_state=SEED)
print(len(train_dataframe))
print(len(test_dataframe))

11157
228


## **Useful imports**

In [None]:
import requests
import zipfile
import io
import numpy as np
import torch
from torchvision import transforms as T
from torchsummary import summary
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
import timm,time
import os
from PIL import Image
from collections import OrderedDict

import random

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns

import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import wandb
tsne = TSNE()
# device is set to cuda if cuda is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

**Variables**

In [None]:
# Training hyperparameters and the checkpoint output location.

losses_train = []  # one mean training loss per epoch, appended by the training loop
num_epochs = 20
tau = 0.05  # temperature read by loss_function
learning_rate = 0.001
save_path_checkpoints= "/content/model/ckpts"
# Create the checkpoint folder up front; exist_ok=True keeps the cell re-runnable.
os.makedirs(save_path_checkpoints, exist_ok=True)



**Dataset Class**

In [None]:
class ImageDataset_alb(Dataset):
    """Image dataset that reads files with OpenCV and applies albumentations-style transforms.

    Args:
        dataframe: pandas DataFrame with an 'ImagePath' column holding one image path per row.
        weak_transform: callable invoked as ``weak_transform(image=...)`` that returns a
            mapping with an ``"image"`` entry.
        strong_transform: callable with the same calling convention; only used when
            ``train`` is true.
        train: when true each item contains two views of the same image; otherwise one.
    """

    def __init__(self, dataframe,weak_transform,strong_transform,train=True):
        self.dataframe = dataframe
        self.weak_transform = weak_transform
        self.strong_transform = strong_transform
        self.train = train
        self.all_image_names = self.dataframe['ImagePath']

    def __len__(self):
        """Number of images, i.e. number of rows in the 'ImagePath' column."""
        return len(self.all_image_names)

    def __getitem__(self, index):
        """Load the image at positional ``index`` and return its transformed view(s).

        Returns:
            ``{'image1': weak_view, 'image2': strong_view}`` when ``train`` is true,
            otherwise ``{'image': weak_view}``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file at the stored path.
        """
        img_path = os.fspath(self.all_image_names.iloc[index])
        image = cv2.imread(img_path)
        # cv2.imread signals a missing or undecodable file by returning None instead of
        # raising; fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads channels as BGR; convert to RGB before the transforms run.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.train:
            return {'image1': self.weak_transform(image=image)["image"],
                    'image2': self.strong_transform(image=image)["image"]}
        return {'image': self.weak_transform(image=image)["image"]}

**Dataloaders and Transforms**

In [None]:
def fun_alb_transfrom(batch_size=32, num_workers=2):
    """Build the augmentation pipelines, the datasets and their dataloaders.

    Reads the module-level ``train_dataframe`` and ``test_dataframe``.

    Args:
        batch_size: batch size of the training loader.
        num_workers: worker processes used by both loaders.

    Returns:
        ``(train_loader, test_loader)``. The training loader shuffles and yields dicts
        with 'image1' (weak view) and 'image2' (strong view). The test loader yields
        the whole test set as a single batch of 'image' tensors in dataframe row order.
    """
    # Strong view: random crop + flip + colour jitter.
    # NOTE(review): HorizontalFlip uses p=1.0, so every strong view is flipped — confirm
    # that an always-on flip (rather than a random one) is intended.
    strong_transform = A.Compose(
        [
            A.RandomResizedCrop(p=1.0, height=224, width=224, scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=0),
            A.HorizontalFlip(p=1.0),
            A.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5, p=1.0),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ]
    )

    # Weak view: plain resize + the same normalisation, no random augmentation.
    weak_transform = A.Compose(
        [
            A.Resize(p=1.0, height=224, width=224, interpolation=0),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ]
    )

    train_dataset = ImageDataset_alb(train_dataframe, weak_transform, strong_transform, train=True)
    test_dataset = ImageDataset_alb(test_dataframe, weak_transform, strong_transform, train=False)

    dataloader_training_dataset = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    # The evaluation code pairs the embeddings with list(test_dataframe['label']), which is
    # in dataframe row order. Shuffling here would attach labels to the wrong samples, so
    # the test loader must keep the original order.
    dataloader_testing_dataset = DataLoader(
        test_dataset,
        batch_size=max(len(test_dataframe), 1),  # whole test set in one batch; never 0
        shuffle=False,
        num_workers=num_workers,
    )
    return dataloader_training_dataset,dataloader_testing_dataset


# Build both loaders once; the training loop and the t-SNE cell read these module-level names.
dataloader_training_dataset,dataloader_testing_dataset = fun_alb_transfrom()

**Wandb**

In [None]:
# Authenticate this runtime with Weights & Biases; the CLI prompts for an API key.
!wandb login 

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
class WandbLogger():
    """
    Thin wrapper that starts a Weights & Biases run when it is constructed.

    The constructor stores every argument on the instance under the same name and then
    forwards all of them, as keyword arguments, to ``wandb.init``.
    """
    def __init__(self, project, entity, name, id, config, resume="allow"):
        run_settings = {
            "project": project,
            "entity": entity,
            "name": name,
            "id": id,
            "config": config,
            "resume": resume,
        }
        # Keep a copy of each setting on the instance before starting the run.
        for attribute, value in run_settings.items():
            setattr(self, attribute, value)
        wandb.init(**run_settings)

In [None]:
# W&B project and the account/team ("entity") the run is logged under.
project= PROJECT_NAME
entity="rakib1521"


# Run name and run id are both built from the project and architecture names.
name = f"{PROJECT_NAME}_{ARCHITECTURE_NAME}" # several runs may share a name, but an id identifies a single run
# NOTE(review): `id` shadows the Python builtin, and its value is the same on every execution.
# With WandbLogger's default resume="allow" this presumably continues the existing run
# instead of starting a new one — confirm that is intended.
id = f"{PROJECT_NAME}_{ARCHITECTURE_NAME}"

# Hyperparameters recorded alongside the run.
wandb_config = {"network":ARCHITECTURE_NAME,
                "epoch":num_epochs,
                "tau": tau,
                "learning_rate": learning_rate,
                }
# Constructing the logger calls wandb.init, i.e. the run starts here.
wandb_logger = WandbLogger(project,entity,name,id,wandb_config)    

# Model Define

In [None]:
# Disabled draft of the model definition, kept only as an inert string literal: nothing in
# this cell is executed. This version attaches the projection head through `model.fc`,
# whereas the active cell that follows uses `model.classifier`.
#model = resnet18(pretrained=False)
"""
model = timm.create_model("tf_efficientnet_b0")

classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(model.fc.in_features, 256)),
    ('added_relu1', nn.ReLU(inplace=True)),
    ('fc2', nn.Linear(256, 128)),
    ('added_relu2', nn.ReLU(inplace=True)),
    ('fc3', nn.Linear(128, 64))
]))

model.fc = classifier

# moving the resnet architecture to device
model.to(device)
"""

In [None]:
# Backbone: the timm "tf_efficientnet_b0" network created with timm's default arguments.
model = timm.create_model("tf_efficientnet_b0")

# Projection head: a three-layer MLP that maps the backbone features to 64-dimensional
# embeddings. It replaces the backbone's original classifier layer.
classifier = nn.Sequential(
    OrderedDict(
        fc1=nn.Linear(model.classifier.in_features, 256),
        added_relu1=nn.ReLU(inplace=True),
        fc2=nn.Linear(256, 128),
        added_relu2=nn.ReLU(inplace=True),
        fc3=nn.Linear(128, 64),
    )
)
model.classifier = classifier

# Move the network to the selected device; as the last expression of the cell this also
# displays the module structure.
model.to(device)

# Loss Function

In [None]:
# Code for NT-Xent Loss function
def loss_function(a, b, temperature=None):
    """NT-Xent (normalized temperature-scaled cross entropy) loss for two batches of views.

    Row ``i`` of ``a`` and row ``i`` of ``b`` are treated as a positive pair; every other
    row of the concatenated batch acts as a negative for it.

    The value is the same quantity as the direct formula
    ``mean(-log(exp(s_pos / t) / sum_{k != i} exp(s_ik / t)))`` over cosine similarities,
    but it is evaluated in log-space so that small temperatures do not overflow ``exp``,
    and the normalisation is epsilon-guarded so an all-zero embedding does not yield NaN.

    Args:
        a: tensor of shape (N, D) with the embeddings of the first view.
        b: tensor of shape (N, D) with the embeddings of the second view.
        temperature: softmax temperature; defaults to the module-level ``tau``.

    Returns:
        Scalar tensor: the mean loss over all 2N rows.
    """
    if temperature is None:
        temperature = tau

    batch_size = a.shape[0]
    # Unit-length rows make the matrix product below a cosine-similarity matrix.
    embeddings = torch.nn.functional.normalize(torch.cat([a, b], dim=0), dim=1)
    logits = torch.mm(embeddings, embeddings.t()) / temperature

    # A sample must not count as its own negative: remove the diagonal from the softmax.
    self_mask = torch.eye(2 * batch_size, dtype=torch.bool, device=logits.device)
    logits = logits.masked_fill(self_mask, float('-inf'))

    # Row i (< N) is paired with row i + N, and row i + N with row i.
    positives = torch.arange(2 * batch_size, device=logits.device).roll(batch_size)
    return torch.nn.functional.cross_entropy(logits, positives)

# Training 

In [None]:
# SGD with momentum over every parameter of the model (backbone and projection head).
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# Note that this training is unsupervised, it uses the NT-Xent Loss function

# Guard for the training loop below; set to False to skip training when re-running the notebook.
TRAINING = True

def get_mean_of_list(L):
    """Return the arithmetic mean of the numbers in ``L``.

    ``L`` must be non-empty; an empty sequence raises ``ZeroDivisionError``.
    """
    count = len(L)
    total = sum(L)
    return total / count

if TRAINING:
    # Put the network in training mode before the optimisation loop starts.
    model.train()

    for epoch in range(num_epochs):

        # Per-batch loss values collected during this epoch.
        epoch_losses_train = []

        for sample_batched in dataloader_training_dataset:

            # Reset the gradients accumulated by the previous step.
            optimizer.zero_grad()

            # The two views of every image in the batch, moved to the compute device.
            x1 = sample_batched['image1'].to(device)
            x2 = sample_batched['image2'].to(device)

            # Embed both views (first view first) and compare them with the contrastive loss.
            loss = loss_function(model(x1), model(x2))
            epoch_losses_train.append(loss.item())

            # Back-propagate and update the weights.
            loss.backward()
            optimizer.step()

        # The epoch summary is the mean of its batch losses.
        mean_epoch_loss = get_mean_of_list(epoch_losses_train)
        losses_train.append(mean_epoch_loss)
        print("Epoch-{} Loss-{}".format(epoch+1, mean_epoch_loss))

        wandb.log({"train_loss": mean_epoch_loss})

        # Write one checkpoint per epoch; the file name carries the epoch number and mean loss.
        filepath=f"{save_path_checkpoints}/{PROJECT_NAME}_{MODEL_TYPE}-{ARCHITECTURE_NAME}-{epoch+1}_loss-{get_mean_of_list(epoch_losses_train)}.pt"
        checkpoint = dict(
            epoch=epoch+1,
            model_weight=model.state_dict(),
            optimizer_state=optimizer.state_dict(),
        )
        torch.save(checkpoint,filepath)
        print("{} saved".format(filepath))



**LOSS**

In [None]:
# Plot the per-epoch mean training loss and save the figure as losses.png.
# The statements act on pyplot/seaborn global state, so their order is significant:
# the figure is created first, the seaborn style is set, and only then is the curve drawn.
fig = plt.figure(figsize=(10, 10))
sns.set_style('darkgrid')
plt.plot(losses_train)
plt.legend(['Training Losses'])
plt.savefig('losses.png')
#plt.close()

# Testing

In [None]:
# Number of distinct class names collected from the dataset folder names.
len(inverse_mapping)

24

In [None]:
# a function used to plot t-SNE visualizations
def plot_vecs_n_labels(v,labels,fname):
    """Scatter-plot 2-D points coloured by label, save the figure and log it to W&B.

    Args:
        v: array indexed as ``v[:, 0]`` / ``v[:, 1]``, one row per sample.
        labels: one label per row of ``v``; used as the hue of the scatter plot.
        fname: path the figure is written to; the same file is logged to W&B as "Test".
    """
    plt.figure(figsize = (10, 10))
    plt.axis('off')
    sns.set_style("darkgrid")
    # x and y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
    sns.scatterplot(
        x=v[:,0],
        y=v[:,1],
        hue=labels,
        legend='full',
        palette=sns.color_palette("bright", len(inverse_mapping)),
    )
    # NOTE(review): this relabels the legend entries with inverse_mapping in order, which
    # is only right if the hue levels appear in the same order and all classes are
    # present in `labels` — confirm.
    plt.legend(inverse_mapping)
    plt.savefig(fname)
    wandb.log({"Test":wandb.Image(fname)})
    #plt.close()

# Switch the network to evaluation mode before the test embeddings are computed.
# The trailing ';' suppresses the cell's displayed output.
model.eval();

In [None]:
# TSNE visualizations of test dataset
# The labels below are read in dataframe row order, so the embeddings must be produced in
# that same order. Iterate the test dataset through a non-shuffling loader (same batch
# size and workers as the configured one) so that point i always belongs to label i.
ordered_test_loader = DataLoader(
    dataloader_testing_dataset.dataset,
    batch_size=dataloader_testing_dataset.batch_size,
    shuffle=False,
    num_workers=dataloader_testing_dataset.num_workers,
)

# Inference only: disabling autograd avoids keeping activations for the whole test batch.
with torch.no_grad():
    for sample_batched in ordered_test_loader:
        x = sample_batched['image'].to(device)
        y = model(x)
        # Project the embeddings to 2-D for plotting.
        y_tsne = tsne.fit_transform(y.cpu().numpy())
        labels = list(test_dataframe['label'])
        plot_vecs_n_labels(y_tsne,labels,'tsne_test_last_layer.png')


In [None]:
# End the W&B run that was started when the WandbLogger was constructed.
wandb.finish()