In [1]:
! git clone https://github.com/nikheelpandey/BYOL-PyTorch.git
import os
os.listdir()
os.chdir('./BYOL-PyTorch')

Cloning into 'BYOL-PyTorch'...
remote: Enumerating objects: 70, done.[K
remote: Counting objects: 100% (70/70), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 70 (delta 25), reused 53 (delta 15), pack-reused 0[K
Unpacking objects: 100% (70/70), done.


In [2]:
import os
import sys
import numpy as np
from math import pi, cos 


import torch
import torchvision
import torch.nn as nn
from logger import Logger
from torch import allclose
from datetime import datetime
import torch.nn.functional as tf 
import torchvision.transforms as T
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torch.testing import assert_allclose
from torchvision import datasets, transforms
from tqdm.notebook import tqdm


import kornia
from kornia import augmentation as K
import kornia.augmentation.functional as F
import kornia.augmentation.random_generator as rg
from torchvision.transforms import functional as tvF

In [3]:
uid = 'byol'
dataset_name = 'stl10'
data_dir = 'dataset'
ckpt_dir = "./ckpt/"+str(datetime.now().strftime('%m%d%H%M%S'))
log_dir = "runs/"+str(datetime.now().strftime('%m%d%H%M%S'))

if not os.path.exists(data_dir):
    os.makedirs(data_dir)
    
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)

if not os.path.exists(log_dir):
    os.makedirs(log_dir)

In [4]:
# transformations

_MEAN =  [0.5, 0.5, 0.5]
_STD  =  [0.2, 0.2, 0.2]


class InitalTransformation():
    def __init__(self):
        self.transform = T.Compose([
            T.ToTensor(),
            transforms.Normalize(_MEAN,_STD),
        ])

    def __call__(self, x):
        x = self.transform(x)
        return  x


def gpu_transformer(image_size,s=.2):
        
    train_transform = nn.Sequential(
                
                kornia.augmentation.RandomResizedCrop(image_size,scale=(0.5,1.0)),
                kornia.augmentation.RandomHorizontalFlip(p=0.5),
                kornia.augmentation.ColorJitter(0.8*s,0.8*s,0.8*s,0.2*s,p=0.3),
                kornia.augmentation.RandomGrayscale(p=0.05),
            )

    test_transform = nn.Sequential(  
                kornia.augmentation.RandomResizedCrop(image_size,scale=(0.5,1.0)),
                kornia.augmentation.RandomHorizontalFlip(p=0.5),
                kornia.augmentation.ColorJitter(0.8*s,0.8*s,0.8*s,0.2*s,p=0.3),
                kornia.augmentation.RandomGrayscale(p=0.05),
        )

    return train_transform , test_transform
                
def get_clf_train_test_transform(image_size,s=.2):
        
    train_transform = nn.Sequential(
                
                kornia.augmentation.RandomResizedCrop(image_size,scale=(0.5,1.0)),
                kornia.augmentation.RandomHorizontalFlip(p=0.5),
            )

    test_transform = nn.Sequential(  
                kornia.augmentation.RandomResizedCrop(image_size,scale=(0.5,1.0)),
                kornia.augmentation.RandomHorizontalFlip(p=0.5),
        )

    return train_transform , test_transform

In [5]:
import numpy as np
import torch.nn.functional as nnf
from torch.utils.data.dataset import Dataset
from skimage import io as skio
from skimage import transform
from torchvision import transforms as T

class SDSSDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file, root_dir):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = pd.read_csv(csv_file)
        label_dict = {'GALAXY':0,'STAR':1,'QSO':2}
        self.df['class'] = self.df['class'].apply(lambda x : label_dict[x])
        print(self.df.head(2))
        self.root_dir = root_dir
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,self.df.iloc[idx]["image"])
        image = skio.imread(img_name).reshape(3,128,128).astype(np.float32)
        torch_tensor = torch.from_numpy(image).unsqueeze(0)
        
        out = nnf.interpolate(torch_tensor, size=(96, 96), mode='nearest').squeeze(0)
        
        image = out.cpu().detach().numpy().reshape(3,96,96)
        label = self.df.iloc[idx]["class"]
        return image,label

In [6]:
import pandas as pd
dataset = SDSSDataset("/kaggle/input/sdss-images/data.csv","/kaggle/input/sdss-images/images (1)/images")

def get_train_test_dataloaders(dataset = dataset, data_dir="./dataset", batch_size = 16,num_workers = 4, download=True): 
    
    train_loader = torch.utils.data.DataLoader(
        dataset = dataset,
        shuffle=True,
        batch_size= batch_size,
        num_workers = num_workers
    )
    

    test_loader = torch.utils.data.DataLoader(
        dataset = dataset,
        shuffle=True,
        batch_size= batch_size,
        num_workers = num_workers
        )
    return train_loader, test_loader

   class           image
0      1  image_0000.jpg
1      1  image_0001.jpg


In [7]:
import copy
from torch import nn
import torchvision.models as models

def loss_fn(q1,q2, z1t,z2t):
    
    l1 = - tf.cosine_similarity(q1, z1t.detach(), dim=-1).mean()
    l2 = - tf.cosine_similarity(q2, z2t.detach(), dim=-1).mean()
    
    return (l1+l2)/2


class MLPHead(nn.Module):
    def __init__(self, in_dim, hidden_size=4096, projection_size=256):
        super(MLPHead, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, projection_size)
        )

    def forward(self, x):
        return self.net(x)
    


class BYOL(nn.Module):
    def __init__(self, backbone=None,base_target_ema=0.996,**kwargs):
        super().__init__()
        self.base_ema = base_target_ema
        
        if backbone is None:
            backbone = models.resnet50(pretrained=False)
            backbone.output_dim = backbone.fc.in_features
            backbone.fc = torch.nn.Identity()

        projector = MLPHead(in_dim=backbone.output_dim)
        
        self.online_encoder = nn.Sequential(
            backbone,
            projector)
        
        self.target_encoder = copy.deepcopy(self.online_encoder)
        self.online_predictor = MLPHead(in_dim=256,hidden_size=1024, projection_size=256)
        
            

    @torch.no_grad()
    def update_moving_average(self, global_step, max_steps):
        
        tau = 1- ((1 - self.base_ema)* (cos(pi*global_step/max_steps)+1)/2) 
        
        for online, target in zip(self.online_encoder.parameters(), self.target_encoder.parameters()):
            target.data = tau * target.data + (1 - tau) * online.data     
    
    def forward(self,x1,x2):
        
        z1 = self.online_encoder(x1)
        z2 = self.online_encoder(x2)
        
        q1 = self.online_predictor(z1)
        q2 = self.online_predictor(z2)
        
        with torch.no_grad():
            z1_t = self.target_encoder(x1)
            z2_t = self.target_encoder(x2)
       
        loss = loss_fn(q1, q2, z1_t, z2_t)
        
        return loss

In [8]:
if torch.cuda.is_available():
    dtype = torch.cuda.FloatTensor
    device = torch.device("cuda")
    # torch.cuda.set_device(device_id)
else:
    dtype = torch.FloatTensor
    device = torch.device("cpu")
    
print(device)

cuda


In [9]:
weight_decay = 1.5e-6
warmup_epochs =  10
warmup_lr = 0
momentum = 0.9
lr =  0.002
final_lr =  0
epochs = 300
stop_at_epoch = 100
batch_size = 256
knn_monitor = False
knn_interval = 5
knn_k = 200
image_size = (92,92)

In [10]:
train_loader, test_loader = get_train_test_dataloaders(batch_size=batch_size)
train_transform,test_transform = gpu_transformer(image_size)

In [11]:

from lr_scheduler import LR_Scheduler
from lars import LARS

loss_ls = []
acc_ls = []

model = BYOL().to(device)
model.online_encoder.load_state_dict(torch.load("/kaggle/input/contrastive-learning-using-byol/BYOL-PyTorch/ckpt/1119103602/byol_1119141621.pth")['online_network'])
model.target_encoder.load_state_dict(torch.load("/kaggle/input/contrastive-learning-using-byol/BYOL-PyTorch/ckpt/1119103602/byol_1119141621.pth")['target_network'])

optimizer = LARS(model.named_modules(), lr=lr, momentum=momentum, weight_decay=weight_decay)

        
scheduler = LR_Scheduler(
        optimizer, warmup_epochs, warmup_lr*batch_size/8,

        epochs, lr*batch_size/8, final_lr*batch_size/8, 
        len(train_loader),
        constant_predictor_lr=True 
        )

In [12]:
@torch.no_grad()
def prepare_data_features(model, dataset):
    # Prepare model
    network = model
    network.eval()
    network.to(device)
    # Encode all images
    data_loader = data.DataLoader(dataset, batch_size=64, num_workers=NUM_WORKERS, shuffle=False, drop_last=False)
    feats, labels = [], []
    for batch_imgs, batch_labels in tqdm(data_loader):
        batch_imgs = batch_imgs.to(device)
        batch_feats = network.online_encoder(batch_imgs)
        feats.append(batch_feats.detach().cpu())
        labels.append(batch_labels)
    
    feats = torch.cat(feats, dim=0)
    labels = torch.cat(labels, dim=0)
    
    # Sort images by labels
    labels, idxs = labels.sort()
    feats = feats[idxs]
    
    return data.TensorDataset(feats, labels)

In [13]:
import numpy as np
import torch.nn.functional as nnf
from torch.utils.data.dataset import Dataset
from skimage import io as skio
from skimage import transform
from torchvision import transforms as T

class SDSSDataset(Dataset):
    def __init__(self, csv_file, root_dir):
        self.df = pd.read_csv(csv_file)
        label_dict = {'GALAXY':0,'STAR':1,'QSO':2}
        self.df['class'] = self.df['class'].apply(lambda x : label_dict[x])
        print(self.df.head(2))
        self.root_dir = root_dir
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,self.df.iloc[idx]["image"])
        image = skio.imread(img_name).reshape(3,128,128).astype(np.float32)
        torch_tensor = torch.from_numpy(image).unsqueeze(0)
        out = nnf.interpolate(torch_tensor, size=(96, 96), mode='nearest').squeeze(0)
        image = out.cpu().detach().numpy().reshape(3,96,96)
        label = self.df.iloc[idx]["class"]
        return image,label

import shutil
import pandas as pd
balanced_set = pd.read_csv("/kaggle/input/sdss-images/equal_splits.csv")
print(balanced_set.columns)
balanced_images = list(balanced_set[balanced_set.columns[1]])
balanced_images.extend(list(balanced_set[balanced_set.columns[2]]))
balanced_images.extend(list(balanced_set[balanced_set.columns[3]]))

df = pd.read_csv("/kaggle/input/sdss-images/data.csv")


balanced_images = df['image']

Index(['Unnamed: 0', 'trimmed traininng  qalaxy', 'trimmed traininng  qsd',
       'trimmed traininng  star'],
      dtype='object')


In [14]:
!rm -rf balanced_images/*
!mkdir balanced_images

In [15]:

from sklearn.model_selection import train_test_split
from copy import deepcopy

for img in balanced_images:
    shutil.copy("/kaggle/input/sdss-images/images (1)/images/"+img,"./balanced_images/"+img)
balanced_df = df.loc[df['image'].isin(balanced_images)].reset_index().drop(['index'],axis=1)
balanced_train,balanced_test = train_test_split(balanced_df,test_size=0.2,stratify=balanced_df['class'])
balanced_train.to_csv("balanced_train.csv",index=False),balanced_test.to_csv("balanced_test.csv",index=False)


(None, None)

In [16]:
img_transforms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5,), (0.5,))])
train_img_data = SDSSDataset("./balanced_train.csv","./balanced_images")
test_img_data = SDSSDataset("./balanced_test.csv","./balanced_images")
print("Number of training examples:", len(train_img_data))
print("Number of test examples:", len(test_img_data))

   class           image
0      1  image_7218.jpg
1      2  image_3365.jpg
   class           image
0      0  image_3051.jpg
1      1  image_1515.jpg
Number of training examples: 8000
Number of test examples: 2000


In [17]:
import torch.utils.data as data
NUM_WORKERS=0
train_feats_simclr = prepare_data_features(model, train_img_data)
test_feats_simclr = prepare_data_features(model, test_img_data)

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

In [18]:
import matplotlib.pyplot as plt
plt.set_cmap('cividis')
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
import matplotlib
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
sns.set()

## tqdm for loading bars
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

## Torchvision
import torchvision
from torchvision.datasets import STL10
from torchvision import transforms

# PyTorch Lightning
try:
    import pytorch_lightning as pl
except ModuleNotFoundError: # Google Colab does not have PyTorch Lightning installed by default. Hence, we do it here if necessary
    !pip install --quiet pytorch-lightning>=1.4
    import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

class LogisticRegression(pl.LightningModule):
    def __init__(self, feature_dim, num_classes, lr, weight_decay, max_epochs=100):
        super().__init__()
        self.save_hyperparameters()
        # Mapping from representation h to classes
        self.model = nn.Linear(feature_dim, num_classes)

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), 
                                lr=self.hparams.lr, 
                                weight_decay=self.hparams.weight_decay)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, 
                                                      milestones=[int(self.hparams.max_epochs*0.6), 
                                                                  int(self.hparams.max_epochs*0.8)], 
                                                      gamma=0.1)
        return [optimizer], [lr_scheduler]
        
    def _calculate_loss(self, batch, mode='train'):
        feats, labels = batch
        preds = self.model(feats)
        loss = F.cross_entropy(preds, labels)
        acc = (preds.argmax(dim=-1) == labels).float().mean()

        self.log(mode + '_loss', loss)
        self.log(mode + '_acc', acc)
        return loss        
        
    def training_step(self, batch, batch_idx):
        return self._calculate_loss(batch, mode='train')
        
    def validation_step(self, batch, batch_idx):
        self._calculate_loss(batch, mode='val')
        
    def test_step(self, batch, batch_idx):
        self._calculate_loss(batch, mode='test')
    
    def forward(self,batch):
        return self.model(batch)

In [19]:
def train_logreg(batch_size, train_feats_data, test_feats_data, model_suffix, max_epochs=400, **kwargs):
    trainer = pl.Trainer(max_epochs=max_epochs,
                         callbacks=[ModelCheckpoint(save_weights_only=True, mode='max', monitor='val_acc'),
                                    LearningRateMonitor("epoch")],
                         check_val_every_n_epoch=10)
    trainer.logger._default_hp_metric = None
    # Data loaders
    train_loader = data.DataLoader(train_feats_data, batch_size=batch_size, shuffle=True, 
                                   drop_last=False, pin_memory=True, num_workers=0)
    test_loader = data.DataLoader(test_feats_data, batch_size=batch_size, shuffle=False, 
                                  drop_last=False, pin_memory=True, num_workers=0)
    pl.seed_everything(42)  # To be reproducable
    model = LogisticRegression(**kwargs)
    trainer.fit(model, train_loader, test_loader)
    print(trainer.checkpoint_callback.best_model_path)
    model = LogisticRegression.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
    return trainer,model#, result

In [20]:
results = {}
trainer,model = train_logreg(batch_size=256,
                                    train_feats_data=train_feats_simclr,
                                    test_feats_data=test_feats_simclr,
                                    model_suffix="any",
                                    feature_dim=train_feats_simclr.tensors[0].shape[1],
                                    num_classes=3,
                                    lr=1e-3,
                                    weight_decay=1e-3)



Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

/kaggle/working/BYOL-PyTorch/lightning_logs/version_0/checkpoints/epoch=359-step=11519.ckpt


In [21]:
import numpy as np
from sklearn.metrics import classification_report



test_loader = data.DataLoader(test_feats_simclr, batch_size=1, shuffle=False, 
                                  drop_last=False, pin_memory=True, num_workers=0)
preds = list()
labels = list()
for i in test_loader:
    preds.append(model.forward(i[0]))
    labels.append(i[1])
    
pred_argmax = [np.argmax(list(p.detach().cpu().numpy()[0])) for p in preds]
labels_argmax = [l.detach().cpu().numpy()[0] for l in labels]

classification_report(pred_argmax,labels_argmax,digits=6).split("\n")

['              precision    recall  f1-score   support',
 '',
 '           0   0.655000  0.560308  0.603965      1169',
 '           1   0.531325  0.539120  0.535194       818',
 '           2   0.052941  0.692308  0.098361        13',
 '',
 '    accuracy                       0.552500      2000',
 '   macro avg   0.413089  0.597245  0.412507      2000',
 'weighted avg   0.600504  0.552500  0.572551      2000',
 '']

In [22]:
import numpy as np
from sklearn.metrics import classification_report
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

sklearn_X,sklearn_Y = list(),list()
for t in tqdm(train_feats_simclr):
    sklearn_X.append(t[0].cpu().detach().numpy())
    sklearn_Y.append(t[1].cpu().detach().numpy())

clf = LogisticRegression(random_state=0).fit(pd.DataFrame(sklearn_X),sklearn_Y)
    
test_X,test_Y = list(),list()
for t in tqdm(test_feats_simclr):
    test_X.append(t[0].cpu().detach().numpy())
    test_Y.append(int(t[1].cpu().detach().numpy()))

preds = clf.predict(pd.DataFrame(test_X))
classification_report(preds,test_Y,digits=6).split("\n")

  0%|          | 0/8000 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


  0%|          | 0/2000 [00:00<?, ?it/s]

['              precision    recall  f1-score   support',
 '',
 '           0   0.691000  0.547110  0.610694      1263',
 '           1   0.473494  0.536885  0.503201       732',
 '           2   0.023529  0.800000  0.045714         5',
 '',
 '    accuracy                       0.544000      2000',
 '   macro avg   0.396008  0.627998  0.386536      2000',
 'weighted avg   0.609724  0.544000  0.569939      2000',
 '']

In [23]:
from sklearn.svm import SVC
clf = SVC(C=0.1).fit(pd.DataFrame(sklearn_X),sklearn_Y)


preds = clf.predict(pd.DataFrame(test_X))
classification_report(preds,test_Y,digits=6).split("\n")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


['              precision    recall  f1-score   support',
 '',
 '           0   1.000000  0.500000  0.666667      2000',
 '           1   0.000000  0.000000  0.000000         0',
 '           2   0.000000  0.000000  0.000000         0',
 '',
 '    accuracy                       0.500000      2000',
 '   macro avg   0.333333  0.166667  0.222222      2000',
 'weighted avg   1.000000  0.500000  0.666667      2000',
 '']

In [24]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier().fit(pd.DataFrame(sklearn_X),sklearn_Y)


preds = clf.predict(pd.DataFrame(test_X))
classification_report(preds,test_Y,digits=6).split("\n")


['              precision    recall  f1-score   support',
 '',
 '           0   0.886000  0.777875  0.828424      1139',
 '           1   0.757831  0.800254  0.778465       786',
 '           2   0.341176  0.773333  0.473469        75',
 '',
 '    accuracy                       0.786500      2000',
 '   macro avg   0.661669  0.783821  0.693453      2000',
 'weighted avg   0.815199  0.786500  0.795480      2000',
 '']

In [25]:
from xgboost import XGBClassifier
clf = XGBClassifier(tree_method='gpu_hist').fit(pd.DataFrame(sklearn_X),sklearn_Y)


preds = clf.predict(pd.DataFrame(test_X))
classification_report(preds,test_Y,digits=6).split("\n")






['              precision    recall  f1-score   support',
 '',
 '           0   0.854000  0.803387  0.827921      1063',
 '           1   0.791566  0.780285  0.785885       842',
 '           2   0.400000  0.715789  0.513208        95',
 '',
 '    accuracy                       0.789500      2000',
 '   macro avg   0.681855  0.766487  0.709004      2000',
 'weighted avg   0.806150  0.789500  0.795275      2000',
 '']