In [None]:
import os
# Set before `import torch` (a few lines below) so the variable is already present in
# the process environment by the time torch is imported.
# NOTE(review): presumably a debugging aid to make CUDA errors surface at the failing
# call — confirm it is still wanted, since it is expected to slow GPU execution.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
from torch.utils.data import random_split
from torchvision.utils import make_grid
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets.utils import download_url
from google.colab import drive
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
from google.colab import files
from torchvision import models
from torchsummary import summary
from glob import glob
import re
from itertools import compress
import pandas as pd
from torchvision.models.feature_extraction import create_feature_extractor

# Seed PyTorch's default random number generator with a fixed value.
# As the last expression of the cell, the returned Generator is echoed as cell output.
torch.manual_seed(0)


<torch._C.Generator at 0x7fd03e947310>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.datasets import CIFAR10, STL10
from torchvision import transforms
from torchvision.models import resnet18, resnet34
from tqdm import tqdm

In [None]:
from torchvision.models.feature_extraction import get_graph_node_names

In [None]:
!git clone https://github.com/samirchar/selfSupervised_fewShot.git
from selfSupervised_fewShot.dataprep import *

Cloning into 'selfSupervised_fewShot'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 28 (delta 0), reused 6 (delta 0), pack-reused 22[K
Unpacking objects: 100% (28/28), done.


In [None]:
# --- Experiment configuration ---------------------------------------------------
target_dataset = 'STL10'     # dataset whose images are fed through the frozen encoder
source_dataset = 'CIFAR100'  # dataset the SimSiam checkpoint was pretrained on

img_size = 32
train_batch_size = 512
test_batch_size = 512
num_workers = 2

# Path of the pretrained SimSiam checkpoint (plain string: nothing to interpolate).
source_root = 'simsiam_best_resnet18.pt'

# Output directory for linear-classification artefacts, e.g. 'lincls_on_stl10'.
lincls_path = f'lincls_on_{target_dataset.lower()}'
# makedirs(exist_ok=True) is safe to re-run and avoids the check-then-create race of
# `if not os.path.exists(...): os.mkdir(...)`.
os.makedirs(lincls_path, exist_ok=True)

In [None]:
class PairTransform:
    """Produce two transformed views of the same input.

    The wrapped ``transform`` is invoked twice on the input; the first result
    serves as the query view and the second as the key view.
    """

    def __init__(self, transform):
        # Callable applied to every input; it is called twice per sample.
        self.transform = transform

    def __call__(self, x):
        """Return ``[query, key]``, the results of two successive transform calls on ``x``."""
        views = []
        for _ in range(2):
            views.append(self.transform(x))
        return views

In [None]:
# Train/test transforms.
# Both pipelines share the same normalisation statistics, so they are defined once.
# NOTE(review): these look like the channel statistics of the pretraining dataset
# rather than of STL10 — confirm that is intended.
norm_mean = (0.5071, 0.4867, 0.4408)
norm_std = (0.2675, 0.2565, 0.2761)

# Image size, batch size and worker count come from the configuration cell
# (img_size, train_batch_size, num_workers) instead of being repeated as literals.
transform_train = transforms.Compose([
    transforms.Resize(img_size),
    transforms.RandomResizedCrop(img_size, scale=(0.2, 1.)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)])

transform_test = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)])

# PairTransform makes every sample a [view_1, view_2] list of two transformed images.
trainset = STL10(root='datastl', split="train",
                 download=True,
                 transform=PairTransform(transform_train))

# NOTE(review): drop_last=True discards the final partial batch and the training
# pipeline is randomly augmented — confirm both are wanted if this loader is only
# used for feature extraction.
trainloader = DataLoader(trainset,
                         batch_size=train_batch_size,
                         shuffle=True,
                         num_workers=num_workers,
                         drop_last=True,
                         pin_memory=True)


testset = STL10(root='datastl', split="test", transform=PairTransform(transform_test), download=True)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to datastl/stl10_binary.tar.gz


  0%|          | 0/2640397119 [00:00<?, ?it/s]

Extracting datastl/stl10_binary.tar.gz to datastl


  cpuset_checked))


Files already downloaded and verified


In [None]:
# Unshuffled loader over the test split; the batch size comes from the config cell
# (test_batch_size) rather than a repeated literal.
test_loader = DataLoader(testset, batch_size=test_batch_size, shuffle=False)

In [None]:
# Define the classes required for the SimSiam model

class prediction_layer(nn.Module):
  """MLP head: Linear -> BatchNorm1d -> ReLU, followed by a final Linear.

  Args:
    input_d: size of the incoming feature vector.
    hidden_d: width of the hidden layer.
    output_d: size of the returned vector.
  """

  def __init__(self, input_d=2048, hidden_d=512, output_d=2048):
    super().__init__()
    # Attribute names and the position of each module inside the Sequential
    # determine the state_dict keys, so they are kept as-is.
    hidden_block = [
        nn.Linear(input_d, hidden_d),
        nn.BatchNorm1d(hidden_d),
        nn.ReLU(inplace=True),
    ]
    self.layer1 = nn.Sequential(*hidden_block)
    self.layer2 = nn.Linear(hidden_d, output_d)

  def forward(self, x):
    """Apply the hidden block and then the output projection to ``x``."""
    return self.layer2(self.layer1(x))

class projection_layer(nn.Module):
  """Projection MLP made of three stages applied in order.

  * ``layer1``: Linear -> BatchNorm1d -> ReLU
  * ``layer2``: Identity (for CIFAR datasets no second hidden layer is used)
  * ``layer3``: Linear -> BatchNorm1d without affine parameters

  Args:
    input_d: size of the incoming feature vector.
    hidden_d: width of the hidden layer.
    output_d: size of the returned vector.
  """

  def __init__(self, input_d, hidden_d=2048, output_d=2048):
    super().__init__()
    # Attribute names and Sequential positions define the state_dict keys; keep them.
    self.layer1 = nn.Sequential(
        nn.Linear(input_d, hidden_d),
        nn.BatchNorm1d(hidden_d),
        nn.ReLU(inplace=True),
    )
    self.layer2 = nn.Identity()
    self.layer3 = nn.Sequential(
        nn.Linear(hidden_d, output_d),
        nn.BatchNorm1d(output_d, affine=False),
    )

  def forward(self, x):
    """Pass ``x`` through layer1, layer2 and layer3 in that order."""
    for stage in (self.layer1, self.layer2, self.layer3):
      x = stage(x)
    return x

class SimSiam(nn.Module):
  """SimSiam network: one shared backbone followed by a projector and a predictor.

  Args:
    base_encoder: callable invoked as ``base_encoder(pretrained=False)``; the returned
      module must expose ``conv1``, ``maxpool`` and ``fc`` attributes, and ``fc`` must
      have ``in_features``.
  """

  def __init__(self, base_encoder):
    super().__init__()

    encoder = base_encoder(pretrained=False)
    feat_dim = encoder.fc.in_features  # width of the features feeding the original fc

    # Swap in a 3x3 / stride-1 first convolution, and turn the max-pool and the
    # classification head into pass-throughs.
    encoder.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    encoder.maxpool = nn.Identity()
    encoder.fc = nn.Identity()

    self.backbone = encoder
    self.feature_dim = feat_dim
    self.projector = projection_layer(feat_dim)
    self.predictor = prediction_layer()

  def forward(self, x1, x2):
    """Encode both views and return projections (z1, z2) and predictions (p1, p2) in a dict."""
    # The call order (both backbone calls, then both projector calls, then both
    # predictor calls) is deliberately kept.
    feat1 = self.backbone(x1)
    feat2 = self.backbone(x2)

    z1 = self.projector(feat1)
    z2 = self.projector(feat2)

    p1 = self.predictor(z1)
    p2 = self.predictor(z2)

    return {'z1': z1, 'z2': z2, 'p1': p1, 'p2': p2}

In [None]:
# Load the SimSiam model pretrained on CIFAR100.
device = "cuda" if torch.cuda.is_available() else "cpu"
backbone = 'resnet18'
# Resolve the encoder constructor from an explicit table instead of eval(), so only
# the known encoders can be selected by name.
base_encoder = {'resnet18': resnet18, 'resnet34': resnet34}[backbone]
# Honour `device` instead of hard-coding .cuda(), so the cell also runs on CPU-only hosts.
pre_model = SimSiam(base_encoder).to(device)
# map_location remaps the checkpoint's tensors onto `device` while loading; the path
# comes from the configuration cell (source_root).
pre_model.load_state_dict(torch.load(source_root, map_location=device))

<All keys matched successfully>

In [None]:
# get_graph_node_names returns two lists; only the first one is kept. Indexing (rather
# than unpacking into `_`) avoids rebinding `_`, which IPython uses for the last output.
nodes = get_graph_node_names(pre_model)[0]
print(nodes)

['x1', 'x2', 'backbone.conv1', 'backbone.bn1', 'backbone.relu', 'backbone.maxpool', 'backbone.layer1.0.conv1', 'backbone.layer1.0.bn1', 'backbone.layer1.0.relu', 'backbone.layer1.0.conv2', 'backbone.layer1.0.bn2', 'backbone.layer1.0.add', 'backbone.layer1.0.relu_1', 'backbone.layer1.1.conv1', 'backbone.layer1.1.bn1', 'backbone.layer1.1.relu', 'backbone.layer1.1.conv2', 'backbone.layer1.1.bn2', 'backbone.layer1.1.add', 'backbone.layer1.1.relu_1', 'backbone.layer2.0.conv1', 'backbone.layer2.0.bn1', 'backbone.layer2.0.relu', 'backbone.layer2.0.conv2', 'backbone.layer2.0.bn2', 'backbone.layer2.0.downsample.0', 'backbone.layer2.0.downsample.1', 'backbone.layer2.0.add', 'backbone.layer2.0.relu_1', 'backbone.layer2.1.conv1', 'backbone.layer2.1.bn1', 'backbone.layer2.1.relu', 'backbone.layer2.1.conv2', 'backbone.layer2.1.bn2', 'backbone.layer2.1.add', 'backbone.layer2.1.relu_1', 'backbone.layer3.0.conv1', 'backbone.layer3.0.bn1', 'backbone.layer3.0.relu', 'backbone.layer3.0.conv2', 'backbone.l

In [None]:
# Print the module hierarchy of the loaded SimSiam model for inspection.
print(pre_model)

SimSiam(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): Identity()
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (co

In [None]:
def feature_extractor2(model,layer_name,dataset,device,return_target = True):
  """Run ``model`` over ``dataset`` and collect the output of one graph node.

  Args:
    model: module to extract from; it is called with two positional inputs per batch.
    layer_name: name of the graph node whose output is collected
      (e.g. ``'backbone.avgpool'``).
    dataset: iterable yielding ``(inputs, targets)`` batches, where ``inputs[0]`` and
      ``inputs[1]`` are the two input tensors and ``targets`` is a tensor.
    device: device the input batches are moved to; ``model`` is expected to already
      live on it.
    return_target: when True, the concatenated targets are returned as well.

  Returns:
    ``(features, targets)`` as NumPy arrays when ``return_target`` is True, otherwise
    only the ``features`` array. Rows follow the iteration order of ``dataset``.
  """
  was_training = model.training
  extractor = create_feature_extractor(model, {layer_name: 'output'})
  # Extract in inference mode: otherwise BatchNorm normalises with per-batch statistics
  # (features depend on batch composition) and keeps updating its running statistics.
  extractor.eval()

  feature_chunks = []
  target_chunks = []
  try:
    with torch.no_grad():
      for inputs, targets in dataset:
        # Honour `device` instead of hard-coding .cuda().
        view_a = inputs[0].to(device, non_blocking=True)
        view_b = inputs[1].to(device, non_blocking=True)
        batch_features = extractor(view_a, view_b)['output']
        # Drop singleton dimensions but never the batch axis: a plain .squeeze()
        # would also remove it for a batch holding a single sample.
        kept_dims = [d for d in batch_features.shape[1:] if d != 1]
        batch_features = batch_features.reshape(batch_features.shape[0], *kept_dims)
        # Move each chunk to host memory right away so device memory does not grow
        # with the size of the dataset.
        feature_chunks.append(batch_features.cpu())
        target_chunks.append(targets.cpu())
  finally:
    # Put the caller's model back into its previous mode, in case the extractor
    # shares submodules with it.
    model.train(was_training)

  # torch.cat is the long-standing spelling; torch.concat only exists in newer releases.
  features_np = torch.cat(feature_chunks, dim=0).numpy()
  if not return_target:
    return features_np

  targets_np = torch.cat(target_chunks, dim=0).numpy()
  print("Extracted features shape", features_np.shape)
  print("Target features shape", targets_np.shape)
  return features_np, targets_np

In [None]:
# Collect the 'backbone.avgpool' activations and the labels for both loaders.
# NOTE(review): trainloader is built with random augmentations, shuffle=True and
# drop_last=True, so X_train only covers full batches (4608 rows in the recorded
# output) and differs from run to run — confirm this is intended for training features.
X_train,y_train = feature_extractor2(pre_model,'backbone.avgpool',trainloader,device)
X_test,y_test = feature_extractor2(pre_model,'backbone.avgpool',test_loader,device)

  cpuset_checked))


Extracted features shape (4608, 512)
Target features shape (4608,)
Extracted features shape (8000, 512)
Target features shape (8000,)


In [None]:
# Persist the extracted arrays as .npy files in the working directory.
# Features are written before labels, and the train split before the test split.
for _npy_path, _array in (
    ('X_trainstl_SimSiam.npy', X_train),
    ('X_teststl_SimSiam.npy', X_test),
    ('y_trainstl_SimSiam.npy', y_train),
    ('y_teststl_SimSiam.npy', y_test),
):
  np.save(_npy_path, _array)