In [4]:
import os
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms as T
from torchvision.io import decode_image
from torchvision import models


In [23]:
# default SimCLR augmentation (code from BYOL, because I like it)
import random
class RandomApply(nn.Module):
    def __init__(self, fn, p):
        super().__init__()
        self.fn = fn
        self.p = p
    def forward(self, x):
        if random.random() > self.p:
            return x
        return self.fn(x)

DEFAULT_AUG = torch.nn.Sequential(
    RandomApply(
        T.ColorJitter(0.8, 0.8, 0.8, 0.2),
        p=0.3
    ),
    T.RandomGrayscale(p=0.2),
    T.RandomHorizontalFlip(),
    RandomApply(
        T.GaussianBlur((3, 3), (1.0, 2.0)),
        p=0.2
    ),
    T.Normalize(
        mean=torch.tensor([0.485, 0.456, 0.406]),
        std=torch.tensor([0.229, 0.224, 0.225])),
)

In [2]:
# full_fold_path = "data/raw-img"
# folders = os.listdir(full_fold_path)
# for folder in folders:
img_path = "data/raw-img/butterfly/e030b20a20e90021d85a5854ee454296eb70e3c818b413449df6c87ca3ed_640.jpg"
img_tensor = decode_image(img_path)

weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1
preprocess = weights.transforms()
img_tensor_sized = preprocess(img_tensor)

en_b0 = models.efficientnet_b0(weights = weights)


In [27]:
weights.transforms

functools.partial(<class 'torchvision.transforms._presets.ImageClassification'>, crop_size=224, resize_size=256, interpolation=<InterpolationMode.BICUBIC: 'bicubic'>)

In [28]:
preprocess(img_tensor).shape

torch.Size([3, 224, 224])

In [8]:
full_fold_path = "data/raw-img"
os.listdir(full_fold_path)

['butterfly',
 'cat',
 'chicken',
 'cow',
 'dog',
 'elephant',
 'horse',
 'sheep',
 'spider',
 'squirrel']

In [37]:
class CVDataset(torch.utils.data.Dataset):
    def __init__(self, folder_path:str):
        super().__init__()
        self.data = []
        self.targets = []

        label_to_index = {
            'butterfly':0,
             'cat':1,
             'chicken':2,
             'cow':3,
             'dog':4,
             'elephant':5,
             'horse':6,
             'sheep':7,
             'spider':8,
             'squirrel':9
        }

        for folder in os.listdir(folder_path):

            target = torch.zeros(10)
            target[label_to_index[folder]] = 1
            targets_len = len(os.listdir(f"{folder_path}/{folder}")) * 2 #*2 because we create to copies with different augs
            target = target.expand(targets_len, 10)
            self.targets.extend(target)

            for file in os.listdir(f"{folder_path}/{folder}"):

                image_tensor = decode_image(f"{folder_path}/{folder}"+"/"+file)
                self.data.append(DEFAULT_AUG(preprocess(image_tensor[:3, :, :])))
                self.data.append(DEFAULT_AUG(preprocess(image_tensor[:3, :, :])))

        self.data = torch.tensor(self.data)
        self.targets = torch.tensor(self.targets)

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

In [38]:
cv_ds = CVDataset("data/raw-img")

KeyboardInterrupt: 

In [5]:
#I do understand, that ImageNet has these classes that I am willing to train on, but considering the quality of ImageNet (todo paste promo link) it won't bother to finetune tiny model
new_head = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=10, bias=True)
)
en_b0.classifier = new_head
en_b0

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [6]:
for param in en_b0.features.parameters():
    param.requires_grad = False

for param in en_b0.classifier.parameters():
    param.requires_grad = True

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
en_b0.to(device)
en_b0(img_tensor_sized.unsqueeze(0).to(device))

tensor([[-0.0129, -0.2402, -0.0178, -0.1155,  0.1697, -0.0493,  0.0060,  0.0805,
          0.0223,  0.0613]], device='cuda:0', grad_fn=<AddmmBackward0>)