In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from datasets import Dataset, load_from_disk

import numpy as np
# import pandas as pd


In [2]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [3]:
# Image folders, given relative to the notebook's working directory.
path_train = './data/train/'  # root passed to ImageFolder for the training set
path_test = './data/test/'  # root passed to ImageFolder for the test set
path_final_test = './data/final_test/'  # not referenced by any cell visible in this notebook section

In [9]:
# Mini-batch size used by both the training and the test DataLoader.
BATCH_SIZE = 128

In [4]:
# Per-channel statistics fed to Normalize in both pipelines
# (presumably the ImageNet values the pretrained backbones expect - confirm).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Deterministic pipeline used for the test images.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# Training pipeline: identical steps plus a random rotation (degrees=20)
# inserted between the resize and the centre crop.
preprocess_tr = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.RandomRotation(degrees=20),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

In [5]:
# Folder-per-class datasets; the training set gets the augmenting pipeline,
# the test set the deterministic one.
train_dataset = ImageFolder(root=path_train, transform=preprocess_tr)
test_dataset = ImageFolder(root=path_test, transform=preprocess)

In [6]:
# Shape of the first training image after the training transform has been applied.
train_dataset[0][0].shape

torch.Size([3, 224, 224])

In [7]:
# Number of samples in the training and test datasets.
len(train_dataset), len(test_dataset)

(6722, 840)

In [10]:
# Training batches are reshuffled on every pass. The evaluation loader keeps a
# fixed order: shuffling it does not change any metric, it only makes the batch
# order (and anything derived from it) irreproducible between runs.
trainDataLoader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
testDataLoader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Fetch ONE batch and report the image / label tensor sizes. Calling
# next(iter(...)) twice would build two iterators and load two separate batches.
sample_images, sample_labels = next(iter(trainDataLoader))
sample_images.size(), sample_labels.size()

(torch.Size([128, 3, 224, 224]), torch.Size([128]))

### Model

In [13]:
# Load ResNet-34 with its IMAGENET1K_V1 checkpoint. This is the checkpoint the
# deprecated boolean form `weights=True` resolved to; naming the weights enum
# avoids the deprecation warning and makes the choice explicit.
resnet34 = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)



Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /Users/sergeybashkatov/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


100%|██████████| 83.3M/83.3M [00:22<00:00, 3.83MB/s]


In [14]:
# Display the module tree to inspect the layer layout.
resnet34

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
class CustomFC(nn.Module):
    """Linear head that also hands back the features it received.

    Calling the module yields a ``(logits, features)`` pair: ``logits`` is the
    output of the single linear layer ``fc1`` and ``features`` is the input
    tensor itself, passed through untouched.

    Args:
        in_features: size of each input feature vector.
        out_features: number of outputs produced by ``fc1``.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Keep the layer sizes available as plain attributes.
        self.in_features, self.out_features = in_features, out_features
        self.fc1 = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return ``(self.fc1(x), x)``."""
        logits = self.fc1(x)
        return logits, x
    
# Swap the stock classifier for CustomFC (512 input features, 8 outputs) so the
# model returns both the logits and the 512-d features that feed the head.
resnet34.fc = CustomFC(512,8)

In [16]:
# Move the model to the device selected at the top of the notebook.
resnet34 = resnet34.to(device)

In [17]:
# Smoke test: push one random image-sized tensor through the network and
# confirm that the model now returns a (logits, features) pair.
x = torch.randn(1, 3, 224, 224).to(device)
output = resnet34(x)
logits_out, features_out = output
print(logits_out.shape, features_out.shape)

torch.Size([1, 8]) torch.Size([1, 512])


In [18]:
# Take one training batch and move both tensors (images, labels) to the device.
x, l = (tensor.to(device) for tensor in next(iter(trainDataLoader)))

In [19]:
# Criterion for the one-batch sanity check.
loss = nn.CrossEntropyLoss()
# The model returns (logits, features); index 0 keeps only the logits.
output = resnet34(x)[0]

In [20]:
# Build the criterion inline instead of calling the `loss` variable: rebinding
# `loss` from the criterion module to the resulting tensor made this cell raise
# ("Tensor is not callable") whenever it was executed a second time.
loss = nn.CrossEntropyLoss()(output, l)

In [21]:
# Show the cross-entropy value obtained on the sample batch.
loss

tensor(2.2542, device='mps:0', grad_fn=<NllLossBackward0>)

In [22]:
def _run_epoch(model, dataloader, loss_fn, device, optimizer=None):
    """Make one pass over ``dataloader``: train if ``optimizer`` is given, else evaluate.

    Each item of ``dataloader`` must expose the inputs at index 0 and the integer
    class targets at index 1, and ``model(inputs)`` must return a 2-tuple whose
    first element holds the logits.

    Returns:
        ``(mean_batch_loss, accuracy)`` - the average of the per-batch losses and
        the fraction of correctly classified samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    n_correct = 0.0
    n_seen = 0.0
    n_batches = 0
    with torch.set_grad_enabled(training):
        for itms in dataloader:
            x = itms[0].to(device)
            target = itms[1].to(device)
            outputs, _ = model(x)
            loss = loss_fn(outputs, target)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            n_correct += torch.sum(outputs.argmax(dim=1) == target).item()
            n_seen += target.size(0)
            n_batches += 1
    return loss_sum / n_batches, n_correct / n_seen


def training_loop(model, dataloader_train, dataloader_test, model_name, n_epoch = 2,
                  learning_rate = 1e-2, log_every = 50):
    """Train ``model`` with Adam + cross-entropy and checkpoint the best epoch.

    After every epoch the model is evaluated on ``dataloader_test``; whenever the
    test accuracy improves on the best value seen so far, the state dict is saved
    to ``model_best_{model_name}.pt`` in the current working directory.

    Args:
        model: module whose forward pass returns ``(logits, features)``.
        dataloader_train: iterable of batches used for optimisation.
        dataloader_test: iterable of batches used for evaluation / checkpointing.
        model_name: suffix used in the checkpoint file name.
        n_epoch: number of epochs to run.
        learning_rate: Adam learning rate (default keeps the previous fixed value).
        log_every: print metrics at epoch 1 and every ``log_every`` epochs
            (default keeps the previous fixed interval; 0 logs only epoch 1).

    Returns:
        Test accuracy (fraction in [0, 1]) of the LAST epoch - not necessarily the
        best one, which is only available through the saved checkpoint. Returns
        0.0 when ``n_epoch`` is 0 or negative.
    """
    nf = f'model_best_{model_name}.pt'
    # Run on whatever device the model already lives on rather than relying on a
    # notebook-level global; a parameter-less model falls back to the CPU.
    device = next(model.parameters(), torch.empty(0)).device
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_ce = nn.CrossEntropyLoss(reduction='mean')

    best_acc = 0.0
    # Defined up front so that n_epoch <= 0 returns a value instead of raising.
    acc_ratio_test = 0.0

    for epoch in range(1, n_epoch+1):
        loss_train, acc_ratio_train = _run_epoch(model, dataloader_train, loss_ce, device, optimizer)
        loss_val, acc_ratio_test = _run_epoch(model, dataloader_test, loss_ce, device)

        # Keep only the weights of the best-scoring epoch on disk.
        if best_acc < acc_ratio_test:
            torch.save(model.state_dict(), nf)
            best_acc = acc_ratio_test

        if epoch == 1 or (log_every and epoch % log_every == 0):
            print(f'Epoch: {epoch}, Epoch train loss: {loss_train}, Acc train: {acc_ratio_train}')
            print(f'Epoch val loss: {loss_val}, Acc test: {acc_ratio_test}')

    return acc_ratio_test

In [23]:
# Train ResNet-34 for 500 epochs; the displayed return value is the test accuracy of the last epoch.
training_loop(resnet34, trainDataLoader, testDataLoader, model_name = 'resnet34', n_epoch = 500)

Epoch: 1, Epoch train loss: 2.2269179461137303, Acc train: 0.1927997619756025
Epoch val loss: 2.102506927081517, Acc test: 0.21785714285714286
Epoch: 50, Epoch train loss: 0.11747530756412812, Acc train: 0.9590895566795596
Epoch val loss: 0.8553060378347125, Acc test: 0.7738095238095238
Epoch: 100, Epoch train loss: 0.044153313995954, Acc train: 0.9848259446593276
Epoch val loss: 0.5328957268169948, Acc test: 0.8857142857142857
Epoch: 150, Epoch train loss: 0.008336893125779857, Acc train: 0.9979172865218685
Epoch val loss: 0.47915858030319214, Acc test: 0.9226190476190477
Epoch: 200, Epoch train loss: 0.023536717110612202, Acc train: 0.9931567985718536
Epoch val loss: 0.4931401440075466, Acc test: 0.9035714285714286
Epoch: 250, Epoch train loss: 0.0014132950826298736, Acc train: 0.999702469503124
Epoch val loss: 0.3819089319024767, Acc test: 0.9357142857142857
Epoch: 300, Epoch train loss: 0.009201833434383175, Acc train: 0.995983338292175
Epoch val loss: 0.5911033281258175, Acc test:

0.9107142857142857

In [24]:
def cust_acc(model, dataloader_train, dataloader_test):
    """Print and return the classification accuracy on both loaders, in percent.

    The model is switched to eval mode and run without gradient tracking. Each
    batch must expose the inputs at index 0 and the integer targets at index 1,
    and ``model(inputs)`` must return a 2-tuple whose first element holds the
    logits.

    Returns:
        ``(train_accuracy, test_accuracy)`` as percentages.
    """
    # Use the device the model already lives on instead of a notebook-level
    # global; a parameter-less model falls back to the CPU.
    device = next(model.parameters(), torch.empty(0)).device
    model.eval()

    def accuracy_pct(dataloader):
        """Percentage of samples in ``dataloader`` whose arg-max prediction matches the target."""
        n_correct = 0.0
        n_seen = 0.0
        for itms in dataloader:
            x = itms[0].to(device)
            target = itms[1].to(device)
            outputs, _ = model(x)
            n_correct += torch.sum(outputs.argmax(dim=1) == target).item()
            n_seen += target.size(0)
        return n_correct / n_seen * 100

    with torch.no_grad():
        acc_train = accuracy_pct(dataloader_train)
        print(f'Train accuracy: {acc_train}%')

        acc_test = accuracy_pct(dataloader_test)
        print(f'Test accuracy: {acc_test}%')

    return acc_train, acc_test

In [25]:
# Accuracy of the weights currently in memory, i.e. those left by the final training epoch.
cust_acc(resnet34, trainDataLoader, testDataLoader)

Train accuracy: 99.6132103540613%
Test accuracy: 91.07142857142857%


(99.6132103540613, 91.07142857142857)

In [26]:
# Restore the best checkpoint written by training_loop. The file holds a plain
# state_dict (tensors only), so weights_only=True loads it without enabling
# arbitrary pickle code execution.
resnet34.load_state_dict(torch.load('model_best_resnet34.pt', map_location=device, weights_only=True))

<All keys matched successfully>

In [27]:
# Evaluate again, this time with the weights loaded from the saved best checkpoint.
cust_acc(resnet34, trainDataLoader, testDataLoader)

Train accuracy: 99.95537042546862%
Test accuracy: 93.92857142857143%


(99.95537042546862, 93.92857142857143)

In [28]:
def encode_input(model, dataloader_train, dataloader_test):
    """Collect the feature vectors the model produces for every batch of both loaders.

    The model is put in eval mode and run without gradient tracking. For each
    batch (inputs at index 0) the second element of the model's output is moved
    to the CPU and converted to NumPy. Targets are not needed and are ignored.

    Returns:
        A single array stacking all feature batches along axis 0. Batches appear
        newest-first (last test batch ... first test batch, last train batch ...
        first train batch); this matches the order produced by the previous
        prepend-in-a-loop implementation.

    Raises:
        ValueError: if neither loader yields a batch.
    """
    # Use the device the model already lives on instead of a notebook-level
    # global; a parameter-less model falls back to the CPU.
    device = next(model.parameters(), torch.empty(0)).device
    model.eval()

    # Gather the chunks first and concatenate once: concatenating inside the
    # loop re-copies everything collected so far on every batch.
    chunks = []
    with torch.no_grad():
        for dataloader in (dataloader_train, dataloader_test):
            for itms in dataloader:
                _, encoded = model(itms[0].to(device))
                chunks.append(encoded.cpu().numpy())

    if not chunks:
        raise ValueError("encode_input received no batches to encode")

    # Reverse so the most recently processed batch comes first (see Returns).
    return np.concatenate(chunks[::-1], axis=0)

In [29]:
# Extract the pre-classifier feature vectors for every train and test image.
encoded = encode_input(resnet34, trainDataLoader, testDataLoader)

In [30]:
# One row per image from both loaders, one column per feature.
encoded.shape

(7562, 512)

In [31]:
from datasets import Dataset, load_from_disk

In [32]:
# Wrap the feature matrix in a Hugging Face Dataset, split it 90/10 and save it.
# The fixed seed makes the random split identical on every notebook re-run.
(
    Dataset.from_dict({"data": encoded})
    .with_format("torch")
    .train_test_split(test_size=0.1, seed=42)
    .save_to_disk("./data/encoded34")
)

Saving the dataset (0/1 shards):   0%|          | 0/6805 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/757 [00:00<?, ? examples/s]

In [33]:
# Reload the saved splits from disk.
# NOTE: despite its name, `df` holds a DatasetDict (train/test), not a DataFrame.
df = load_from_disk("./data/encoded34", keep_in_memory=False)

In [34]:
# Show the splits and their row counts.
df

DatasetDict({
    train: Dataset({
        features: ['data'],
        num_rows: 6805
    })
    test: Dataset({
        features: ['data'],
        num_rows: 757
    })
})

In [35]:
# Drop the notebook's reference to the ResNet-34 model before starting the next experiment.
del resnet34

Done with classification

In [63]:
# Load ResNet-101 with its IMAGENET1K_V1 checkpoint (what the deprecated
# `weights=True` resolved to), replace the classifier with a CustomFC head
# (2048 input features, 8 outputs) and move the model to the selected device.
resnet101 = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V1)
resnet101.fc = CustomFC(2048,8)
resnet101 = resnet101.to(device)

In [64]:
# Display the module tree to inspect the layer layout.
resnet101

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [66]:
# Train ResNet-101 for 100 epochs with the same loop used for ResNet-34.
# NOTE(review): the recorded output of this cell contains a bare pair of numbers and no
# accuracy figures, which the training_loop defined in this notebook does not produce -
# it presumably came from an earlier version of the function; re-run to refresh it.
training_loop(resnet101, trainDataLoader, testDataLoader, model_name = 'resnet101', n_epoch = 100)

35.814603328704834 35.814603328704834
Epoch: 1, Epoch train loss: 2.0029618357712367
Epoch val loss: 5.11637190410069


35.814603328704834