In [70]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torchvision.models as models

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

LR=0.001
EPOCHS=10

In [2]:
device

device(type='cpu')

In [71]:
transform = transforms.Compose([
    transforms.Resize((1280, 720)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

train = ImageFolder(root='./train', transform=transform)

train_dl = DataLoader(train, batch_size=512, shuffle=True)

In [72]:
train_dl.dataset

Dataset ImageFolder
    Number of datapoints: 11288
    Root location: ./train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
           )

In [81]:
class ResNetModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.resnet = models.resnet50(pretrained=True)
    for param in self.resnet.parameters():
      param.requires_grad = False

    self.resnet.fc = nn.Sequential(
        nn.Linear(2048, 163),
    )
  
  def forward(self, x):
    return self.resnet(x)

In [83]:
def train_function(model, train_dataloader, loss_fn, device):
    train_losses = []
    train_accs = []
    optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=0.8)

    for epoch in range(EPOCHS):
        train_loss = 0.0
        train_total = 0
        train_correct = 0
        c = 0
        model.train()
        for images, labels in train_dataloader:
          images, labels = images.to(device), labels.to(device)
          optimizer.zero_grad()
          outputs = model(images)
          loss = loss_fn(outputs.squeeze(1), labels)
          loss.backward()
          optimizer.step()

          train_loss += loss.item() * images.size(0)
          train_total += labels.size(0)
          _, predicted = torch.max(outputs.data, 1)
          train_correct += (predicted == labels).sum().item()
          c += 512

        train_losses.append(train_loss / len(train_dataloader.dataset))
        train_accs.append(train_correct / train_total)
        
        print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}'.format(epoch+1, EPOCHS, train_losses[-1], train_accs[-1]))

    return train_losses, train_accs

In [84]:
resnet_model = ResNetModel().to(device)
loss_fn = nn.CrossEntropyLoss()

In [85]:
resnet_train_losses, resnet_val_losses, resnet_train_accs, resnet_val_accs = train_function(model = resnet_model, 
                                                                                            train_dataloader=train_dl,
                                                                                            loss_fn=loss_fn,
                                                                                            device=device)



Epoch [1/10], Train Loss: 5.0558, Train Acc: 0.0153
Epoch [2/10], Train Loss: 4.7892, Train Acc: 0.0774
Epoch [3/10], Train Loss: 4.5456, Train Acc: 0.1184
Epoch [4/10], Train Loss: 4.3208, Train Acc: 0.1799
Epoch [5/10], Train Loss: 4.1402, Train Acc: 0.2061
Epoch [6/10], Train Loss: 3.9640, Train Acc: 0.2522
Epoch [7/10], Train Loss: 3.8134, Train Acc: 0.2814
Epoch [8/10], Train Loss: 3.6881, Train Acc: 0.2965
Epoch [9/10], Train Loss: 3.5578, Train Acc: 0.3244
Epoch [10/10], Train Loss: 3.4593, Train Acc: 0.3327


ValueError: not enough values to unpack (expected 4, got 2)

In [115]:
from PIL import Image
import numpy as np

ren = Image.open('tmp.png').convert('RGB')
ren_np = np.asarray(ren)
ren_tensor = torch.from_numpy(ren_np)
ren_tensor = ren_tensor.reshape((1, 3, 256, 256))
ren_tensor = ren_tensor.to(torch.float32)

In [121]:
resnet_model.eval()
sorted, indices = resnet_model(ren_tensor).sort()

In [122]:
sorted

tensor([[-1111.8333, -1083.3915,  -887.9575,  -855.7018,  -834.7906,  -821.1781,
          -804.3804,  -768.9105,  -742.4035,  -728.5645,  -727.6241,  -697.8710,
          -686.5995,  -641.8184,  -639.1135,  -605.5400,  -604.9763,  -603.2915,
          -562.9579,  -559.4136,  -558.9102,  -555.8726,  -525.0624,  -479.3495,
          -449.9140,  -430.5915,  -418.9095,  -399.6167,  -387.9197,  -385.7477,
          -364.0735,  -353.8752,  -353.2198,  -350.8846,  -344.7565,  -338.6640,
          -335.6896,  -335.0277,  -327.2323,  -325.1606,  -319.2720,  -284.2197,
          -262.7811,  -261.3235,  -254.4208,  -237.6355,  -228.4844,  -224.8656,
          -221.6569,  -219.9539,  -211.6557,  -209.8185,  -194.2786,  -191.7908,
          -187.5765,  -180.6824,  -180.2841,  -179.4511,  -173.8574,  -167.7697,
          -163.0137,  -157.5823,  -150.6598,  -130.4195,  -116.5588,  -102.6623,
          -100.2419,   -94.4267,   -89.4180,   -88.6754,   -87.7941,   -84.4773,
           -68.4751,   -66.1

In [126]:
indices[0]

tensor([ 95,  34,  14, 107, 106,  28, 160,  26,  44,  23,  74,  33, 138,  93,
        141,  83, 144, 102, 140, 145,  64,  17, 117,  55,  69,  43, 116, 157,
        118, 127,   0,  42, 122,   7,  92, 150,  58, 119,  97,  45,   4, 124,
        162, 103,  51,  48, 142,  89,  71,  75,  72, 133,  32,  94,  50, 151,
          9,  66,  63,  57, 115, 147,  62, 146,   5, 130, 131, 108, 125,  99,
         65,  52, 152,  84,  36,   3, 109, 111,  70,   8, 110,  39,  86,  25,
        121,  88,  91,  73,  13, 123, 128,  11,  29,  24,  40, 134, 158,  10,
        153,   2, 120,  67,  90, 135,  59,  41,  31,  49, 154, 100, 156,  30,
         96,  80, 159, 104, 112,  79,  87, 101,  56,  20,  12,  98,  77,  38,
         47,  37,  16,  78, 126,  46, 143,  60, 161,  81,  85,  35,  22,  68,
        137, 132,   1, 129,   6, 139,  82, 105,  53,  21, 114,  18,  76, 113,
        149,  61, 136,  27,  54, 148,  19, 155,  15])

In [129]:
train_dl.dataset.find_classes('./train')[0][148]

'Фиора'