In [1]:
# Utils
from classification.model import *

## Fix the random **seed**

In [2]:
# Fix the random seeds (see the section heading) before any splitting or training.
# NOTE(review): `set_all_seeds` is not defined in this notebook; presumably it comes
# from the `classification.model` star-import — confirm which libraries it seeds.
set_all_seeds()

## Data

### **Loading** - **Transformation** - **Augmentation**

In [10]:
from PIL import Image
from torchvision import transforms

def loader(image_path):
    """Read an image and return it as an RGB ``PIL.Image``.

    Args:
        image_path: Anything accepted by ``PIL.Image.open`` (typically a file path).

    Returns:
        A new RGB image that no longer depends on the underlying file handle.
    """
    # `Image.open` is lazy and keeps the file open; the context manager closes it
    # deterministically, while `convert` hands back an independent image.
    with Image.open(image_path) as image:
        return image.convert("RGB")


# Preprocessing applied to every loaded image, in order.
# NOTE: the step at index 0 (Resize) is replaced in place further down the notebook
# when a model needs a different input size, so Resize has to stay first.
# NOTE(review): `image_size`, `mean`, `std` and `device` are not defined in this
# notebook; presumably they come from the `classification.model` star-import.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    lambda image: image.to(device),  # move the tensor to the compute device
])

# Train-only augmentations, applied to the PIL image before `transform`.
# The list is currently empty.
augmentation = transforms.Compose([])

### Preparation

In [4]:
# Class names. Labels are used as indices into this list, and its length sets the
# size of each model's output layer. Left empty here.
classes = []

In [8]:
# Parallel lists: `data[i]` is what `loader` receives (i.e. what is passed to
# `Image.open`), `labels[i]` is the label of that sample. Left empty here.
data, labels = [], []

### **Split**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for validation; the fixed `random_state` makes the
# split repeatable. Enable `stratify=labels` to keep class proportions in both parts.
train_data, valid_data, train_labels, valid_labels = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
    # stratify=labels,
)

### Create **Datasets**

In [None]:
# All samples, loaded and preprocessed without augmentation (used for the preview below).
dataset = ClassificationDataset(
    data,
    labels,
    transform=lambda data: transform(loader(data)),
)

# Training samples additionally pass through `augmentation` before preprocessing.
train_set = ClassificationDataset(
    train_data,
    train_labels,
    transform=lambda data: transform(augmentation(loader(data))),
)
# Validation samples are only loaded and preprocessed.
valid_set = ClassificationDataset(
    valid_data,
    valid_labels,
    transform=lambda data: transform(loader(data)),
)

### Create **DataLoader**

In [None]:
# Samples per batch, shared by both loaders.
batch_size = 24

# Training batches are drawn in a new random order every epoch.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# Validation gains nothing from shuffling; a fixed order keeps evaluation repeatable.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

### ***Visualization***

In [None]:
# Preview samples from the un-augmented dataset together with the class names.
# NOTE(review): `show_images` is a project helper; how many images it draws is not visible here.
show_images(dataset, classes=classes)

## **Models**

In [11]:
from torchvision import models
from transformers import AutoModelForImageClassification

from torch_lr_finder import LRFinder

In [None]:
class Out2Logits(nn.Module):
    """Adapter that makes a model returning an output object yield plain logits.

    ``forward`` returns the ``.logits`` attribute of the wrapped model's output.
    Attributes the wrapper does not own are read from, and written to, the wrapped
    model, so ``wrapper.classifier = ...`` replaces the head of the wrapped model.

    Args:
        model: Module whose call result exposes a ``.logits`` attribute.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        """Run the wrapped model on ``x`` and return only its logits."""
        return self.model(x).logits

    def __getattr__(self, name):
        """Resolve names missing on the wrapper by looking them up on the wrapped model."""
        # Only reached when normal lookup fails. 'model' itself must be resolved by
        # nn.Module (it is a registered sub-module); delegating it would recurse.
        if name == 'model':
            return super().__getattr__('model')
        return getattr(self.model, name)

    def __setattr__(self, name, value):
        """Keep wrapper-owned attributes local; forward every other write to the wrapped model."""
        # Names already stored on the instance (`training` and nn.Module's internal
        # containers) belong to the wrapper. Forwarding them would, for example,
        # leave `wrapper.training` stuck at True after `.eval()`.
        if name == 'model' or name in self.__dict__:
            super().__setattr__(name, value)
        else:
            setattr(self.model, name, value)

### *Score*

In [None]:
# Maps a model's best score to its trained `Classifier` wrapper; filled in after each run.
scores = {}

### **Model**: `google/vit-base-patch16-224`

In [None]:
# Input resolution used for this model; presumably what the "...-224" checkpoint expects.
tmp_image_size = (224, 224)
# Replace the first step (Resize) of the shared `transform` pipeline in place, so the
# datasets built earlier, which reference the same object, now yield this size.
transform.transforms[0] = transforms.Resize(tmp_image_size)

In [None]:
# Load the pretrained ViT and wrap it so that calling the model returns plain logits.
model = Out2Logits(AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224"))
# Swap the classification head for one sized to our classes; `Out2Logits` forwards
# this attribute read and write to the wrapped model.
model.classifier = nn.Linear(model.classifier.in_features, len(classes))
# The trailing `;` suppresses the cell's output.
model.to(device);

In [None]:
loss_fn = nn.CrossEntropyLoss()
# Deliberately tiny starting learning rate: the range test sweeps upward from it.
optimizer = optim.Adam(model.parameters(), lr=1e-7)

# Learning-rate range test on the training loader: 100 iterations, up to lr=1.
lr_finder = LRFinder(model, optimizer, loss_fn, device=device)
lr_finder.range_test(train_loader, end_lr=1, num_iter=100)
lr_finder.plot()
# NOTE(review): `reset()` is expected to restore the model and optimizer to their
# state before the range test — confirm against the torch-lr-finder docs.
lr_finder.reset()

In [None]:
# Fresh optimizer for the actual training run.
# NOTE(review): lr=5e-5 was presumably read off the range-test plot — confirm.
optimizer = optim.Adam(model.parameters(), lr=5e-5)

# Project wrapper, given the model, a display name and the optimizer.
model_wrapped = Classifier(model, "Google-VitBase", optimizer)

In [None]:
# Train with `train_loader` and `valid_loader`.
# NOTE(review): the third argument (15) is presumably the number of epochs — confirm against `Classifier.fit`.
model_wrapped.fit(train_loader, valid_loader, 15)

In [None]:
# Register the trained wrapper under its best score.
# NOTE(review): the score is the dict key, so two models with an identical score overwrite each other.
scores[model_wrapped.best_score] = model_wrapped

### **Model**: `EfficientNet_B0`

In [None]:
# Put the default `image_size` back into the shared `transform`; the ViT section
# had replaced this Resize step with a 224x224 one.
transform.transforms[0] = transforms.Resize(image_size)

In [None]:
# ImageNet-pretrained EfficientNet-B0 with its last classifier layer resized to our classes.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, len(classes))

# NOTE(review): unlike the ViT section, no optimizer is passed and the model is not
# moved to `device` in this cell; presumably `Classifier` handles both — confirm.
model_wrapped = Classifier(model, "EfficientNet_B0")

In [None]:
# Train with `train_loader` and `valid_loader`.
# NOTE(review): the third argument (10) is presumably the number of epochs — confirm against `Classifier.fit`.
model_wrapped.fit(train_loader, valid_loader, 10)

In [None]:
# Register the trained wrapper under its best score.
# NOTE(review): the score is the dict key, so two models with an identical score overwrite each other.
scores[model_wrapped.best_score] = model_wrapped

## Result

In [None]:
# Pick the wrapper stored under the largest key of `scores`.
# NOTE(review): assumes a higher `best_score` is better (e.g. accuracy, not loss) — confirm.
best_model_wrapped = scores[max(scores)]
# Last expression of the cell: shown as its output.
best_model_wrapped.name

In [None]:
# Number of random validation samples to show, and the side length of each panel.
n = 3
fig_image_size = 5

# squeeze=False always returns a 2-D array of axes, so indexing also works for n == 1
# (by default a single subplot comes back as a bare Axes object, not an array).
fig, axes = plt.subplots(n, 1, figsize=(fig_image_size, fig_image_size * n), squeeze=False)

for i, idx in enumerate(random.sample(range(len(valid_set)), n)):
    image, label = valid_set[idx]
    prediction = best_model_wrapped.predict(image)

    ax = axes[i, 0]
    # Undo the normalisation, then move the first axis last for `imshow`.
    ax.imshow(denormalize(image).cpu().numpy().transpose(1, 2, 0))
    ax.axis('off')
    ax.set_title(f"Class: {classes[label]}\nPredict: {classes[prediction]}", fontsize=10)

plt.tight_layout()
plt.show()

## Submission

In [None]:
# Samples to predict for the submission; left empty here.
test_data = []
# NOTE(review): `Dataset` is not defined in this notebook. It must be a project class
# accepting (data, transform=...); the abstract `torch.utils.data.Dataset` would
# reject these arguments — confirm which one is in scope.
test_set = Dataset(test_data, transform=lambda data: transform(loader(data)))

In [None]:
# Predict for the whole test set, then map each predicted class id to its class name.
# NOTE(review): `predict` receives a dataset here but a single image tensor in the
# Result section; presumably it supports both — confirm.
predict_class_id = best_model_wrapped.predict(test_set)
predict_class_names = [classes[class_id] for class_id in predict_class_id]