# Пробуем аугментацию 💩

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision
import numpy as np
import pandas as pd
import pickle
import itertools
from datetime import datetime
import locale
import sys
import os

locale.setlocale(locale.LC_ALL, '')
sys.path.insert(1, os.path.join(sys.path[0], '..'))

torch.manual_seed(42)
np.random.seed(42)

#### 🐘 <i><span style="color: #52C594">import local stuff</span></i>

In [2]:
from models import CNNClassifier
from utils import PlaneSet, build_dataset
from utils import configurate_xy_tensors  # wrap cuda and types stuff

#### Wrap a training loop in fn
---

In [3]:
def fit(model, epoch_num, train, optimizer, distance):
    loss_dict = {}
    
    for epoch in range(num_epochs):
        loss_accumulator = []

        for ind, (x, y) in enumerate(train):
            x, y = configurate_xy_tensors(x, y)
            y_hat = model(x)
            optimizer.zero_grad()

            loss = distance(y_hat, y)
            diff = loss.item()

            loss.backward()
            optimizer.step()
            loss_accumulator.append(diff)
            
        loss_dict[epoch] = np.mean(loss_accumulator)
    
    return model, loss_dict

#### Wrap a testing loop in fn
---

In [4]:
def predict(model, test):
    correct = 0

    for x, y in test:
        x, y = configurate_xy_tensors(x, y)
        y_hat = model.predict(x)
        outputs = (y_hat>0.5).to(torch.float32)
        correct += (outputs == y).float().sum()
        
        return correct / len(test)

#### Run one experiment as function
---

In [5]:
def run_experiment(augmentation_compose, ModelClass,
                   distance, num_epochs, df, images_path):
    model = ModelClass()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device=device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    
    msk = np.random.rand(len(df)) < 0.8
    train_df = df[msk]
    test_df = df[~msk]
    
    train_dataset = build_dataset(train_df, images_path, augmentation_compose)
    test_dataset = build_dataset(test_df, images_path, None)
    train = DataLoader(train_dataset, batch_size=100, shuffle=True)
    test  = DataLoader(test_dataset, batch_size=100, shuffle=True)
    
    model, loss_dict = fit(model, num_epochs, train, optimizer, distance)
    accuracy = predict(model, test)
    
    return {augmentation_compose: (loss_dict, accuracy)}

#### Build combinations from aug list
---

In [6]:
aug_list = [
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.RandomResizedCrop(size=(20,20)),
    torchvision.transforms.RandomRotation(degrees=(15,95)),
    torchvision.transforms.RandomGrayscale(p=0.1),
    torchvision.transforms.RandomInvert(p=0.4),
    torchvision.transforms.RandomPosterize(bits=2),
    torchvision.transforms.RandomSolarize(threshold=192.0),
    torchvision.transforms.RandomAdjustSharpness(sharpness_factor=2),
    torchvision.transforms.RandomAutocontrast(),
]

In [18]:
epoch_num = 20
time = (epoch_num / 10) * 308 / 3600
combinations = list(itertools.combinations(aug_list, 3))

print(
    f"колво пар: {len(combinations)}\nчасов: {len(combinations) * time}"
)

колво пар: 120
часов: 20.53333333333333


In [8]:
combinations = [torchvision.transforms.Compose(i) for i in combinations]

In [9]:
combinations[:1]

[Compose(
     RandomHorizontalFlip(p=0.5)
     RandomVerticalFlip(p=0.5)
     RandomResizedCrop(size=(20, 20), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear)
 )]

#### Run all experiments

In [15]:
%%time
distance =  nn.BCELoss()
num_epochs = 5

csv_path = "../train"
images_path = "../avia-train/"

with open(csv_path, "r") as file:
    data = pd.read_csv(file)

results = {}

for ind, aug in enumerate(combinations[1:2]):
    print(f"combination {ind+1}/{len(combinations)}")
    experiment_result = run_experiment(
        aug, CNNClassifier, distance,
        num_epochs, data, images_path
    )
    
    results.update(experiment_result)

dump_name = "../augmentations_results/" + datetime.now().strftime("%H_%M_%S")
with open(dump_name, "wb") as bfile:
    pickle.dump(results, bfile)

combination 1/120
Wall time: 2min 34s


# Дальше

сравниваете чиселки акьюрасис
рисуете графики лосов с подпиской легенды где какой компоуз

значение лосса <> номер эпохи

если успеете то по эпохам сделаете аккьюраси

загрузите какую-нибудь фоточку PILом и захуячьте на ней шарпнесс, соларайз, контраст и тд и визуально подкорректируйте параметры если будет необходимо.

<span style="color:red"> исправьте ошибку почему аккьюраси больше единицы было норм </span>

In [16]:
results

{Compose(
     RandomHorizontalFlip(p=0.5)
     RandomVerticalFlip(p=0.5)
     RandomRotation(degrees=[15.0, 95.0], interpolation=nearest, expand=False, fill=0)
 ): ({0: 0.49725378584861757,
   1: 0.31887596797943113,
   2: 0.2606424463391304,
   3: 0.2284966432750225,
   4: 0.2145057860761881},
  tensor(1.4839, device='cuda:0'))}