# Пробуем аугментацию 💩

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision
import numpy as np
import pandas as pd
import pickle
import itertools
from datetime import datetime
import locale
import sys
import os

from PIL import Image
# Switch every locale category to the user's default locale settings.
locale.setlocale(locale.LC_ALL, '')
# Put the parent of sys.path[0] on the import search path (at position 1).
# NOTE(review): presumably this is what makes the project-local modules
# imported in the next cell resolvable — confirm against the repo layout.
sys.path.insert(1, os.path.join(sys.path[0], '..'))

# Seed the global torch and NumPy random generators.
torch.manual_seed(42)
np.random.seed(42)

#### 🐘 <i><span style="color: #52C594">import local stuff</span></i>

In [2]:
from models import CNNClassifier
from utils2 import PlaneSet, build_dataset
from utils2 import configurate_xy_tensors  # wrap cuda and types stuff

#### Wrap the training loop in a function
---

In [3]:
def fit(model, epoch_num, train, optimizer, distance, test, train_dataset):
    """Train `model` for `epoch_num` epochs, evaluating after every epoch.

    Parameters
    ----------
    model
        Network being optimised; invoked as ``model(x)``.
    epoch_num : int
        Number of passes over `train`. With ``0`` nothing is trained and
        all returned dictionaries are empty.
    train
        Iterable yielding ``(x, y)`` batches.
    optimizer
        Object exposing ``zero_grad()`` and ``step()``.
    distance
        Loss callable, invoked as ``distance(y_hat, y)``; its result must
        support ``.item()`` and ``.backward()``.
    test
        Evaluation data, handed unchanged to ``predict``.
    train_dataset
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(model, loss_dict, accuracy_dict, loss_dict_test)``. Each dict is
        keyed by the 1-based epoch number: mean training loss, test
        accuracy and test loss respectively.
    """
    loss_dict = {}
    accuracy_dict = {}
    loss_dict_test = {}

    # Iterate over the `epoch_num` argument. The previous version read a
    # global called `num_epochs`, which ignored the caller's value and
    # failed with NameError when no such global existed.
    for epoch in range(1, epoch_num + 1):
        batch_losses = []

        for x, y in train:
            x, y = configurate_xy_tensors(x, y)

            optimizer.zero_grad()
            y_hat = model(x)
            loss = distance(y_hat, y)
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        loss_dict[epoch] = np.mean(batch_losses)

        # Accuracy and loss on the evaluation data after this epoch.
        accuracy_dict[epoch], loss_dict_test[epoch] = predict(model, test, distance)

    return model, loss_dict, accuracy_dict, loss_dict_test

In [4]:
def predict_train(model, train):
    """Compute the label-wise accuracy of `model` over `train`, one sample at a time.

    Parameters
    ----------
    model
        Object exposing ``predict(x)``; its output is thresholded at 0.5.
    train
        Sized iterable of ``(x, y)`` samples. Each `x` gets a leading batch
        axis via ``unsqueeze(0)`` and each `y` is wrapped as
        ``torch.tensor([y])``.

    Returns
    -------
    Tensor
        Number of matching labels divided by
        ``len(train) * labels_per_sample``.

    Raises
    ------
    ValueError
        If `train` is empty (previously this surfaced as an
        UnboundLocalError on the return line).
    """
    if len(train) == 0:
        raise ValueError("predict_train() needs at least one sample")

    correct = 0

    # Pure inference: there is no reason to record an autograd graph here.
    with torch.no_grad():
        for x, y in iter(train):
            y = torch.tensor([y])
            x = x.unsqueeze(0)
            x, y = configurate_xy_tensors(x, y)
            y_hat = model.predict(x)
            outputs = (y_hat > 0.5).to(torch.float32)
            correct += (outputs == y).float().sum()

    # `y` is the last processed sample; its second axis is the label count.
    return correct / (len(train) * y.shape[1])

#### Wrap the evaluation loop in a function
---

In [5]:
def predict(model, test, distance):
    """Evaluate `model` on `test` one sample at a time.

    Parameters
    ----------
    model
        Object exposing ``predict(x)``; its output is thresholded at 0.5.
    test
        Sized iterable of ``(x, y)`` samples. Each `x` gets a leading batch
        axis via ``unsqueeze(0)`` and each `y` is wrapped as
        ``torch.tensor([y])``.
    distance
        Loss callable, invoked as ``distance(y_hat, y)``; its result must
        support ``.item()``.

    Returns
    -------
    tuple
        ``(accuracy, mean_loss)``. `accuracy` is the number of matching
        labels divided by ``len(test) * labels_per_sample`` (a tensor);
        `mean_loss` is the NumPy mean of the per-sample loss values.

    Raises
    ------
    ValueError
        If `test` is empty (previously this surfaced as an
        UnboundLocalError on the return line).
    """
    if len(test) == 0:
        raise ValueError("predict() needs at least one sample")

    correct = 0
    loss_vals = []

    # Pure inference: there is no reason to record an autograd graph here.
    with torch.no_grad():
        for x, y in iter(test):
            y = torch.tensor([y])
            x = x.unsqueeze(0)
            x, y = configurate_xy_tensors(x, y)
            y_hat = model.predict(x)
            loss_vals.append(distance(y_hat, y).item())

            outputs = (y_hat > 0.5).to(torch.float32)
            correct += (outputs == y).float().sum()

    # `y` is the last processed sample; its second axis is the label count.
    return (correct / (len(test) * y.shape[1]), np.mean(loss_vals))


#### Run one experiment as function
---

In [6]:
def run_experiment(augmentation_compose, ModelClass,
                   distance, num_epochs, df, images_path,
                   lr=1e-3, batch_size=100, train_fraction=0.8):
    """Train a fresh model with one augmentation pipeline and collect its metrics.

    Parameters
    ----------
    augmentation_compose
        Transform passed to ``build_dataset`` for the training split; the
        evaluation split is built with ``None`` instead.
    ModelClass
        Zero-argument callable returning a new model.
    distance
        Loss callable forwarded to ``fit``.
    num_epochs : int
        Number of training epochs forwarded to ``fit``.
    df : pandas.DataFrame
        Table describing the samples; rows are split at random into
        train and evaluation parts.
    images_path
        Image location forwarded to ``build_dataset``.
    lr : float, optional
        Adam learning rate (default ``1e-3``).
    batch_size : int, optional
        Training batch size (default ``100``).
    train_fraction : float, optional
        Expected share of rows assigned to the training split
        (default ``0.8``).

    Returns
    -------
    dict
        ``{augmentation_compose: {'loss_train': ..., 'loss_test': ...,
        'accuracy_test': ...}}`` — keyed by the transform object itself.
    """
    model = ModelClass()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device=device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Random row-wise split drawn from NumPy's global RNG.
    train_mask = np.random.rand(len(df)) < train_fraction
    train_dataset = build_dataset(df[train_mask], images_path, augmentation_compose)
    test_dataset = build_dataset(df[~train_mask], images_path, None)

    train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Evaluation walks the dataset sample by sample, so the raw dataset is
    # handed to `fit`; the DataLoader formerly built for it was never used.
    model, loss_dict, accuracy_test, loss_test = fit(
        model, num_epochs, train, optimizer, distance, test_dataset, train_dataset
    )
    return {
        augmentation_compose: {
            'loss_train': loss_dict,
            'loss_test': loss_test,
            'accuracy_test': accuracy_test,
        }
    }

#### Build combinations from aug list
---

In [7]:
# Pool of candidate augmentations; experiments are built from subsets of it.
aug_list = [
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.RandomResizedCrop(size=(20,20)),
    torchvision.transforms.RandomRotation(degrees=(15,95)),
    torchvision.transforms.RandomGrayscale(p=0.1),
    torchvision.transforms.RandomInvert(p=0.4),
    # NOTE(review): a threshold of 192 presumably assumes 0-255 pixel values
    # (PIL / uint8 input) — confirm against what build_dataset feeds in.
    torchvision.transforms.RandomSolarize(threshold=192.0),
    torchvision.transforms.RandomAdjustSharpness(2),
    torchvision.transforms.RandomAutocontrast(),
]

In [8]:
# Every unordered triple of augmentations, each wrapped into one Compose pipeline.
all_combinations = [
    torchvision.transforms.Compose(triple)
    for triple in itertools.combinations(aug_list, 3)
]
# Only the first pipeline is actually run.
combinations = all_combinations[:1]

#### Run all experiments

In [9]:
%%time
# Runs one experiment per entry of `combinations`, gathers the metrics in
# `results` and pickles that dictionary to disk.
distance =  nn.BCELoss()
num_epochs = 1

# NOTE(review): absolute, machine-specific paths — the notebook cannot run
# elsewhere without editing these two lines.
csv_path = r"C:\Users\isaev\Desktop\all_cups/train.csv"
images_path = r"C:\Users\isaev\Desktop\all_cups\avia-train/"

with open(csv_path, "r") as file:
    data = pd.read_csv(file)

# Maps each augmentation pipeline object to its metric dictionaries.
results = {}

for ind, aug in enumerate(combinations):
    print(f"combination {ind+1}/{len(combinations)}")
    experiment_result = run_experiment(
        aug, CNNClassifier, distance,
        num_epochs, data, images_path
    )
    
    results.update(experiment_result)

# The file name carries only the time of day (HH_MM_SS), not the date.
# NOTE(review): the ".." prefix has no path separator, so the file lands in
# the current directory under a name starting with two dots — possibly
# "../augmentations_results" was intended; confirm.
dump_name = "..augmentations_results" + datetime.now().strftime("%H_%M_%S")
# NOTE(review): the accuracy values shown in the output below are tensors on
# 'cuda:0'; presumably unpickling this dump then needs a CUDA-capable
# environment — confirm.
with open(dump_name, "wb") as bfile:
    pickle.dump(results, bfile) 

combination 1/1


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Wall time: 27.1 s


In [10]:
# Display the collected metrics, keyed by augmentation pipeline.
results

{Compose(
     RandomHorizontalFlip(p=0.5)
     RandomVerticalFlip(p=0.5)
     RandomResizedCrop(size=(20, 20), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear)
 ): {'loss_train': {1: 0.483103079369748},
  'loss_test': {1: 0.37451652531716656},
  'accuracy_test': {1: tensor(0.8437, device='cuda:0')}}}