In [None]:
# Import libraries
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

In [None]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from pytorch_lightning.plugins import DDPPlugin
import warnings
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torchmetrics import Accuracy
from sklearn.model_selection import train_test_split
import albumentations as A
import cv2
import torch.nn as nn

# warnings.filterwarnings("ignore")

In [None]:
# Global run configuration read by get_augmentations, LoaderGenerator, Model
# and the final train/test cell.
config = {
    # Architecture name passed to timm.create_model.
    'model_name': 'tf_mobilenetv3_small_minimal_100',
    # Folders that are prefixed to the file names listed in the CSV files.
    'train_image_folder': '../input/resized-plant2021/img_sz_256',
    'test_image_folder': '../input/plant-pathology-2021-fgvc8/test_images',
    # Alternative training CSV, currently disabled:
    # 'train_dataframe': '../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv',
    'train_dataframe': '../input/plant-pathology-2021-fgvc8/train.csv',
    'test_dataframe': '../input/plant-pathology-2021-fgvc8/sample_submission.csv',
    # Name of the target column in the training CSV. The key is spelled
    # 'lable_column' everywhere it is read, so it must not be renamed here alone.
    'lable_column': 'labels',
    'batch_size': 120,
    # Target image size used by A.Resize in get_augmentations.
    'size': (256, 256),
    # True -> trainer.fit on a fresh model; False -> load a checkpoint and run trainer.test.
    'train': True,
}

In [None]:
# Augmentation
def get_augmentations(augmentation):
    """Build the albumentations pipeline for the requested mode.

    Args:
        augmentation: ``'base'`` selects the training pipeline (transpose,
            flips, shift/scale/rotate and a single cutout hole, each applied
            with probability 0.5). Any other value selects the deterministic
            resize + normalize pipeline.

    Returns:
        An ``A.Compose`` that always starts with a resize to ``config['size']``
        and ends with channel-wise normalization.
    """
    # config['size'] is read as (width, height).
    w, h = config['size'][0], config['size'][1]

    # A.Resize expects (height, width). The previous call passed (w, h)
    # positionally, which was only correct because the configured size is square.
    resize = A.Resize(height=h, width=w, p=1)
    normalize = A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                            max_pixel_value=255.0, p=1.0)

    if augmentation == 'base':
        transform = [
            resize,
            A.Transpose(p=0.5),
            A.VerticalFlip(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            # One hole covering at most 30% of each image side.
            A.Cutout(max_h_size=int(h * 0.3), max_w_size=int(w * 0.3), num_holes=1, p=0.5),
            normalize,
        ]
    else:
        transform = [resize, normalize]
    return A.Compose(transform)

In [None]:
class SorghumDataset(Dataset):
    """Dataset that loads images from disk and pairs them with integer targets.

    Args:
        df: DataFrame with an ``'image'`` column of file paths and, when
            ``test`` is falsy, an ``'id'`` column of encoded class ids.
        test: If truthy, targets are filled with zeros (no labels available).
        augmentations: Mode name forwarded to ``get_augmentations``.
    """

    def __init__(self, df, test, augmentations):
        self.image_paths = df['image'].to_list()
        if test:
            # Placeholder targets so __getitem__ has the same shape of output.
            self.targets = np.zeros(len(df))
        else:
            self.targets = df['id'].to_list()

        self.augmentations = get_augmentations(augmentations)

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load one image and return ``(image_tensor, target_tensor)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        path = self.image_paths[item]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # Move the channel axis first: (H, W, C) -> (C, H, W).
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        targets = self.targets[item]

        return torch.tensor(image), torch.tensor(targets)

In [None]:
class LoaderGenerator:
    """Reads the train/test CSV files and builds the train/val/test DataLoaders.

    Args:
        config: Mapping with the keys ``'batch_size'``, ``'train_image_folder'``,
            ``'test_image_folder'``, ``'train_dataframe'``, ``'test_dataframe'``
            and ``'lable_column'``. An optional ``'num_workers'`` key sets the
            DataLoader worker count (default 2).

    Attributes set during construction:
        class_map: dict mapping encoded class id -> original label value.
        train_dataframe / val_dataframe: stratified 70/30 split with the
            columns ``'image'`` (full path) and ``'id'``.
        test_dataframe: test CSV with an ``'image'`` column of full paths.
    """

    def __init__(self, config):
        self.batch_size = config['batch_size']
        self.train_image_folder = config['train_image_folder']
        self.test_image_folder = config['test_image_folder']
        # Optional key; 2 is the worker count that used to be hard-coded.
        self.num_workers = config.get('num_workers', 2)

        self.train_dataframe = pd.read_csv(config['train_dataframe'])
        self.test_dataframe = pd.read_csv(config['test_dataframe'])

        self.lable = config['lable_column']

        self.train_dataframe = self.label_encoder(self.train_dataframe)
        # Only the unique (id, label) pairs are needed to build the mapping.
        id_label_pairs = self.train_dataframe[['id', self.lable]].drop_duplicates()
        self.class_map = dict(sorted(id_label_pairs.values.tolist()))
        # Copy so the column assignment in train_val_prepare writes to an
        # independent frame instead of a slice of the original one.
        self.train_dataframe = self.train_dataframe[['image', 'id']].copy()

        self.train_val_prepare()
        self.test_prepare()

    def label_encoder(self, df):
        """Add an integer ``'id'`` column encoding ``df[self.lable]`` and return ``df``."""
        from sklearn import preprocessing
        le = preprocessing.LabelEncoder()
        df['id'] = le.fit_transform(df[self.lable])
        return df

    def train_val_prepare(self):
        """Prefix the train image folder to the file names and split train/val."""
        self.train_dataframe['image'] = self.train_image_folder + '/' + self.train_dataframe['image']
        # Fixed seed and stratification keep the split reproducible and class-balanced.
        self.train_dataframe, self.val_dataframe = train_test_split(
            self.train_dataframe, test_size=0.3, stratify=self.train_dataframe['id'],
            shuffle=True, random_state=107)

    def test_prepare(self):
        """Normalize the test frame to an ``'image'`` column holding full paths."""
        # Some submission files call the column "filename"; a no-op otherwise.
        self.test_dataframe = self.test_dataframe.rename(columns={"filename": "image"})
        self.test_dataframe['image'] = self.test_image_folder + '/' + self.test_dataframe['image']

    def loader(self, df, test, augmentations, batch_size, drop_last, shuffle):
        """Wrap ``df`` in a SorghumDataset and return a DataLoader over it."""
        dataset = SorghumDataset(df, test, augmentations)
        # Fall back to 2 workers if the instance was built without __init__.
        num_workers = getattr(self, 'num_workers', 2)
        return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers,
                          drop_last=drop_last, shuffle=shuffle)

    def get_loaders(self, aug):
        """Return ``(train_loader, val_loader, test_loader, class_map)``.

        Args:
            aug: Augmentation mode for the training loader. Validation and test
                loaders always use the non-'base' (resize + normalize) pipeline.
        """
        train_loader = self.loader(df=self.train_dataframe, test=False,
                                   augmentations=aug, batch_size=self.batch_size,
                                   drop_last=False, shuffle=True)
        val_loader = self.loader(df=self.val_dataframe, test=False,
                                 augmentations='test', batch_size=self.batch_size,
                                 drop_last=False, shuffle=False)
        test_loader = self.loader(df=self.test_dataframe, test=True,
                                  augmentations='test', batch_size=self.batch_size,
                                  drop_last=False, shuffle=False)
        return train_loader, val_loader, test_loader, self.class_map

In [None]:
show_config = {
    'model_name': 'tf_mobilenetv3_small_minimal_100',
    'train_image_folder': '../input/resized-plant2021/img_sz_256',
    'test_image_folder': '../input/plant-pathology-2021-fgvc8/test_images',
    # 'train_dataframe': '../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv',
    'train_dataframe': '../input/plant-pathology-2021-fgvc8/train.csv',
    'test_dataframe': '../input/plant-pathology-2021-fgvc8/sample_submission.csv',
    'lable_column': 'labels',
    'batch_size': 1,
    'size': (256, 256),
    'train': True,
}

#Show Augmentation
%matplotlib inline
plt.rcParams['figure.figsize'] = (20,20)

loader_generator = LoaderGenerator(show_config)
train_loader, _, _, _ = loader_generator.get_loaders('base')
  
for num, (image, label) in enumerate(train_loader):
    image = image.cpu()[0]
    label = label.cpu()[0]
    plt.subplot(5, 5, num+1)
    plt.title(f'label: {label}')
    plt.imshow(image.permute(1,2,0) * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406]))
    if num == 24:
        break

In [None]:
from torch.optim import Adamax

def get_optimizer(name, parameters, learning_rate):
    """Create an optimizer by name.

    Args:
        name: Optimizer name; only ``'Adamax'`` is supported.
        parameters: Iterable of parameters to optimize.
        learning_rate: Learning rate passed as ``lr``.

    Returns:
        The constructed optimizer.

    Raises:
        ValueError: If ``name`` is not a supported optimizer.
    """
    if name == 'Adamax':
        return Adamax(parameters, lr=learning_rate)
    # Previously an unknown name fell through to an UnboundLocalError.
    raise ValueError(f"Unsupported optimizer: {name!r}")

In [None]:
from torch.nn import CrossEntropyLoss

def get_loss(name):
    """Create a loss module by name.

    Args:
        name: Loss name; only ``'CrossEntropyLoss'`` is supported.

    Returns:
        The constructed loss module.

    Raises:
        ValueError: If ``name`` is not a supported loss.
    """
    if name == 'CrossEntropyLoss':
        return CrossEntropyLoss()
    # Previously an unknown name fell through to an UnboundLocalError.
    raise ValueError(f"Unsupported loss: {name!r}")

In [None]:
class CustomModel(nn.Module):
    """Thin wrapper around a timm backbone with a classifier of ``num_classes`` outputs.

    Args:
        model_name: Architecture name understood by ``timm.create_model``.
        pretrained: Whether timm should load pretrained weights.
        num_classes: Number of output classes.
    """

    def __init__(self, model_name, pretrained=False, num_classes=100):
        super().__init__()
        # Let timm size the classification head itself. This no longer relies
        # on the backbone exposing its head under the attribute `.classifier`.
        self.model = timm.create_model(model_name, pretrained=pretrained,
                                       num_classes=num_classes)

    def forward(self, x):
        """Return the backbone output for the batch ``x``."""
        return self.model(x)

In [None]:
class Model(pl.LightningModule):
    """LightningModule bundling the data loaders, network, loss, metrics and
    submission writing.

    Args:
        config: Run configuration; must provide the keys read by
            ``LoaderGenerator`` plus ``'model_name'``, ``'test_dataframe'`` and
            ``'lable_column'``.
        learning_rate: Learning rate for the optimizer.
        name_loss: Loss name passed to ``get_loss``.
        name_optimizer: Optimizer name passed to ``get_optimizer``.
        aug: Augmentation mode for the training loader.
    """

    def __init__(self, config, learning_rate, name_loss, name_optimizer, aug):
        super(Model, self).__init__()
        self.aug = aug
        self.loader_generator = LoaderGenerator(config)
        self.train_loader, self.val_loader, self.test_loader, self.class_map = self.loader_generator.get_loaders(self.aug)
        self.model_name = config['model_name']
        self.name_optimizer = name_optimizer
        self.name_loss = name_loss
        self.learning_rate = learning_rate
        self.n_classes = len(self.class_map)
        self.load_model()
        self.loss = get_loss(name_loss)
        self.metric_train = Accuracy()
        self.metric_val = Accuracy()
        # Predicted class ids collected by test_step.
        self.result = []
        self.submission = pd.read_csv(config['test_dataframe'])
        # Column of the submission file that receives the predictions.
        # NOTE(review): assumes the submission CSV names its prediction column
        # like the training label column - confirm against the sample file.
        self.label_column = config['lable_column']

    def load_model(self):
        """Create the network with one output per class in ``class_map``."""
        self.model = CustomModel(self.model_name, pretrained=False, num_classes=self.n_classes)

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        return self.model(x)

    def log_scores(self, scores, status, score_name):
        """Log ``scores`` once per epoch under the key ``'{status}_{score_name}'``."""
        self.log(f'{status}_{score_name}', scores, on_step=False, on_epoch=True,
                 prog_bar=True, logger=True, sync_dist=True)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one batch and update the training accuracy."""
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
        preds_class = torch.argmax(logits, 1)
        self.metric_train.update(preds_class, y)
        return loss

    def training_epoch_end(self, outputs):
        """Log the training accuracy of the finished epoch."""
        acc = self.metric_train.compute()
        self.log_scores(acc, status='train', score_name='acc')
        # Without a reset the metric keeps accumulating, so every later epoch
        # would report a running average over all epochs so far.
        self.metric_train.reset()

    def validation_step(self, batch, batch_idx):
        """Compute the loss for one validation batch and update the validation accuracy."""
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True, sync_dist=True)
        preds_class = torch.argmax(logits, 1)
        self.metric_val.update(preds_class, y)
        return loss

    def validation_epoch_end(self, outputs):
        """Log the validation accuracy and save the current network to ``model.pth``."""
        acc = self.metric_val.compute()
        self.log_scores(acc, status='val', score_name='acc')
        # Start the next epoch from a clean state (see training_epoch_end).
        self.metric_val.reset()
        # Write the file from a single process only when running on several devices.
        if self.trainer.is_global_zero:
            torch.save(self.model, 'model.pth')

    def on_test_epoch_start(self):
        """Drop predictions left over from a previous test run."""
        self.result = []

    def test_step(self, batch, batch_idx):
        """Store the predicted class ids of one test batch."""
        x, y = batch
        logits = self(x)
        preds_class = torch.argmax(logits, 1)
        self.result.extend(preds_class.tolist())

    def test_epoch_end(self, outputs):
        """Map predicted ids back to labels and write ``submission.csv``."""
        # Use the configured label column instead of a hard-coded 'cultivar',
        # which only matched a different dataset.
        self.submission[self.label_column] = self.result
        self.submission[self.label_column] = self.submission[self.label_column].map(self.class_map)
        self.submission.to_csv('submission.csv', index=False)

    def configure_optimizers(self):
        """Return the optimizer and a ReduceLROnPlateau scheduler driven by ``val_loss``."""
        optimizer = get_optimizer(name=self.name_optimizer, parameters=self.parameters(),
                                  learning_rate=self.learning_rate)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=3)
        scheduler = {'scheduler': lr_scheduler, 'reduce_on_plateau': True,
                     'monitor': 'val_loss', 'name': 'lr_value'}
        return [optimizer], [scheduler]

    def train_dataloader(self):
        """Return the training DataLoader built in ``__init__``."""
        return self.train_loader

    def val_dataloader(self):
        """Return the validation DataLoader built in ``__init__``."""
        return self.val_loader

    def test_dataloader(self):
        """Return the test DataLoader built in ``__init__``."""
        return self.test_loader

In [None]:
def get_callbacks():
    """Return the Trainer callbacks: LR monitor, progress bar and checkpointing.

    The checkpoint callback keeps only the single best checkpoint, ranked by
    the highest logged ``val_acc``.
    """
    return [
        pl.callbacks.LearningRateMonitor(logging_interval='epoch'),
        pl.callbacks.TQDMProgressBar(refresh_rate=1, process_position=0),
        pl.callbacks.ModelCheckpoint(
            monitor='val_acc',
            filename='{epoch:02d}-{val_acc:.3f}',
            save_top_k=1,
            mode='max',
            every_n_epochs=1,
        ),
    ]

In [None]:
# Trainer over all visible GPUs on a single node, with the sanity validation pass disabled.
trainer = pl.Trainer(
    callbacks=get_callbacks(),
    gpus=-1,
    num_nodes=1,
    sync_batchnorm=True,
    benchmark=True,
    max_epochs=2,
    log_every_n_steps=1,
    num_sanity_val_steps=0,
)

# Arguments shared by both ways of building the model below.
model_kwargs = dict(config=config, learning_rate=0.001,
                    name_loss='CrossEntropyLoss', name_optimizer='Adamax')

if not config['train']:
    # Inference only: restore the weights from a checkpoint and write the submission.
    model = Model.load_from_checkpoint('../input/sorghum-model/epoch191-step19775.ckpt',
                                       aug='test', **model_kwargs)
    trainer.test(model)
else:
    # Train from scratch with the 'base' augmentation pipeline.
    model = Model(aug='base', **model_kwargs)
    trainer.fit(model)