In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the label table (image file name + categorical label) and shuffle the rows
# reproducibly with a fixed random_state.
train_df = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
train_df = train_df.sample(frac=1, random_state=666)

# Add a third column holding the full path of each (downscaled) image file.
train_df['path'] = ['../input/plant2021-downscaled-images-dataset/' + file_name
                    for file_name in train_df['image']]
train_df.head(5)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the categorical label strings as integer ids.
label = LabelEncoder()
train_df['label_id'] = label.fit_transform(train_df['labels'])

# id -> label-name mapping, kept so predictions can be turned back into label
# strings at submission time (12 labels in total).
label_dic = dict(enumerate(label.classes_.tolist()))
print(label_dic)

# Number of distinct labels (12).
classes = train_df['labels'].nunique()

# The string column is no longer needed once the ids exist.
del train_df['labels']

# Parallel arrays consumed by the Dataset: integer targets and image paths.
image_labels = np.array(train_df['label_id'])  # e.g. [ 9  6  9  9  3  9  3 10  6  6]
image_list = np.array(train_df['path'])        # path to img

print(image_list.shape)  # 18632
print(image_labels[:10])

In [None]:

# Per-class loss weights: 1.0001 divided by each class's sample count relative to
# a reference count of 4826 (presumably the size of the largest class -- TODO confirm).
class_counts = train_df.groupby('label_id').size()
relative_freq = class_counts / 4826
cls_weight = list((1.0001 / relative_freq).values)

display(train_df)
# Last expression of the cell: show the relative class frequencies.
relative_freq

In [None]:
!apt install ../input/pyturbojpeg/libturbojpeg_1.4.2-0ubuntu3.4_amd64.deb
!pip install ../input/pyturbojpeg/PyTurboJPEG-1.4.1

In [None]:
import matplotlib.pyplot as plt
import albumentations as A
import cv2, torch
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from turbojpeg import TurboJPEG

# Use the GPU when one is actually usable, otherwise fall back to the CPU.
# `torch.cuda.is_available` must be *called*: the bare function object is always
# truthy, which silently selected 'cuda' even on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#######################################

from albumentations.pytorch import ToTensor

def get_training_augmentation():
    """Build the training-time augmentation callable.

    The albumentations pipeline applies, in order: SmallestMaxSize(224),
    RandomCrop(224, 224), RandomContrast, three `OneOf` groups (each with
    p=0.2), Normalize with the given mean/std, and ToTensor.

    Returns:
        A callable taking one image (anything `np.array` accepts) and returning
        the 'image' entry of the pipeline output.
    """
    # Stages are instantiated in the same order they run in the pipeline.
    resize = A.SmallestMaxSize(224)
    crop = A.RandomCrop(224, 224)
    contrast = A.RandomContrast()
    tone_jitter = A.OneOf([A.RandomGamma(), A.RandomBrightness()], p=0.2)
    degrade_or_tilt = A.OneOf(
        [
            A.GaussNoise(),
            A.RandomContrast(),
            A.RandomGamma(),
            A.Rotate(limit=60),
            A.MotionBlur(blur_limit=20),
        ],
        p=0.2,
    )
    reorient = A.OneOf([A.Rotate(limit=360), A.Flip(p=0.2)], p=0.2)
    normalize = A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    to_tensor = ToTensor()

    pipeline = A.Compose(
        [resize, crop, contrast, tone_jitter, degrade_or_tilt, reorient, normalize, to_tensor],
        p=1,
    )

    def augment(img):
        # albumentations takes the image as a keyword argument and returns a dict.
        return pipeline(image=np.array(img))['image']

    return augment



def transform_valid():
    """Build the deterministic preprocessing callable for validation / inference.

    The pipeline applies SmallestMaxSize(224), a 224x224 *center* crop,
    Normalize with the same mean/std as the training pipeline, and ToTensor.
    A center crop replaces the former RandomCrop so that evaluating the same
    image twice yields the same input (and therefore the same prediction).

    Returns:
        A callable taking one image (anything `np.array` accepts) and returning
        the 'image' entry of the pipeline output.
    """
    augmentation_pipeline = A.Compose(
        [
            A.SmallestMaxSize(224),
            # Deterministic crop: evaluation must not depend on random state.
            A.CenterCrop(224, 224),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
                ),
            ToTensor()
        ],
        p = 1
    )
    return lambda img:augmentation_pipeline(image=np.array(img))['image']

######################################

# One decoder instance, created once and reused for every image.
jpeg_reader = TurboJPEG()


def read_img(img):
    """Read the JPEG file at path `img` and decode it with the shared TurboJPEG reader.

    The second argument passed to `decode` is 0 -- presumably the pixel-format
    selector (TODO confirm against the PyTurboJPEG constants).
    """
    with open(img, "rb") as jpeg_file:
        encoded = jpeg_file.read()
    return jpeg_reader.decode(encoded, 0)
    

class dataset(Dataset) :
    """Map-style dataset yielding (image tensor, label tensor) pairs.

    Args:
        image_list: indexable collection of image file paths.
        image_labels: indexable collection of integer class ids, parallel to
            `image_list`.
        transform: callable applied to the decoded image; its result must
            support `.to(device)`.
        device: device every returned tensor is moved to. Previously this
            argument was accepted but ignored in favour of a module-level
            global; it is now stored and honoured.

    Note:
        Tensors are moved to `device` inside `__getitem__`, as before.
        NOTE(review): with a CUDA device this is expected to require the
        DataLoader to run with `num_workers=0` -- confirm before adding workers.
    """
    def __init__(self, image_list, image_labels, transform, device) :
        self.image_list = image_list
        self.image_labels = image_labels
        self.transform = transform
        self.device = device

    def __len__(self) :
        return len(self.image_list)

    def __getitem__(self, index) :
        # Decode the file, then preprocess / augment it.
        x = read_img(self.image_list[index])
        x = self.transform(x).to(self.device)

        # The label is returned as a 1-element LongTensor (shape [1]).
        y = self.image_labels[index]
        y = torch.LongTensor([y,]).to(self.device)

        return x, y


# Training split: the first 15000 (already shuffled) samples with random augmentation.
train_data = DataLoader(
    dataset(image_list[:15000], image_labels[:15000], get_training_augmentation(), device),
    batch_size=15,
    shuffle=True,
)
print(len(train_data.dataset))

# Validation split: every remaining sample, with the validation preprocessing.
valid_data = DataLoader(
    dataset(image_list[15000:], image_labels[15000:], transform_valid(), device),
    batch_size=15,
    shuffle=True,
)
print(len(valid_data.dataset))
########

In [None]:
# Phase name -> DataLoader, as consumed by the training loop.
dataloaders = {
    'train': train_data,
    'val': valid_data,
}

# Phase name -> number of samples, derived from the loaders' datasets rather than
# hard-coded (15000 / 3632), so the averages stay correct if the split changes.
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

In [None]:
def gallery(array, ncols=3):
    """Tile a stack of equally sized images into a single grid image.

    `array.shape` is unpacked as (count, height, width, channels); `count`
    must be an exact multiple of `ncols` (checked with an assertion). Images
    are laid out row by row, `ncols` per row.

    Returns:
        An array of shape (height * rows, width * ncols, channels).
    """
    count, img_h, img_w, channels = array.shape
    rows, leftover = divmod(count, ncols)
    assert leftover == 0
    grid = array.reshape(rows, ncols, img_h, img_w, channels)
    # Bring each image row next to its grid row so the final reshape interleaves them.
    grid = grid.swapaxes(1, 2)
    return grid.reshape(img_h * rows, img_w * ncols, channels)

In [None]:
# Visual check of the training augmentation: augment one sample image 25 times
# and show the results as a 5x5 grid.
image = read_img('../input/plant2021-downscaled-images-dataset/800113bb65efe69e.jpg')

augmented_samples = []
for _ in range(25):
    sample = get_training_augmentation()(image)
    # Reorder the tensor axes with permute((1, 2, 0)) before converting to numpy for plotting.
    augmented_samples.append(sample.permute((1, 2, 0)).numpy())
images_aug = np.array(augmented_samples)

plt.figure(figsize=(10,10))
plt.axis('off')
plt.imshow(gallery(images_aug, ncols=5))
plt.title('Augmentation pipeline examples')

In [None]:
import sys

sys.path.append("../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master")

from efficientnet_pytorch import model as enet

In [None]:
# Sanity check: augment the sample image, reshape it to a single-image batch and
# display the resulting shape, (1, 3, 224, 224).
get_training_augmentation()(image).numpy().reshape(1,3,224,224).shape

In [None]:
# Instantiate the EfficientNet-B7 architecture, then load the checkpoint file
# shipped with the attached dataset into it.
model = enet.EfficientNet.from_name('efficientnet-b7')

pretrained_state = torch.load('../input/efficientnet-pytorch/efficientnet-b7-dcc49843.pth')
model.load_state_dict(pretrained_state)

In [None]:
# Inspect the network's current classification head (`_fc`) before it is replaced below.
model._fc

In [None]:
import torch.nn as nn

class FocalLoss(nn.Module):
    """
    Class-weighted sigmoid focal loss for fighting against class imbalance.

    Each logit is treated as an independent binary prediction. The loss per
    element is ``-alpha * (1 - pt) ** gamma * log(pt) * class_weight`` where
    ``pt`` is the probability assigned to the true outcome; the result is the
    mean over all elements.

    Args:
        alpha: global scale factor of the focal term.
        gamma: focusing exponent applied to ``(1 - pt)``.
        cls_weights: optional sequence/tensor with one weight per class. When
            omitted, the module-level ``cls_weight`` list is used, as before.

    The weights are registered as a buffer (shape [1, num_classes]) so that
    ``.to(device)`` / ``.cuda()`` move them together with the module.
    """
    def __init__(self, alpha=1, gamma=2, cls_weights=None):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        # Retained for backward compatibility only: log-probabilities are now
        # computed with logsigmoid, which needs no epsilon.
        self.epsilon = 1e-12
        if cls_weights is None:
            # Fall back to the notebook-level weights computed from the class counts.
            cls_weights = cls_weight
        weights = torch.as_tensor(cls_weights, dtype=torch.float).clone().detach().reshape(1, -1)
        self.register_buffer('cls_weights', weights)

    def forward(self, logits, target):
        """
        logits & target should be tensors with shape [batch_size, num_classes];
        target holds 0/1 indicators (any numeric dtype).
        """
        target = target.to(logits.dtype)
        # log(sigmoid(x)) and log(1 - sigmoid(x)) = log(sigmoid(-x)), computed
        # stably. The former log(sigmoid(x) + eps) saturated in float32 and gave
        # zero gradient for confidently wrong predictions.
        log_pt = (target * nn.functional.logsigmoid(logits)
                  + (1.0 - target) * nn.functional.logsigmoid(-logits))
        pt = torch.exp(log_pt)
        focal_loss = -1.0 * (self.alpha * (1 - pt) ** self.gamma) * log_pt
        # No-op when the module already lives on the logits' device.
        focal_loss = focal_loss * self.cls_weights.to(logits.device)
        return torch.mean(focal_loss)

In [None]:
class F1_Loss(nn.Module):
    """Differentiable (soft) macro-F1 loss: ``1 - mean over classes of F1``.

    Softmax probabilities are used in place of hard predictions when counting
    true positives, false positives and false negatives per class.

    Args:
        epsilon: small constant added to denominators and used to clamp the
            per-class F1 into ``[epsilon, 1 - epsilon]``.
    """

    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        """
        Args:
            y_pred: logits of shape [batch_size, num_classes].
            y_true: integer class ids of shape [batch_size].

        Returns:
            Scalar tensor ``1 - mean(F1 per class)``.
        """
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        # Take the class count from the logits instead of hard-coding 12.
        num_classes = y_pred.shape[1]
        y_true = torch.nn.functional.one_hot(y_true, num_classes).to(torch.float32)
        y_pred = torch.nn.functional.softmax(y_pred, dim=1)

        # Soft confusion counts, summed over the batch for each class.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

# Instantiate the soft-F1 loss. F1_Loss.__init__ only stores `epsilon`, so the
# module holds no parameters for `.cuda()` to move.
f1_loss = F1_Loss().cuda()


In [None]:
# Replace the classification head with one sized for this task. The input width
# is read from the existing head and the output width from `classes` instead of
# hard-coding 2560 / 12, and the layer goes to the selected `device` rather than
# assuming CUDA is present.
model._fc = nn.Linear(in_features=model._fc.in_features, out_features=classes).to(device)

In [None]:
import torch.nn as nn
from torch.optim import Adam
from torch.optim import lr_scheduler

# model._fc = torch.nn.Linear(in_features=1280, out_features=classes) #change the last FC layer

# Move the network to the selected device before the optimizer collects its parameters.
model = model.to(device)

# Focal loss is the active criterion (alternative tried: nn.CrossEntropyLoss().to(device)).
criterion = FocalLoss().to(device)

# Adam over every model parameter (lr, SGD were the knobs under consideration).
optimizer = Adam(model.parameters(), lr=0.001)

# Step schedule: step_size=7, gamma=0.1.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Display the per-phase sample counts used to average loss and accuracy.
dataset_sizes

In [None]:
import time
import copy

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model` and return it loaded with its best-validation-accuracy weights.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level `dataloaders` dict; batches are moved to the module-level
    `device`.

    Args:
        model: network mapping an input batch to logits [batch, num_classes].
        criterion: callable ``(logits, one_hot_targets) -> scalar loss``; the
            targets are int64 one-hot tensors of shape [batch, num_classes].
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch *after* the
            training pass.
        num_epochs: number of epochs to run.

    Returns:
        The same `model` object, with the best weights loaded.

    Side effects:
        Prints per-phase loss/accuracy and writes 'best_model.pth' to the
        working directory whenever validation accuracy improves.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                # The dataset yields labels of shape [batch, 1]; flatten to [batch].
                labels = labels.reshape(-1).to(device)

                if is_train:
                    # zero the parameter gradients
                    optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    # Class count comes from the logits instead of a hard-coded 12.
                    targets = torch.nn.functional.one_hot(labels, num_classes=outputs.shape[1])
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_size

            if is_train:
                # Step the schedule only after this epoch's optimizer updates.
                # The former optimizer.step()/scheduler.step() at the start of the
                # phase applied an update with no fresh gradients and shifted the
                # learning-rate schedule one epoch early.
                scheduler.step()

            # Average over the samples actually processed (guard against empty loaders).
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), 'best_model.pth')

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Fine-tune for 15 epochs; train_model returns the model with the weights of the
# epoch that reached the best validation accuracy loaded.
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=15)

In [None]:
# Builds the validation transform; as the cell's last expression, the returned
# callable's repr is displayed.
transform_valid()

In [None]:
from glob import glob

import os

# Every test image shipped with the competition data.
valid_image_list = glob('../input/plant-pathology-2021-fgvc8/test_images/*.jpg')

# Build the preprocessing callable once instead of once per image.
preprocess = transform_valid()

model.eval()
predict_list = []
image_name_list = []
# Inference only: disable autograd so no graph is built or kept per image.
with torch.no_grad():
    for image in tqdm(valid_image_list):
        img = preprocess(read_img(image)).to(device)
        # Add the batch dimension expected by the model.
        img = img.reshape(-1, 3, 224, 224)
        predict = model(img).reshape(-1)

        predict_list.append(torch.argmax(predict).item())
        # File name without its directory (replaces the brittle `image[48:]` slice).
        image_name_list.append(os.path.basename(image))

predict_list = np.array(predict_list)
image_name_list = np.array(image_name_list)
print(image_name_list)

# Map predicted ids back to label strings for the submission file.
submission_df = pd.DataFrame({
    'image': image_name_list,
    'labels': pd.Series(predict_list).map(label_dic),
})
submission_df.head()

In [None]:
# Shape of the last test image batch that was fed to the model in the inference loop.
img.shape

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
submission_df.to_csv("submission.csv", index = False)