In [None]:
# EfficientNetB4 classifier for the Cassava Leaf Disease Classification dataset

In [None]:
# Import the required libraries
import os
import glob
import shutil
import json
import keras
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB4
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
import albumentations

# Dataset locations
work_dir = '../input/cassava-leaf-disease-classification/'
os.listdir(work_dir)  # raises FileNotFoundError early if the dataset directory is missing
# Derived from work_dir so that every path in the notebook shares a single root
train_path = os.path.join(work_dir, 'train_images')

In [None]:
# Load train.csv from the dataset directory into a DataFrame
data = pd.read_csv(os.path.join(work_dir, 'train.csv'))

In [None]:
# Read the label-number -> disease-name mapping; the context manager closes the
# file handle as soon as the JSON has been parsed.
with open(work_dir + 'label_num_to_disease_map.json') as f:
    real_labels = json.load(f)
# JSON object keys are always strings; cast them to int before mapping data.label
real_labels = {int(k): v for k, v in real_labels.items()}

# Add a readable class-name column derived from the label column
data['class_name'] = data.label.map(real_labels)

# Input resolution, number of classes and batch size used by the cells below
IMG_SIZE = 380
size = (IMG_SIZE, IMG_SIZE)
n_CLASS = 5
BATCH_SIZE = 16  # earlier runs: V2 used 8, V3 used 16

# Hold out 10% of the rows for validation, stratified on the class name
train, val = train_test_split(
    data,
    test_size=0.1,
    random_state=42,
    stratify=data['class_name'],
)

# Validation generator: preprocessing only, no augmentation
datagen_val = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Training generator: same preprocessing plus random augmentation applied per batch
datagen_train = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
    horizontal_flip=True,    # random left-right flips
    vertical_flip=True,      # random up-down flips
    rotation_range=40,       # degree range for random rotations
    shear_range=0.2,         # shear intensity
    zoom_range=0.2,          # range for random zoom
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',     # how pixels exposed by a transform are filled
)

# Pin the class order to the numeric label ids so that output index i of the
# model is label i. Left to itself, flow_from_dataframe orders classes by
# sorting the name strings, and the inference cell submits the argmax index
# directly as the label.
CLASS_NAMES = [real_labels[label_id] for label_id in sorted(real_labels)]

# Training iterator: images are read from train_path and resized to `size`
train_data = datagen_train.flow_from_dataframe(
    train,
    directory=train_path,
    x_col='image_id',
    y_col='class_name',
    classes=CLASS_NAMES,
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',  # resampling filter used when resizing
    shuffle=True,
    seed=42,                  # fixed seed for repeatable shuffling/augmentation
    batch_size=BATCH_SIZE,
)

# Validation iterator: same source directory and label encoding as training
val_data = datagen_val.flow_from_dataframe(
    val,
    directory=train_path,
    x_col='image_id',
    y_col='class_name',
    classes=CLASS_NAMES,
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',
    # Evaluation gains nothing from shuffling; a fixed order keeps the evaluated
    # images identical from epoch to epoch even when validation_steps does not
    # cover every batch, so val_loss is comparable across epochs.
    shuffle=False,
    seed=42,
    batch_size=BATCH_SIZE,
)

In [None]:
def create_model():
    """Build the classifier: an EfficientNetB4 backbone with a small dense head.

    The backbone is created without its top and initialised from the
    ``efficientnetb4_notop.h5`` weights file. It is followed by global average
    pooling, a 512-unit ReLU layer, dropout (0.5) and a softmax layer with
    ``n_CLASS`` outputs.

    Returns:
        An uncompiled ``Sequential`` model whose backbone expects
        ``(IMG_SIZE, IMG_SIZE, 3)`` inputs.
    """
    backbone = EfficientNetB4(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='../input/tfkerasefficientnetimagenetnotop/efficientnetb4_notop.h5',
        drop_connect_rate=0.4,
    )

    return Sequential([
        backbone,
        # Global pooling already produces a flat (batch, channels) tensor, so the
        # Flatten layer that used to follow it was a no-op and has been removed.
        GlobalAveragePooling2D(),
        # NOTE(review): only the bias is regularised here; kernel_regularizer may
        # have been intended — confirm before changing, it would alter training.
        Dense(512, activation='relu',
              bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)),
        Dropout(0.5),
        Dense(n_CLASS, activation='softmax'),
    ])

# Build one instance to print the architecture. model_fitter() creates its own
# fresh model for training rather than reusing this one.
leaf_model = create_model()
leaf_model.summary()

In [None]:
EPOCHS = 12  # earlier runs: V2 used 7, V3 used 7
# Steps per pass over each iterator. len() of a Keras iterator is its number of
# batches rounded up, so the final partial batch is included; floor division
# (n // batch_size) silently left those images out of every epoch.
SST = len(train_data)
SSV = len(val_data)

In [None]:
def model_fitter():
    """Build, compile and train a fresh model, saving it to disk.

    Trains on ``train_data`` and validates on ``val_data`` for up to ``EPOCHS``
    epochs. ``Cassava_best_model.h5`` is written by the checkpoint callback
    whenever ``val_loss`` improves, and ``Cassava_model.h5`` is written once
    training has finished.

    Returns:
        The ``History`` object returned by ``fit``.
    """
    leaf_model = create_model()

    # Cross-entropy between one-hot labels and softmax outputs, with a very
    # small amount of label smoothing
    loss = tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
        label_smoothing=0.0001,
        name='categorical_crossentropy',
    )
    leaf_model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss=loss,
        metrics=['categorical_accuracy'],
    )

    # All three callbacks monitor val_loss (lower is better)
    es = EarlyStopping(monitor='val_loss', mode='min', patience=3,
                       restore_best_weights=True, verbose=1)
    checkpoint_cb = ModelCheckpoint("Cassava_best_model.h5", save_best_only=True,
                                    monitor='val_loss', mode='min')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2,
                                  min_lr=1e-6, mode='min', verbose=1)

    # The iterators already yield batches of BATCH_SIZE; fit() must not be given
    # a batch_size when its input is a generator/Sequence, so none is passed.
    history = leaf_model.fit(
        train_data,
        validation_data=val_data,
        epochs=EPOCHS,
        steps_per_epoch=SST,
        validation_steps=SSV,
        callbacks=[es, checkpoint_cb, reduce_lr],
    )

    leaf_model.save('Cassava_model.h5')

    return history

In [None]:
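# Unused draft of a tf.image-based augmentation step, kept commented out.
# It refers to `random`, `SEED` and `IMAGE_SIZE`, which the cells above do not define.
#def data_augment(image, label):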
    # data augmentation
#    flag = 2 #random.randint(1,3)
#    coef_1 = random.randint(75, 95) * 0.01
#    coef_2 = random.randint(75, 95) * 0.01
#    if flag == 1:
#        image = tf.image.random_flip_left_right(image, seed=SEED)
#    elif flag == 2:
#        image = tf.image.random_flip_up_down(image, seed=SEED)
#    else:
#        image = tf.image.random_crop(image, [int(IMAGE_SIZE[0]*coef_1), int(IMAGE_SIZE[0]*coef_2), 3],seed=SEED)
#    return image, label   

In [None]:
# Run training; `results` is the History object returned by model_fitter()
results = model_fitter()

In [None]:
# Evaluation metric: best per-epoch accuracy on the training and validation data
training_log = results.history
best_train_accuracy = max(training_log['categorical_accuracy'])
best_val_accuracy = max(training_log['val_categorical_accuracy'])

print('Точность тренировки: ', best_train_accuracy)
print('Точность проверки: ', best_val_accuracy)

In [None]:
# Reload the checkpoint written by ModelCheckpoint. The model was built and
# saved through tf.keras, so it is loaded through tf.keras as well instead of
# the standalone `keras` package (both are imported at the top of the notebook).
final_model = tf.keras.models.load_model('Cassava_best_model.h5')

In [None]:
# Predict a class index for every file in the test directory, one image at a time
TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'
test_images = os.listdir(TEST_DIR)
predictions = []

for image in test_images:
    # The context manager releases the file handle once the pixels are copied
    with Image.open(os.path.join(TEST_DIR, image)) as raw_image:
        # Force three channels so a grayscale or RGBA file still fits the model input.
        # NOTE(review): the training iterators resize with interpolation='nearest'
        # while resize() here uses PIL's default filter — confirm whether the two
        # should be aligned.
        resized = raw_image.convert('RGB').resize(size)
    batch = np.expand_dims(np.asarray(resized), axis=0)  # add the batch dimension
    predictions.extend(final_model.predict(batch).argmax(axis=1))

In [None]:
# Preview the first few predicted labels rather than dumping the whole list
predictions[:10]

In [None]:
# Build the submission table (one row per test image) and write submission.csv
submission_columns = {'image_id': test_images, 'label': predictions}
sub = pd.DataFrame(submission_columns)
display(sub)
sub.to_csv('submission.csv', index=False)