In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt

from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import RMSprop,Adam
from keras.applications import ResNet50, ResNet101, DenseNet121

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input/cassava-leaf-disease-classification/'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))
        
# Root directory of the competition dataset. Other cells build file names by
# plain string concatenation (e.g. path+'train.csv'), so the trailing slash
# is required.
path = '/kaggle/input/cassava-leaf-disease-classification/'
# Bare last expression: the directory listing becomes the cell output.
os.listdir(path)



In [None]:
# Batch and image geometry shared by the data generators and the network input.
img_channel = 3
img_size = 256
batch_size = 32

# Competition tables: the sample submission (image ids to predict) and the
# training table (image ids with their integer labels).
samp_subm = pd.read_csv(os.path.join(path, 'sample_submission.csv'))
train_data = pd.read_csv(os.path.join(path, 'train.csv'))

# One-hot encode the integer 'label' column for use as training targets.
y_train = to_categorical(train_data['label'])



In [None]:
# Number of training samples per class, ordered by class id.
class_counts = train_data['label'].value_counts().sort_index()

# "Balanced" class weights: n_samples / (n_classes * count).
# The previous version used the raw class frequency as the weight, which would
# make the most common class count the most in a weighted loss - the opposite
# of what class weighting is for. Keys are taken from the labels actually
# present instead of a hard-coded range(0, 5), so a missing class can no
# longer shift the weights onto the wrong ids.
class_weight = {
    int(label): float(len(train_data) / (len(class_counts) * count))
    for label, count in class_counts.items()
}
class_weight

In [None]:


class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads images from disk and yields normalised batches.

    Every batch is a tuple ``(X, y)``:

    * ``X`` - float32 array of shape ``(n, img_size, img_size, img_channel)``,
      standardised with the mean and standard deviation of the batch itself.
    * ``y`` - integer array of shape ``(n, n_classes)``.

    ``n`` equals ``batch_size`` for every batch except possibly the last one,
    which holds whatever samples remain.

    Parameters
    ----------
    path : str
        Prefix prepended verbatim to each image id, so it should end with a
        path separator.
    list_IDs : sequence of str
        Image file names, one per sample.
    labels : sequence
        One entry per sample, aligned positionally with ``list_IDs``. An entry
        is either a row of width ``n_classes`` (e.g. one-hot) or a scalar,
        which is broadcast across the whole row of ``y``.
    batch_size : int
        Number of samples per batch (must be >= 1).
    img_size : int
        Images are resized to ``img_size x img_size``.
    img_channel : int
        Number of channels of the arrays handed to the model.
    n_classes : int, optional
        Width of each target row. Defaults to 5.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``labels`` and ``list_IDs`` differ
        in length.
    """

    def __init__(self, path, list_IDs, labels, batch_size, img_size, img_channel, n_classes=5):
        super().__init__()
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        if len(labels) != len(list_IDs):
            raise ValueError('labels and list_IDs must have the same length')
        self.path = path
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_channel = img_channel
        self.n_classes = n_classes
        self.indexes = np.arange(len(self.list_IDs))
        # Positional snapshots: plain arrays are indexed by position whatever
        # index a pandas Series passed by the caller happens to carry.
        self._ids = np.asarray(list_IDs)
        self._labels = np.asarray(labels)

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # ceil instead of floor so the last len % batch_size samples are not
        # silently dropped.
        return int(np.ceil(len(self.indexes) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``.

        Raises ``IndexError`` when ``index`` is outside ``[0, len(self))``.
        """
        if not 0 <= index < len(self):
            raise IndexError('batch index out of range')
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, batch_indexes):
        """Load, resize and normalise the samples at ``batch_indexes``.

        Raises ``FileNotFoundError`` when an image cannot be read.
        """
        n_samples = len(batch_indexes)
        X = np.empty((n_samples, self.img_size, self.img_size, self.img_channel), dtype='float32')
        y = np.empty((n_samples, self.n_classes), dtype=int)
        for row, sample_idx in enumerate(batch_indexes):
            file_name = self.path + str(self._ids[sample_idx])
            img = cv2.imread(file_name)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError('could not read image: ' + file_name)
            X[row] = cv2.resize(img, (self.img_size, self.img_size))
            # Look the label up by the sample's position in the dataset, not
            # by its position inside the batch; otherwise every batch would
            # receive the labels of the first batch_size samples.
            y[row] = self._labels[sample_idx]
        # Per-batch standardisation; skip the division for a constant batch to
        # avoid producing NaNs.
        X -= X.mean()
        std = X.std()
        if std > 0:
            X /= std
        return X, y



In [None]:
# Location of a ResNet50 weights file.
# NOTE(review): this variable is not referenced by any of the cells visible
# here (the network below is created with weights=None) - confirm whether the
# weights were meant to be loaded.
resnet50_weights='../input/resnet50_weights.h5'

In [None]:
# ResNet50 used as a convolutional feature extractor, initialised randomly
# (weights=None). include_top=False removes ResNet50's own pooling layer and
# 1000-way softmax classifier, so the model built on top of conv_base receives
# convolutional feature maps rather than ImageNet class probabilities.
conv_base = ResNet50(weights=None,
                     include_top=False,
                     input_shape=(img_size, img_size, img_channel))
# Train the backbone together with whatever is stacked on top of it.
conv_base.trainable = True

In [None]:
# Classifier: the ResNet50 backbone followed by a small fully connected head
# that ends in a 5-way softmax.
model = Sequential([
    conv_base,
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(5, activation='softmax'),
])


In [None]:
# The network ends in a 5-way softmax over mutually exclusive classes and is
# trained on one-hot targets, so the matching loss is categorical
# cross-entropy. 'binary_crossentropy' would treat the five outputs as
# independent yes/no problems, and Keras may then also resolve the 'accuracy'
# metric to binary accuracy, which overstates multi-class performance.
model.compile(optimizer=RMSprop(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# TRAINING
epochs = 10

# Batches of training images paired with their one-hot labels.
train_generator = DataGenerator(
    path=path + 'train_images/',
    list_IDs=train_data['image_id'],
    labels=y_train,
    batch_size=batch_size,
    img_size=img_size,
    img_channel=img_channel,
)

# Fit on the generator; the returned History object is kept for inspection.
history = model.fit_generator(train_generator, epochs=epochs)

In [None]:
# Generator over the test images, one image per batch. The 'label' column of
# the sample submission only fills the generator's labels argument.
test_generator = DataGenerator(
    path=path + 'test_images/',
    list_IDs=samp_subm['image_id'],
    labels=samp_subm['label'],
    batch_size=1,
    img_size=img_size,
    img_channel=img_channel,
)

In [None]:
# Run the trained model over every batch of test_generator; verbose=1 asks
# Keras to report progress while predicting.
predict = model.predict_generator(test_generator, verbose=1)

In [None]:
# Turn each row of class scores into the index of its highest score and store
# it as the predicted label.
samp_subm['label'] = np.argmax(predict, axis=1)

In [None]:
# Write the submission table to submission.csv; index=False keeps the
# DataFrame index out of the file.
samp_subm.to_csv('submission.csv', index=False)