
For details on how these models were trained, refer to the training notebook:

https://www.kaggle.com/mohitkeshwanii/cassava-ensemble-vgg16-mobilenetv2-densenet169

## Import Libraries

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam

# Suppress Python warnings for the rest of the notebook.
import warnings
warnings.filterwarnings('ignore')

# Restrict TensorFlow's logger to messages of level ERROR and above.
import logging
logger = tf.get_logger()
logger.setLevel(logging.ERROR)


In [None]:
# Record the TensorFlow version this notebook runs with.
print(tf.__version__)

In [None]:
def get_files(base_dir, target_dir):
    """Count the files stored under ``base_dir/target_dir``.

    The directory tree is walked recursively, so files in nested
    sub-directories are included in the total.

    Args:
        base_dir: Root directory of the dataset.
        target_dir: Directory name, relative to ``base_dir``, to inspect.

    Returns:
        A ``(path, count)`` tuple: the joined directory path and the number
        of files found below it. ``count`` is 0 when the directory is empty
        or does not exist.
    """
    # Same join that get_path() performs.
    path = os.path.join(base_dir, target_dir)
    # os.walk yields nothing for a missing directory, so the sum is 0 then.
    # Counting the listed names directly avoids glob pattern matching, which
    # would skip file names containing characters such as '[' or '*'.
    count = sum(len(filenames) for _, _, filenames in os.walk(path))
    return path, count
    
def get_path(base_dir, target_dir):
    """Return ``target_dir`` joined onto ``base_dir`` as a single path."""
    return os.path.join(base_dir, target_dir)

## Directory Setup

In [None]:
# Locations of the competition data, all relative to base_dir.
base_dir = '/kaggle/input/cassava-leaf-disease-classification'
train_dir = 'train_images'
labels_file = 'train.csv'
test_dir = 'test_images'
json_file = 'label_num_to_disease_map.json'

train_path, train_count = get_files(base_dir,train_dir)
test_path, test_count = get_files(base_dir,test_dir)

# The with-block closes the file on exit; no explicit close() is needed.
with open(get_path(base_dir,json_file)) as f:
    class_names = json.load(f)

# JSON object keys are always strings. Key the mapping by the integer label
# itself rather than by position, so it does not depend on the order of the
# entries in the file. Assumes the keys are numeric label ids and that the
# `label` column of train.csv holds integers -- TODO confirm.
class_dict = {int(label): name for label, name in class_names.items()}

# Attach the human-readable class name to every training row.
data = pd.read_csv(get_path(base_dir, labels_file))
data['class_name'] = data.label.map(class_dict)

print("No of Train Images: {}".format(train_count))
print("No of Test Images: {}".format(test_count))
print("No of Classes: {}".format(len(class_dict)))
print("Classes:")
for v in class_dict.values():
    print(" \u2022 {}".format(v))

In [None]:
# Summarise the labels DataFrame (columns, dtypes, non-null counts).
data.info()

In [None]:
# Bar chart of the number of training rows per class name.
data['class_name'].value_counts().plot(kind='bar')

In [None]:
# Read the first decodable training image to learn the image dimensions.
image_size = None
for dirname, _, filenames in os.walk(train_path):
    for filename in filenames:
        # Join with dirname (not train_path) so files found in
        # sub-directories resolve to the right location.
        image = cv2.imread(os.path.join(dirname, filename))
        if image is None:
            # cv2.imread returns None for files it cannot decode; try the next one.
            continue
        image_size = image.shape
        break
    if image_size is not None:
        # One image is enough; stop walking the remaining directories.
        break

image_size

## Visualization

In [None]:
def visualize_img(images):
    """Plot training images in a 4x4 grid, each titled with its class name.

    Args:
        images: Iterable of image ids (file names inside ``train_path``)
            that also appear in ``data.image_id``. The grid has 16 slots.

    A new 20x15 inch figure is created on every call; nothing is returned.
    """
    fig = plt.figure(figsize=(20, 15))
    for i, image_id in enumerate(images):
        ax = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[])
        img = cv2.imread(get_path(train_path, image_id))
        # OpenCV loads images in BGR channel order while matplotlib expects
        # RGB; convert so the leaves are shown in their true colours.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(data[data.image_id == image_id].class_name.values[0])
    
# Show up to four randomly chosen images for every label present in the data.
# visualize_img creates its own figure, so no figure is created here.
for label in sorted(data.label.unique()):
    images = data[data.label == label].image_id
    # replace=False prevents the same image from being drawn twice; the size
    # is capped so that classes with fewer than four images still work.
    images = np.random.choice(images, size=min(4, len(images)), replace=False)
    visualize_img(images)

## Loading the Models
1. VGG16
2. DenseNet169

In [None]:
# Load the two saved Keras models used in the ensemble, VGG16 first.
vgg_model, densenet_model = (
    tf.keras.models.load_model(saved_model_file)
    for saved_model_file in (
        "../input/cassava-models/vgg16.h5",
        "../input/cassava-models/densenet.h5",
    )
)

In [None]:
# Load the sample submission; its image_id column drives the predictions below.
ss = pd.read_csv(os.path.join(base_dir, "sample_submission.csv"))
ss.head()

### Helper Function

In [None]:
from PIL import Image
def predict(image_path, model):
    """Run ``model`` on the single image stored at ``image_path``.

    Args:
        image_path: Path of the image file to classify.
        model: Object exposing ``predict``; it receives one batch holding
            the preprocessed image.

    Returns:
        Whatever ``model.predict`` returns for that one-image batch.
    """
    # The context manager releases the file handle as soon as the pixels are
    # read. convert("RGB") guarantees three channels even for greyscale or
    # RGBA files.
    with Image.open(image_path) as im:
        test_image = np.asarray(im.convert("RGB"))

    processed_test_image = process_image(test_image)
    # Add a leading batch axis before handing the image to the model.
    processed_test_image = np.expand_dims(processed_test_image, axis=0)

    return model.predict(processed_test_image)
    
def process_image(image):
    """Prepare an image array for the models.

    The input is cast to float32, resized to 224x224, divided by 255 and
    returned as a NumPy array.
    """
    resized = tf.image.resize(tf.cast(image, tf.float32), (224, 224))
    return (resized / 255).numpy()


## Ensemble

In [None]:
# Raw predictions of each model, one entry per row of the sample submission.
model1_list = []
model2_list = []

for image in ss.image_id:
    image_file = os.path.join(test_path, image)
    model1_list.append(predict(image_file, vgg_model))
    model2_list.append(predict(image_file, densenet_model))

# Divide each model's output by its norm so both contribute on the same
# scale, add them, and take the index of the largest combined score.
predicted_label_list = [
    np.argmax(vgg / np.linalg.norm(vgg) + dense / np.linalg.norm(dense))
    for vgg, dense in zip(model1_list, model2_list)
]

ss['label'] = predicted_label_list

In [None]:
# Display the submission frame with the predicted labels filled in.
ss

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
ss.to_csv('submission.csv', index = False)