In [None]:
# Loading the libraries
import numpy as np
from numpy.random import seed
seed(1)
import tensorflow as tf
tf.random.set_seed(1)
import pandas as pd 
import matplotlib.pyplot as plt
from matplotlib.image import imread
%matplotlib inline
import seaborn as sns
import os
import json
import shutil
import random

In [None]:
# Load the training labels CSV into a DataFrame.
# Later cells read its 'image_id' and 'label' columns (via the df_train_1 copy).
df_train = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")

In [None]:
# Preview the first 10 rows of the training labels
df_train.head(10)

In [None]:
# Load the sample submission CSV from the competition input folder into a DataFrame
df_submit = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")

In [None]:
# Preview the first rows of the sample submission
df_submit.head()

In [None]:
# Defining utility function for submission
def submit(df_submit, disease_label, output_path='submission.csv'):
    """Fill in the predicted labels and write the submission CSV.

    Parameters
    ----------
    df_submit : pandas.DataFrame
        Submission frame. Its 'label' column is overwritten (or created)
        in place, so the caller's DataFrame is modified.
    disease_label : scalar or array-like
        Value(s) assigned to the 'label' column; an array-like must match
        the number of rows in ``df_submit``.
    output_path : str, optional
        Destination of the CSV file. Defaults to 'submission.csv' in the
        current working directory, as before.

    Returns
    -------
    None
    """
    df_submit['label'] = disease_label
    # index=False keeps the row index out of the written file
    df_submit.to_csv(output_path, index=False)
    

In [None]:
# Loading the numeric label to disease name mapping JSON file.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale. JSON object keys are always loaded as str.
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json', encoding='utf-8') as file:
    label_num_to_disease = json.load(file)

In [None]:
# Display the loaded mapping from numeric label to disease name
label_num_to_disease

In [None]:
# Work on a copy of the training DataFrame so that the columns added in the
# following cells do not modify df_train itself
df_train_1 = df_train.copy()

In [None]:
# Attach the disease name for every numeric label.
# The mapping loaded from JSON is keyed by strings, so the labels are cast
# to str before the lookup.
label_as_text = df_train_1['label'].astype(str)
df_train_1['disease'] = label_as_text.replace(label_num_to_disease)

In [None]:
# Preview the first 10 rows, now including the 'disease' column
df_train_1.head(10)

In [None]:
# List the distinct disease names present in the training labels
df_train_1['disease'].unique()

In [None]:
# Bar chart of how many training images belong to each disease
fig, ax = plt.subplots(figsize=(18, 10))
sns.countplot(data=df_train_1, x='disease', ax=ax)

In [None]:
# Table with one row per disease and the number of images it has
count = (
    df_train_1[['disease']]
    .value_counts()
    .reset_index()
    .set_axis(['disease', 'count of images'], axis='columns')
)
count

In [None]:
# Share of the training set taken by each disease, in percent
total_images = len(df_train_1['disease'])
count['Percent presence'] = (count['count of images'] / total_images) * 100
count

In [None]:
# Creating separate directories for each class
labels = df_train_1['disease'].unique()
for i in labels:
    disease = str(i)
    # exist_ok=True lets this cell be re-run without raising FileExistsError
    os.makedirs(os.path.join("/kaggle/working/train_img", disease), exist_ok=True)

In [None]:
# Copying images to their respective folders.
# Each image is copied into the folder named after its 'disease' value, so
# no class names are hard-coded and no class is silently skipped. The
# per-class folders must already exist; they are created by the directory
# creation loop that runs before this one.
train_images_dir = "/kaggle/input/cassava-leaf-disease-classification/train_images"
class_root_dir = "/kaggle/working/train_img"
for image, disease in zip(df_train_1['image_id'], df_train_1['disease']):
    # Spelling out the full destination file path means a missing class
    # folder raises an error instead of creating a file with that name.
    shutil.copy(
        os.path.join(train_images_dir, image),
        os.path.join(class_root_dir, str(disease), image),
    )
    

In [None]:
# Utility function to print images of each disease
def show_disease_images(label_name):
    """Plot a 3x3 grid of randomly chosen training images for one class.

    Images are read from ``/kaggle/working/train_img/<label_name>/``. Each of
    the nine picks is drawn independently with ``random.choice``, so the same
    image may appear more than once in the grid.

    Parameters
    ----------
    label_name : str
        Name of the class sub-directory, e.g. ``'Healthy'``.

    Raises
    ------
    FileNotFoundError
        If the class directory does not exist (raised by ``os.listdir``).
    IndexError
        If the class directory is empty (raised by ``random.choice``).
    """
    image_dir = os.path.join("/kaggle/working/train_img", label_name)
    # List the directory once, not once per subplot
    file_names = os.listdir(image_dir)
    plt.figure(figsize=(10, 10))
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        image = imread(os.path.join(image_dir, random.choice(file_names)))
        plt.imshow(image)


In [None]:
# Show nine randomly chosen images from the 'Healthy' class folder
show_disease_images('Healthy')

In [None]:
# Show nine randomly chosen images from the 'Cassava Green Mottle (CGM)' class folder
show_disease_images('Cassava Green Mottle (CGM)')

In [None]:
# Show nine randomly chosen images from the 'Cassava Brown Streak Disease (CBSD)' class folder
show_disease_images('Cassava Brown Streak Disease (CBSD)')

In [None]:
# Show nine randomly chosen images from the 'Cassava Mosaic Disease (CMD)' class folder
show_disease_images('Cassava Mosaic Disease (CMD)')

In [None]:
# Show nine randomly chosen images from the 'Cassava Bacterial Blight (CBB)' class folder
show_disease_images('Cassava Bacterial Blight (CBB)')