In [None]:
import os
import json
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
from pathlib import Path 
from keras.preprocessing.image import load_img

In [None]:

def get_image_from_imgname(imgname):
    """Load a single training image given its file name.

    Args:
        imgname: File name of the image; it is appended to the hard-coded
            Kaggle ``train_images/`` directory of the cassava dataset.

    Returns:
        Whatever ``load_img`` returns for the resulting path.
    """
    train_images_dir = "/kaggle/input/cassava-leaf-disease-classification/train_images/"
    image_path = train_images_dir + imgname
    return load_img(image_path)
    
    
    
def sample_from_df_by_label(df, label, size_sample):
    """Draw a random sample of the rows that carry a given label name.

    Args:
        df: DataFrame with a ``label_names`` column.
        label: Value of ``label_names`` to keep.
        size_sample: Number of rows to draw; passed to ``DataFrame.sample``.

    Returns:
        DataFrame holding the randomly chosen rows of that label.
    """
    has_label = df['label_names'] == label
    return df[has_label].sample(size_sample)
    
def sample_x_image_from_each_label(DS, x):
    """Draw a random sample of up to ``x`` rows for every label in ``DS``.

    Args:
        DS: DataFrame with a ``label_names`` column.
        x: Number of rows requested per label. A label with fewer than ``x``
            rows contributes all of its rows instead of raising.

    Returns:
        Tuple ``(samples, labels)``: ``samples`` is a list with one sampled
        DataFrame per label and ``labels`` the matching label names, both in
        ``value_counts`` order.
    """
    label_counts = DS['label_names'].value_counts()

    samples = []
    labels = []
    for label, count in label_counts.items():
        # The sample size comes from the ``x`` argument and is capped at the
        # class size, because DataFrame.sample cannot draw more rows than
        # exist when sampling without replacement.
        n_rows = min(x, int(count))
        samples.append(sample_from_df_by_label(DS, label, n_rows))
        labels.append(label)

    return samples, labels
    
def show_img_with_title(img, title, ax):
    """Draw an image on the given axes with a title and no axis decorations.

    Args:
        img: Image passed to ``ax.imshow``.
        title: Text assigned to the axes' title artist.
        ax: Target axes; modified in place.
    """
    ax.imshow(img)
    title_artist = ax.title
    title_artist.set_text(title)
    ax.axis("off")
    
    
    
def calc_mean_color_value_of_imgs_ls(imgs_ls, mean_by_axis):
    """Average channels 0, 1 and 2 over a list of image arrays.

    Each image's channel is flattened and the flattened channels are stacked
    into one row per image before the mean is taken.

    Args:
        imgs_ls: Images indexable as ``img[:, :, channel]``; the flattened
            channels must have equal length so they can be stacked.
        mean_by_axis: Axis passed to ``mean`` on the stacked 2-D array
            (0 averages across images, 1 averages within each image).

    Returns:
        Tuple ``(red, green, blue)`` of the means for channels 0, 1 and 2.
    """
    def _channel_mean(channel_idx):
        flattened = [img[:, :, channel_idx].ravel() for img in imgs_ls]
        return np.stack(flattened).mean(axis=mean_by_axis)

    red = _channel_mean(0)
    green = _channel_mean(1)
    blue = _channel_mean(2)
    return red, green, blue


def make_ls_of_imgarray_from_sample(sample):
    """Load every image named in ``sample["image_id"]`` as a NumPy array.

    Args:
        sample: Mapping or DataFrame whose ``"image_id"`` entry iterates over
            image file names.

    Returns:
        List of arrays, one per file name, in iteration order.
    """
    return [
        np.array(get_image_from_imgname(image_name))
        for image_name in sample["image_id"]
    ]

In [None]:
BASE_DIR = Path('/kaggle/input/cassava-leaf-disease-classification')

# Training table; later cells read its `image_id` and `label` columns.
DS = pd.read_csv(BASE_DIR/'train.csv')

# Lookup from label number to disease name. JSON object keys are strings, so
# they are converted to int (presumably to match the values in DS['label']).
with open(BASE_DIR/'label_num_to_disease_map.json') as f:
    mapping = {int(code): name for code, name in json.load(f).items()}

print(mapping)

# Attach the readable disease name to every row.
DS['label_names'] = DS['label'].map(mapping)
DS.head(255)

# Image dimensions

In [None]:
# Show one training image with the axes left on, so pixel coordinates are visible.
fig, ax = plt.subplots(figsize=(10, 6))
example_image_id = DS.loc[21000, "image_id"]  # row label 21000 is an arbitrary example
image = get_image_from_imgname(example_image_id)
plt.imshow(image)

# Sample photos by category

In [None]:
# Number of example images to show per label (one grid row each).
size_sample = 4
# The returned label names (k) are not needed here: titles come from each row.
samples, k = sample_x_image_from_each_label(DS, size_sample)

# One column per label, so the column count follows the data instead of being
# hard-coded. squeeze=False keeps `axes` two-dimensional even for a single row
# or column, so the axes[row, col] indexing below always works.
n_labels = len(samples)
fig, axes = plt.subplots(
    size_sample,
    n_labels,
    figsize=(4 * n_labels, size_sample * 5),
    squeeze=False,
)

# Hide every panel up front so a slot left without an image stays blank.
for panel in axes.ravel():
    panel.axis("off")

for counter_place, sample in enumerate(samples):
    for i, (_, row) in enumerate(sample.iterrows()):
        image = get_image_from_imgname(row.image_id)
        show_img_with_title(image, row.label_names, axes[i, counter_place])

fig.tight_layout()
plt.show()


# Distribution of classes in the dataset

In [None]:
# Count of training rows per label name, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(10, 6))

# NOTE(review): orient='v' is passed together with y=, which looks
# contradictory; confirm how the installed seaborn version treats it.
sns.countplot(y=DS["label_names"], orient='v', ax=ax)
ax.set_title('Target distribution')

# Mean color distribution by category

In [None]:
# One panel per label, each showing the density of per-pixel mean channel values.
fig, axs = plt.subplots(1, 5, figsize=(20, 5))

samples, labels = sample_x_image_from_each_label(DS, 2000)

for i, (label_sample, label_name) in enumerate(zip(samples, labels)):
    imgs_ls = make_ls_of_imgarray_from_sample(label_sample)
    # Axis 0 averages across images, leaving one mean value per pixel position.
    red, green, blue = calc_mean_color_value_of_imgs_ls(imgs_ls, 0)

    panel = axs[i]
    for channel_values, channel_color in ((red, 'red'), (green, 'green'), (blue, 'blue')):
        sns.kdeplot(channel_values, alpha=0.5, color=channel_color, ax=panel)
    panel.set_title(label_name)