# EDA: Sorghum-100 Image Classification

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Directory that holds the training images.
train_dir = '../input/sorghum-id-fgvc-9/train_images/'

In [None]:
# Directory that holds the test images.
test_dir = '../input/sorghum-id-fgvc-9/test/'

In [None]:
# Load the image -> cultivar mapping table into a DataFrame.
train = pd.read_csv(
    '../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv'
)

In [None]:
# Preview the first five rows of the mapping table.
train.head(n=5)

In [None]:
# Count the directory entries in the train and test image folders.
test_length = len(os.listdir(test_dir))
train_length = len(os.listdir(train_dir))
print(
    'There are a total of {} images in Train set and a total of {} images in Test set'.format(
        train_length,
        test_length,
    )
)

### MISSING VALUES

In [None]:
# Number of missing entries in each column of the mapping table.
train.isna().sum()

In [None]:
# Keep only the rows in which every column is populated.
train = train.dropna(axis='index', how='any')

### COUNTS IN EACH CATEGORY

In [None]:
# Average number of rows (images) per cultivar label.
(
    train['cultivar']
    .value_counts()
    .mean()
)

In [None]:
# Bar chart of how many rows each cultivar label has.
train['cultivar'].value_counts().plot.bar(figsize=(14, 6))
# Hide the x tick labels.
plt.xticks(visible=False)
plt.title('Number of images in each Category')
plt.show()

print(
    'There are on an average {} images in each Category'.format(
        train['cultivar'].value_counts().mean()
    )
)

In [None]:
# Names of all entries in the training image directory.
train_images = os.listdir(train_dir)

In [None]:
# Names of all entries in the test image directory.
test_images = os.listdir(test_dir)

### IMAGE PIXEL DETAILS

In [None]:
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Load the first listed training image twice: once with the loader's
# default color mode and once converted to grayscale.
sample_path = os.path.join(train_dir, train_images[0])
sample_image = load_img(sample_path)
sample_array = img_to_array(sample_image)
sample_grayscale = load_img(sample_path, color_mode="grayscale")
sample_gray_array = img_to_array(sample_grayscale)

# Report the array shape and the pixel value range of the color version.
print(f"Each image has shape: {sample_array.shape}")

print(f"The maximum pixel value used is: {np.max(sample_array)}")
print(f"The minimum pixel value used is: {np.min(sample_array)}")

### VISUALIZE TRAIN IMAGES

In [None]:
# Show the first 16 listed training images on a 4 x 4 grid.
import matplotlib.image as mpimg

nrows, ncols = 4, 4

plt.figure(figsize=(16, 16))
for slot in range(1, 17):
    # Subplot positions are 1-based; list indices are 0-based.
    plt.subplot(nrows, ncols, slot)
    img_path = os.path.join(train_dir, train_images[slot - 1])
    img = mpimg.imread(img_path)
    plt.imshow(img)


### VISUALIZE TEST IMAGES

In [None]:
# Show the first 16 listed test images on the same nrows x ncols grid.
plt.figure(figsize=(16, 16))
for slot in range(1, 17):
    # Subplot positions are 1-based; list indices are 0-based.
    plt.subplot(nrows, ncols, slot)
    img_path = os.path.join(test_dir, test_images[slot - 1])
    img = mpimg.imread(img_path)
    plt.imshow(img)

### VISUALIZE IMAGE FROM EACH CATEGORY

In [None]:
# One image name per cultivar: the smallest 'image' value in each group.
train_unique_images = (
    train.groupby('cultivar')['image']
    .min()
    .tolist()
)

In [None]:
# Show one image from each of the first 16 cultivars on a 4 x 4 grid.
nrows, ncols = 4, 4

plt.figure(figsize=(16, 16))
for slot in range(1, 17):
    # Subplot positions are 1-based; list indices are 0-based.
    plt.subplot(nrows, ncols, slot)
    img_path = os.path.join(train_dir, train_unique_images[slot - 1])
    img = mpimg.imread(img_path)
    plt.imshow(img)

### HISTOGRAM OF TRAIN IMAGES

In [None]:
# For the first 8 per-cultivar images, show the image followed by a row of
# four histograms: red, green and blue channels plus the grayscale version.
for fname in train_unique_images[0:8]:
    image_path = os.path.join(train_dir, fname)
    image = load_img(image_path)
    grayscale = load_img(image_path, color_mode="grayscale")
    gray_array = img_to_array(grayscale)
    image_array = img_to_array(image)

    plt.imshow(image)
    plt.title('Image and its color Histogram in Red, Green, Blue and Grayscale' )
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4, sharey=True, figsize=(24, 5))

    # Channel indices assume the loader's default RGB order (0=R, 1=G, 2=B).
    # bins/range are passed by keyword so the calls do not rely on the
    # positional parameter order of hist().
    ax1.hist(image_array[:, :, 0].ravel(), bins=256, range=(0, 256), color='red')
    ax2.hist(image_array[:, :, 1].ravel(), bins=256, range=(0, 256), color='green')
    # Fix: the blue histogram previously re-plotted channel 1 (green).
    ax3.hist(image_array[:, :, 2].ravel(), bins=256, range=(0, 256), color='blue')
    ax4.hist(gray_array.ravel(), bins=256, range=(0, 256))
    # The axes share y, so fixing the limit on one applies to all four.
    ax1.set_ylim(0, 120000)
    plt.show()

### HISTOGRAM OF TEST IMAGES

In [None]:
# For the first 8 listed test images, show the image followed by a row of
# four histograms: red, green and blue channels plus the grayscale version.
for fname in test_images[0:8]:
    image_path = os.path.join(test_dir, fname)
    image = load_img(image_path)
    grayscale = load_img(image_path, color_mode="grayscale")
    gray_array = img_to_array(grayscale)
    image_array = img_to_array(image)

    plt.imshow(image)
    plt.title('Image and its color Histogram in Red, Green, Blue and Grayscale' )
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4, sharey=True, figsize=(24, 5))

    # Channel indices assume the loader's default RGB order (0=R, 1=G, 2=B).
    # bins/range are passed by keyword so the calls do not rely on the
    # positional parameter order of hist().
    ax1.hist(image_array[:, :, 0].ravel(), bins=256, range=(0, 256), color='red')
    ax2.hist(image_array[:, :, 1].ravel(), bins=256, range=(0, 256), color='green')
    # Fix: the blue histogram previously re-plotted channel 1 (green).
    ax3.hist(image_array[:, :, 2].ravel(), bins=256, range=(0, 256), color='blue')
    ax4.hist(gray_array.ravel(), bins=256, range=(0, 256))
    # The axes share y, so fixing the limit on one applies to all four.
    ax1.set_ylim(0, 120000)
    plt.show()

#### Thanks for viewing my notebook! If you liked it, please upvote.