# SIIM-ISIC Melanoma Classification
## Identify melanoma in lesion images

https://www.kaggle.com/c/siim-isic-melanoma-classification/overview


In [None]:
import os

import pandas as pd  
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Show which files and folders ship with the competition dataset
competition_files = os.listdir("../input/siim-isic-melanoma-classification")
print(competition_files)

In [None]:
# Folders holding the train / test JPEG images of the competition dataset
_JPEG_DIR = "../input/siim-isic-melanoma-classification/jpeg"
TRAIN_IMAGE_PATH = _JPEG_DIR + "/train"
TEST_IMAGE_PATH = _JPEG_DIR + "/test"

In [None]:
# Load the training metadata table
train_csv_path = '../input/siim-isic-melanoma-classification/train.csv'
train_df = pd.read_csv(train_csv_path)

# Work on a random subset of 100 rows (unseeded, so it differs between runs)
sample_df = train_df.sample(n=100)

# Test metadata is not loaded in this notebook:
# test_df = pd.read_csv('../input/siim-isic-melanoma-classification/test.csv')

In [None]:
# Report (rows, columns) of the full training table and of the sample
for shape_label, frame in (
    ('Training data shape: ', train_df),
    ('Training Sample data shape: ', sample_df),
):
    print(shape_label, frame.shape)
# print('Test data shape: ', test_df.shape)

# Last expression of the cell: the notebook renders the first sample rows
sample_df.head()

In [None]:
# test_df.head()

In [None]:
# Number of non-null image names in the sampled rows.
# NOTE(review): the printed label says "Train DataSet" but the value comes
# from sample_df, not train_df - confirm which one is intended.
image_count = sample_df['image_name'].count()
print("Total images in Train DataSet: ", image_count)

In [None]:
# del train_df

# Visualize

In [None]:
images = sample_df['image_name'].values

# Build the file names once instead of once per drawn image
image_files = images + '.jpg'

# Draw up to 9 *distinct* images: a single call with replace=False avoids
# showing the same picture twice in the grid, and the min() keeps the call
# valid if the sample ever holds fewer than 9 rows.
n_images = min(9, len(image_files))
random_images = list(np.random.choice(image_files, size=n_images, replace=False))

print('Display Random Images')

# Adjust the size of your images
plt.figure(figsize=(10,8))

# Plot each selected image in its own cell of a 3x3 grid
for i, image_file in enumerate(random_images):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(TRAIN_IMAGE_PATH, image_file))
    plt.imshow(img, cmap='gray')
    plt.axis('off')

# Adjust subplot parameters to give specified padding
plt.tight_layout()

# Histograms
A histogram is a graphical representation of how frequently each colour value occurs in an image, i.e. the frequency of its pixel intensity values. In an RGB colour space each channel value ranges from 0 to 255, where 0 is the lowest intensity (black) and 255 is the highest (white when all three channels are at 255). Analysing a histogram helps us understand the brightness, contrast and intensity distribution of an image. Now let's look at the histogram of a randomly selected sample from each category.

In [None]:
f = plt.figure(figsize=(16,8))

# Left panel: the image itself, titled with its benign/malignant label
f.add_subplot(1,2, 1)

# First row of the training table, fetched by position
first_row = train_df.iloc[0]
sample_img = first_row['image_name']+'.jpg'
raw_image = plt.imread(os.path.join(TRAIN_IMAGE_PATH, sample_img))
plt.imshow(raw_image, cmap='gray')
plt.colorbar()
plt.title(first_row['benign_malignant'])
print(f"Image dimensions:  {raw_image.shape[0],raw_image.shape[1]}")
print(f"Maximum pixel value : {raw_image.max():.1f} ; Minimum pixel value:{raw_image.min():.1f}")
print(f"Mean value of the pixels : {raw_image.mean():.1f} ; Standard deviation : {raw_image.std():.1f}")

# Right panel: one semi-transparent histogram per colour channel
f.add_subplot(1,2, 2)

channel_styles = (
    ('red', 'Red_Channel'),
    ('Green', 'Green_Channel'),
    ('Blue', 'Blue_Channel'),
)
for channel, (color, label) in enumerate(channel_styles):
    # range=(0, 256) pins the 256 bins to one bin per intensity level.
    # Without it the bins span only [min, max] of the channel, so their width
    # drops below 1 and some bins can never receive an integer value, which
    # shows up as periodic gaps in the plot.
    # Assumes 8-bit pixel data (0-255), as read from a JPEG file.
    plt.hist(
        raw_image[:, :, channel].ravel(),
        bins=256,
        range=(0, 256),
        color=color,
        alpha=0.5,
        label=label,
    )

plt.xlabel('Intensity Value')
plt.ylabel('Count')
# Labels were attached to each histogram above, so legend() needs no arguments
plt.legend()
plt.show()