# About this notebook...

In [None]:
#Importing the necessary libraries
import pandas as pd
import matplotlib.pylab as plt
from matplotlib import pyplot
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from numpy import expand_dims
import numpy as np
import glob
import os
import cv2

In [None]:
# Load the training metadata CSV into a DataFrame and display it
train_csv_path = '/kaggle/input/siim-isic-melanoma-classification/train.csv'
Train_df = pd.read_csv(train_csv_path)
Train_df

In [None]:
#Compare the number of patient_id entries with the number of distinct patient ids
total_ids = Train_df['patient_id'].count()
unique_ids = Train_df['patient_id'].value_counts().shape[0]
print(f"The total patient ids are {total_ids}, from those the unique ids are {unique_ids} ")

In [None]:
#Distinct values of the patient_id column
patient_id = pd.unique(Train_df['patient_id'])
patient_id

In [None]:
#Keep only the first row seen for each patient_id
df = Train_df.drop_duplicates(subset=["patient_id"], keep='first')
df

In [None]:
#Count missing (NaN) values per column.
#Empty strings are not counted here; they are converted to NaN in the next cell.
df.isna().sum()

In [None]:
# Replace empty-string cells with NaN so that dropna() below removes them.
# Rebind instead of using inplace=True: df comes from drop_duplicates on Train_df,
# and in-place edits on such derived frames can trigger copy warnings.
df = df.replace('', np.nan)

In [None]:
#Drop every row that contains at least one missing value
data = df.dropna(axis=0, how='any')

In [None]:
#Select the benign samples (target == 0) and draw a fixed-size random subset
benign = data[data['target'] == 0]
#A fixed random_state makes the 800-sample draw (and the train/val/test split
#sliced from it below) identical on every Restart & Run All.
benign = benign.sample(800, random_state=42)              #choose number of samples from benign
benign_image = benign['image_name'].tolist()              #convert the column data into a list
benign_image = [name + '.jpg' for name in benign_image]   #add the .jpg extension at the end of 'image_name'

In [None]:
#Labels of the sampled benign rows as a numpy array (same row order as benign_image)
benign_label = np.array(benign['target'].tolist())
len(benign_label)

In [None]:
#Split the benign file names: first 500 -> train, next 150 -> validation, rest -> test
train_b, val_b, test_b = benign_image[:500], benign_image[500:650], benign_image[650:]

In [None]:
#Split the benign labels with the same boundaries as the benign file names
train_bl, val_bl, test_bl = benign_label[:500], benign_label[500:650], benign_label[650:]

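Note: We have only 64 unique melanoma samples, which is why augmented melanoma images are used instead. The augmented images are read from the `melanoma-512-images` dataset (`../input/melanoma-512-images/`); a link to that dataset should be added here.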

In [None]:
# List the augmented melanoma training images.
# os.listdir returns entries in arbitrary order; sorting makes the later [:500]
# slice select the same files on every run.
M_train = sorted(os.listdir('../input/melanoma-512-images/train/'))
file1 = len(M_train)
print(file1)

In [None]:
# List the augmented melanoma validation images.
# os.listdir returns entries in arbitrary order; sorting makes the later [:150]
# slice select the same files on every run.
M_val = sorted(os.listdir('../input/melanoma-512-images/val/'))
file2 = len(M_val)
print(file2)

In [None]:
# List the augmented melanoma test images.
# os.listdir returns entries in arbitrary order; sorting makes the later [:150]
# slice select the same files on every run.
M_test = sorted(os.listdir('../input/melanoma-512-images/test/'))
file3 = len(M_test)
print(file3)

In [None]:
#Take at most 500 / 150 / 150 malignant file names for train / validation / test
train_m, val_m, test_m = M_train[:500], M_val[:150], M_test[:150]

In [None]:
#Function to read images as grayscale and stack them into a numpy array.
def image(path, data):
    """Read the listed image files as grayscale and return them as one array.

    Parameters
    ----------
    path : str
        Prefix prepended to every file name (plain string concatenation, so it
        must already end with a path separator).
    data : sequence of str
        File names to read, in the order they should appear in the result.

    Returns
    -------
    numpy.ndarray
        The images stacked along the first axis; an empty array when `data`
        is empty.

    Raises
    ------
    OSError
        If cv2.imread returns None for a file (missing or undecodable).
    """
    output = []
    for name in data:
        file_path = path + name
        img_arr = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread does not raise on failure; it returns None, which would
        # otherwise be stored silently and break later reshaping.
        if img_arr is None:
            raise OSError(f"cv2.imread could not read image: {file_path}")
        output.append(img_arr)
    return np.array(output)

In [None]:
#Load the melanoma images for the train, validation and test sets
path1 = '../input/melanoma-512-images/train/'
path2 = '../input/melanoma-512-images/val/'
path3 = '../input/melanoma-512-images/test/'
train_mimg, val_mimg, test_mimg = image(path1, train_m), image(path2, val_m), image(path3, test_m)

In [None]:
#Add a trailing single-channel axis to the melanoma images: (N, img_size, img_size, 1)
img_size = 512
train_mimg = np.reshape(train_mimg, (-1, img_size, img_size, 1))
val_mimg = np.reshape(val_mimg, (-1, img_size, img_size, 1))
test_mimg = np.reshape(test_mimg, (-1, img_size, img_size, 1))

In [None]:
#Number of melanoma images in the train, validation and test sets (one per line)
print(len(train_mimg), len(val_mimg), len(test_mimg), sep='\n')

In [None]:
#Malignant labels (all ones) for the training, validation and test sets.
#Sized from the images actually loaded rather than hard-coded 500/150/150, so
#labels and images stay aligned even if a folder holds fewer files than expected.
train_ml = np.ones(len(train_mimg), dtype = int)
val_ml = np.ones(len(val_mimg), dtype = int)
test_ml = np.ones(len(test_mimg), dtype = int)

In [None]:
#Final label vectors: benign labels first, malignant labels after, for each split
y_train = np.concatenate([train_bl, train_ml], axis=0)
y_val = np.concatenate([val_bl, val_ml], axis=0)
y_test = np.concatenate([test_bl, test_ml], axis=0)

In [None]:
#Function to read images as grayscale, resize them and stack them into a numpy array.
img_size = 512
def load_image(path, data_dir, size=None):
    """Read the listed image files as grayscale, resize them and stack them.

    Parameters
    ----------
    path : str
        Prefix prepended to every file name (plain string concatenation, so it
        must already end with a path separator).
    data_dir : sequence of str
        File names to read, in the order they should appear in the result.
    size : int, optional
        Side length of the square output images. Defaults to the module-level
        `img_size` at call time, which is the original behaviour.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along the first axis; an empty array when
        `data_dir` is empty.

    Raises
    ------
    OSError
        If cv2.imread returns None for a file (missing or undecodable).
    """
    side = img_size if size is None else size
    data = []
    for name in data_dir:
        file_path = path + name
        img_arr = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None instead of raising; fail here with the file
        # name rather than with an opaque error inside cv2.resize.
        if img_arr is None:
            raise OSError(f"cv2.imread could not read image: {file_path}")
        data.append(cv2.resize(img_arr, (side, side)))  # resize to the preferred size
    return np.array(data)

In [None]:
#Load the benign images for the train, validation and test sets (all read from the same folder)
path_train = '/kaggle/input/siim-isic-melanoma-classification/jpeg/train/'
train_bimg, val_bimg, test_bimg = (
    load_image(path_train, names) for names in (train_b, val_b, test_b)
)

In [None]:
#Add a trailing single-channel axis to the benign images: (N, img_size, img_size, 1)
train_bimg = np.reshape(train_bimg, (-1, img_size, img_size, 1))
val_bimg = np.reshape(val_bimg, (-1, img_size, img_size, 1))
test_bimg = np.reshape(test_bimg, (-1, img_size, img_size, 1))

In [None]:
#Final image arrays: benign images first, melanoma images after (same order as the labels)
train = np.concatenate([train_bimg, train_mimg], axis=0)
val = np.concatenate([val_bimg, val_mimg], axis=0)
test = np.concatenate([test_bimg, test_mimg], axis=0)

In [None]:
# Normalize the pixel values to [0, 1].
# Cast to float32 first: dividing the integer image arrays by 255 directly
# promotes them to float64, doubling memory for arrays of 512x512 images.
# This also drops the redundant np.array() copy of an existing ndarray.
x_train = train.astype(np.float32) / np.float32(255)
x_val = val.astype(np.float32) / np.float32(255)
x_test = test.astype(np.float32) / np.float32(255)

In [None]:
#Build and compile the CNN (training happens in the next cell)
# Five Conv2D + MaxPool2D stages, then Flatten -> Dense(256) -> Dropout -> sigmoid output.
# NOTE(review): every hidden layer uses activation='linear', so max pooling is the only
# non-linearity before the sigmoid. Confirm this is intentional; ReLU is the usual choice.
model = Sequential([
    Conv2D(32, (3,3), strides=1, padding='same', activation='linear', input_shape=(512, 512, 1)),
    MaxPool2D((2,2), strides=2, padding='same'),
    Conv2D(64, (3,3), strides=1, padding='same', activation='linear'),
    MaxPool2D((2,2), strides=2, padding='same'),
    Conv2D(128, (3,3), strides=1, padding='same', activation='linear'),
    MaxPool2D((2,2), strides=2, padding='same'),
    Conv2D(128, (3,3), strides=1, padding='same', activation='linear'),
    MaxPool2D((2,2), strides=2, padding='same'),
    Conv2D(256, (3,3), strides=1, padding='same', activation='linear'),
    MaxPool2D((2,2), strides=2, padding='same'),
    Flatten(),
    Dense(units=256, activation='linear'),
    Dropout(0.3),
    Dense(units=1, activation='sigmoid'),
])
model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC()],
)
model.summary()

In [None]:
#Fit the model; history keeps the per-epoch training and validation metrics.
# 100 steps x batch size 4 means 400 samples are drawn per epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=4,
    epochs=30,
    steps_per_epoch=100,
    validation_data=(x_val, y_val),
)

In [None]:
#Predict class labels for the x_test set
# Sequential.predict_classes was removed in TensorFlow 2.6. For a single sigmoid
# output, thresholding predict() at 0.5 is the documented replacement.
predictions = (model.predict(x_test) > 0.5).astype("int32")
predictions = predictions.reshape(1,-1)[0]
predictions[:300]

In [None]:
#Calculate roc_auc_score
# ROC AUC measures how well the scores rank positives above negatives, so it must be
# computed from the predicted probabilities. Thresholded 0/1 labels reduce the curve
# to a single operating point.
test_scores = model.predict(x_test).reshape(-1)
rocaucscore = roc_auc_score(y_test, test_scores)
print('ROC_AUC_SCORE: %.2f' % rocaucscore)

In [None]:
#Calculate sensitivity and precision for the melanoma class (label 1)
# confusion_matrix puts true labels on rows and predicted labels on columns, so with
# labels=[0,1] ravel() yields (tn, fp, fn, tp) when 1 is the positive class.
# The previous unpacking order (tp, fn, fp, tn) reported the metrics for the benign class.
tn,fp,fn,tp = confusion_matrix(y_test, predictions, labels=[0,1]).ravel()
Sensityvity = tp/(tp+fn)
print('sensitivity:',Sensityvity)
precision = tp/(tp+fp)
print('precision:',precision)
print('False Negatives:',fn)

In [None]:
# Load the test metadata CSV into a DataFrame and display it
test_csv_path = '/kaggle/input/siim-isic-melanoma-classification/test.csv'
test_df = pd.read_csv(test_csv_path)
test_df

In [None]:
#File names of the test images: 'image_name' plus the .jpg extension, in test_df row order
all_images = [name + '.jpg' for name in test_df['image_name'].tolist()]

In [None]:
#Load test images as grayscale arrays resized to img_size x img_size
# NOTE(review): only the first 5491 file names are loaded. If test_df has more rows,
# the remaining images get no prediction. Confirm this limit is intended.
path_test = '/kaggle/input/siim-isic-melanoma-classification/jpeg/test/'
test_subset = all_images[:5491]
test_data = load_image(path_test, test_subset)

In [None]:
#Add a trailing single-channel axis to the test images: (N, img_size, img_size, 1)
test_imgs = np.reshape(test_data, (-1, img_size, img_size, 1))

In [None]:
#predict the probability of the class of test images
# The model was trained on pixels scaled to [0, 1] (x_train = train / 255), so the test
# images need the same scaling; feeding raw 0-255 values gives meaningless outputs.
# Scaling is done per chunk so no full-size float copy of the test set is created.
predict_chunk = 256
probabilities = np.concatenate([
    model.predict(test_imgs[start:start + predict_chunk].astype(np.float32) / np.float32(255))
    for start in range(0, len(test_imgs), predict_chunk)
])

In [None]:
#Copy the predicted probabilities into a separate numpy array and show its shape
result = np.asarray(probabilities).copy()
print(result.shape)

In [None]:
#Store the per-image probabilities in a list (one entry per row of result)
for idx, row in enumerate(result):
    result[idx] = row[0]
result = list(result)

In [None]:
#create data frame to store the result
df_result = pd.DataFrame(result, columns=['target'])
# Predictions were made for a slice of the test file names (in test_df row order), so
# take only the matching image names. Concatenating the full column would pad 'target'
# with NaN for every image that has no prediction.
df_image = test_df['image_name'].iloc[:len(df_result)].reset_index(drop=True)
if len(df_result) != len(test_df):
    print(f"Warning: {len(df_result)} predictions for {len(test_df)} test rows; the submission is incomplete")

final_result = pd.concat([df_image, df_result], axis = 1)
final_result.head()

In [None]:
#Write the predictions to submission.csv (header row included, no index column)
final_result.to_csv('submission.csv', index=False)