**Pre-processing the classification images**

In [None]:
import numpy as np
import cv2
import pandas as pd
import csv
from google.colab.patches import cv2_imshow
import math
from PIL import Image
import glob
import os
from sklearn.metrics import *
from zipfile import ZipFile
from collections import *
from imblearn.over_sampling import SMOTE

**Mounting the google drive**

In [None]:
# Mount Google Drive at /content/gdrive; the dataset paths used further down
# all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Preprocessing images

**1. Pre-processing**

Image Resolution Scaling

Each input image is resized to 256 × 256 pixels using bilinear interpolation.

**2. Solving Uneven illumination**

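1. Convert the image (loaded by OpenCV in BGR channel order) to the Lab color space
2. Apply CLAHE (contrast-limited adaptive histogram equalization) to the L (lightness) channel
3. Merge the enhanced L channel with the unchanged a and b channels
4. Convert the enhanced Lab image back to BGR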

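**3. Hair/Vein Removal**

1. Convert the CLAHE-enhanced image to grayscale
2. Apply a black-hat morphological filter (17 × 17 kernel) to highlight thin dark structures such as hairs
3. Threshold the black-hat response to obtain a binary hair mask
4. Inpaint the masked pixels (Telea method) to remove the hairs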

In [None]:
def _bl_axis_weights(old_len, new_len):
    """Compute source indices and blend weights for one axis of the resize.

    Output index ``i`` maps back to the source coordinate
    ``i * (old_len / new_len)``.

    Returns:
        (lo, hi, w_lo, w_hi): integer index arrays of the two neighbouring
        source samples and the float weights applied to each of them.
    """
    coords = np.arange(new_len) * (old_len / new_len)
    lo = np.floor(coords).astype(np.intp)
    # Clamp the upper neighbour so it never indexes past the last sample.
    hi = np.minimum(old_len - 1, np.ceil(coords).astype(np.intp))
    # When both neighbours coincide (integer coordinate, or clamped at the
    # border) the generic weights would both be 0, so take the sample as-is.
    same = lo == hi
    w_lo = np.where(same, 1.0, hi - coords)
    w_hi = np.where(same, 0.0, coords - lo)
    return lo, hi, w_lo, w_hi


def bl_resize(original_img, new_h, new_w):
    """Resize an image to ``new_h`` x ``new_w`` with bilinear interpolation.

    Each output pixel ``(i, j)`` is mapped to the source coordinate
    ``(i * old_h / new_h, j * old_w / new_w)`` and blended from the (up to)
    four surrounding source pixels.

    Args:
        original_img: array of shape (height, width, channels).
        new_h: target height in pixels (>= 0).
        new_w: target width in pixels (>= 0).

    Returns:
        ``np.uint8`` array of shape (new_h, new_w, channels). Interpolated
        values are truncated (not rounded) by the final cast.

    Raises:
        ValueError: if ``original_img`` is not 3-dimensional or a target
            dimension is negative.
    """
    old_h, old_w, c = original_img.shape
    if new_h < 0 or new_w < 0:
        raise ValueError("negative dimensions are not allowed")
    # An empty target needs no interpolation; returning early also avoids
    # dividing by a zero target size when computing the scale factors.
    if new_h == 0 or new_w == 0:
        return np.zeros((new_h, new_w, c), dtype=np.uint8)

    row_lo, row_hi, wr_lo, wr_hi = _bl_axis_weights(old_h, new_h)
    col_lo, col_hi, wc_lo, wc_hi = _bl_axis_weights(old_w, new_w)

    # Reshape the weights so they broadcast over (new_h, new_w, channels).
    wr_lo = wr_lo[:, None, None]
    wr_hi = wr_hi[:, None, None]
    wc_lo = wc_lo[None, :, None]
    wc_hi = wc_hi[None, :, None]

    # Gather the four neighbours of every output pixel in one shot.
    v1 = original_img[np.ix_(row_lo, col_lo)]
    v2 = original_img[np.ix_(row_hi, col_lo)]
    v3 = original_img[np.ix_(row_lo, col_hi)]
    v4 = original_img[np.ix_(row_hi, col_hi)]

    # Interpolate along the rows first, then along the columns.
    q1 = v1 * wr_lo + v2 * wr_hi
    q2 = v3 * wr_lo + v4 * wr_hi
    resized = q1 * wc_lo + q2 * wc_hi
    return resized.astype(np.uint8)
 
 
def apply_clahe(red_img_arr, clip_limit=2.0, tile_grid_size=(8, 8)):
  """Even out illumination by applying CLAHE to the lightness channel.

  The image is converted from BGR to Lab, CLAHE is applied to the L channel
  only, and the result is converted back to BGR.

  Args:
    red_img_arr: 3-channel image in BGR channel order
      (presumably 8-bit, as produced by bl_resize - TODO confirm).
    clip_limit: CLAHE contrast clip limit (default 2.0).
    tile_grid_size: CLAHE tile grid size (default (8, 8)).

  Returns:
    The contrast-enhanced image as a BGR array.
  """
  # 1. BGR -> Lab
  image_lab = cv2.cvtColor(red_img_arr, cv2.COLOR_BGR2LAB)
  # 2. CLAHE on the L channel only
  clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
  enhanced_l = clahe.apply(image_lab[:, :, 0])
  # 3. Re-assemble Lab with the enhanced L and the original a/b channels
  image_lab_clahe = np.stack((enhanced_l, image_lab[:, :, 1], image_lab[:, :, 2]), axis=2)
  # 4. Lab -> BGR
  image_bgr = cv2.cvtColor(image_lab_clahe, cv2.COLOR_LAB2BGR)
  return image_bgr


def Hair_removal(image_clahe):
  """Remove hair-like structures from an image by black-hat masking and inpainting.

  Args:
    image_clahe: 3-channel image in BGR channel order, as returned by
      apply_clahe (presumably 8-bit - TODO confirm).

  Returns:
    PIL.Image.Image built from the inpainted array (channel order is left
    untouched, i.e. still BGR).
  """
  # The input is BGR (it originates from cv2.imread / COLOR_LAB2BGR), so use
  # the BGR conversion; RGB2GRAY would swap the red and blue luminance weights.
  grayScale = cv2.cvtColor(image_clahe, cv2.COLOR_BGR2GRAY)

  # Cross-shaped 17x17 kernel for the morphological filtering
  kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))

  # Black-hat filtering highlights the hair contours in the grayscale image
  blackhat = cv2.morphologyEx(grayScale, cv2.MORPH_BLACKHAT, kernel)

  # Binarise the black-hat response to obtain the inpainting mask
  _, hair_mask = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)

  # Inpaint the masked pixels of the colour image
  dst = cv2.inpaint(image_clahe, hair_mask, 1, cv2.INPAINT_TELEA)

  return Image.fromarray(dst)

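# Separating unbalanced data into class folders

**Note:** the cell below uses `images`, `Y_train` and `root_path`, which are defined in the cells under "Preparing class labels" further down. Run those cells first.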

In [None]:
print("Separation of unbalanced data into class folders....")
# Pre-process every training image (resize -> CLAHE -> hair removal) and save
# it into the folder of its class: <root_path>/old_<label>/<file name>.
# Requires `images`, `Y_train` and `root_path` to be defined already.

# Labels are matched to images by position, so the two lists must line up.
if len(Y_train) != len(images):
    raise ValueError(
        "Y_train has %d labels but there are %d images; labels would be "
        "assigned to the wrong files" % (len(Y_train), len(images))
    )

# Read from the extracted dataset folder explicitly instead of relying on the
# current working directory.
input_dir = os.path.join(root_path, 'Unbalanced_train', 'ISIC_2019_Training_Input')

# NOTE: X keeps every processed 256x256 image in memory.
X = []
for i, (image_name, label) in enumerate(zip(images, Y_train)):
    print((i, image_name))
    image = cv2.imread(os.path.join(input_dir, image_name))
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: " + os.path.join(input_dir, image_name))
    image = bl_resize(image, 256, 256)
    image_clahe = apply_clahe(image)
    # Hair_removal returns a PIL image; convert back to an array for saving.
    image = np.array(Hair_removal(image_clahe))
    X.append(image)
    out_path = os.path.join(root_path, 'old_' + label, image_name)
    # cv2.imwrite returns False on failure instead of raising.
    if not cv2.imwrite(out_path, image):
        print("WARNING: could not write " + out_path)


# Preparing class labels

In [None]:
# Root folder of the classification dataset on the mounted Google Drive.
root_path = '/content/gdrive/MyDrive/ISIC_DATASETS/ISIC_Datasets/Classification'
# Later cells use paths relative to the working directory, so move into it.
os.chdir(root_path)  

In [None]:
# Unzipping the training input into <root_path>/Unbalanced_train

unbalanced_dir = os.path.join(root_path, 'Unbalanced_train')
# exist_ok lets the cell be re-run without failing on the existing folder.
os.makedirs(unbalanced_dir, exist_ok=True)
# ZipFile.extractall() below extracts into the current working directory.
os.chdir(unbalanced_dir)
filename = root_path+'/ISIC_2019_Training_Input.zip'
print("Extracting files from ISIC_2019_Training_Input.zip.....")
with ZipFile(filename, 'r') as archive:
    archive.extractall()

In [None]:
# Move into the extracted image folder; the next cell globs it with a relative pattern.
os.chdir(root_path+'/Unbalanced_train/ISIC_2019_Training_Input')

In [None]:
# Collect the .jpg file names of the current working directory in sorted
# order, then return to the dataset root before reporting the count.
images = sorted(glob.glob('*.jpg'))
os.chdir(root_path)
print("Number of training samples: = ", len(images))

25331


In [None]:
# Creating one output folder per class: <root_path>/old_<CLASS>
for class_name in ('MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC'):
    # Anchor on root_path rather than the working directory, and use exist_ok
    # so that re-running the cell does not fail on existing folders.
    os.makedirs(os.path.join(root_path, 'old_' + class_name), exist_ok=True)

In [None]:
print("Preparing the Y_train....")
# Build Y_train so that Y_train[k] is the class label of images[k].
# A row's label is the first class column (in the order below) equal to 1.
CLASS_NAMES = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']

# getting the corresponding labels for the images
df = pd.read_csv(os.path.join(root_path, "ISIC_2019_Training_GroundTruth.csv"))
Names = df.image
MEL, NV, BCC, AK, BKL, DF, VASC, SCC = (df[name] for name in CLASS_NAMES)

img_names = list(Names)

# One pass over the table: image name -> label (None if no class column is 1).
class_flags = df[CLASS_NAMES].to_numpy() == 1
first_class = class_flags.argmax(axis=1)
has_class = class_flags.any(axis=1)
label_by_name = {}
for name, class_idx, labelled in zip(img_names, first_class, has_class):
    # Keep the first row for a name if it appears more than once.
    if name not in label_by_name:
        label_by_name[name] = CLASS_NAMES[class_idx] if labelled else None

Y_train = []
unlabelled = []
for image_file in images:
    # File names are "<image id>.jpg"; the table stores the bare id.
    label = label_by_name.get(os.path.splitext(image_file)[0])
    if label is None:
        unlabelled.append(image_file)
    else:
        Y_train.append(label)

# Skipping an image silently would shift every later label by one position
# relative to `images`, so fail loudly instead.
if unlabelled:
    raise ValueError(
        "%d image(s) have no class label in the ground truth, e.g. %s"
        % (len(unlabelled), unlabelled[:5])
    )

In [None]:
# Move back into the extracted image folder so the image file names resolve
# relative to the working directory.
os.chdir(root_path+'/Unbalanced_train/ISIC_2019_Training_Input')

# Ensuring correct separation of images into folders

In [None]:
# check condition for folders
# For every class, compare the images saved in its old_<CLASS> folder with the
# images the ground-truth table assigns to that class, and print the ones that
# are expected but missing on disk.

CLASSIFICATION_DIR = '/content/gdrive/MyDrive/ISIC_DATASETS/ISIC_Datasets/Classification'

# (class column, header line printed before the class report)
CLASS_CHECKS = [
    ('AK', "for AK..."),
    ('MEL', "for MEL...."),
    ('NV', "for NV...."),
    ('BCC', "for BCC...."),
    ('BKL', "for BKL...."),
    ('DF', "for DF...."),
    ('VASC', "for VASC...."),
    ('SCC', "for SCC...."),
]

start = 0
num = len(images)

df = pd.read_csv(CLASSIFICATION_DIR + "/ISIC_2019_Training_GroundTruth.csv")
Names = df.image
MEL, NV, BCC, AK, BKL, DF, VASC, SCC = (
    df[name] for name in ('MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC')
)


def _check_class_folder(class_name, header):
    """Print the on-disk count, the expected count and the missing image ids.

    Returns:
        (on_disk, expected): sorted image ids found in the class folder, and
        the image ids the ground truth assigns to the class.
    """
    print(header)

    # The working directory is changed on purpose, as in the original cell;
    # the notebook ends up inside the last checked class folder.
    os.chdir(CLASSIFICATION_DIR + '/old_' + class_name)
    on_disk = [file_name[:-4] for file_name in sorted(glob.glob('*.jpg'))]

    # Only the first `num` rows of the table are considered, one per image.
    rows = df.iloc[start:num]
    expected = rows.loc[rows[class_name] == 1, 'image'].tolist()

    print(len(on_disk))
    print(len(expected))

    # Set membership keeps the check linear even for the largest class.
    present = set(on_disk)
    for image_id in expected:
        if image_id not in present:
            print(image_id)

    return on_disk, expected


img1 = []
img2 = []
for class_name, header in CLASS_CHECKS:
    img1, img2 = _check_class_folder(class_name, header)
