In [None]:
#Importing the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import cv2
import shutil

In [None]:
# Load train.csv from the SIIM-ISIC melanoma classification input folder and show the table
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/siim-isic-melanoma-classification/train.csv',
)
df

In [None]:
# Report how many non-null patient_id entries there are and how many distinct values they hold
total_ids = df['patient_id'].count()
unique_ids = len(df['patient_id'].value_counts())
print(f"The total patient ids are {total_ids}, from those the unique ids are {unique_ids} ")

In [None]:
# Collect the distinct values of the 'patient_id' column and display them
patient_id = df.loc[:, 'patient_id'].unique()
patient_id

In [None]:
# Keep a single row per 'patient_id': the first occurrence wins, later ones are dropped
df = df.drop_duplicates(
    subset=['patient_id'],
    keep='first',
)
df

In [None]:
# Per-column count of missing (null) cells
df.isnull().sum(axis=0)

In [None]:
# Turn empty-string cells into NaN so that they are treated as missing values
df = df.replace(to_replace='', value=np.nan)

In [None]:
# Drop every row that contains at least one null value
data = df.dropna(axis=0, how='any')

In [None]:
# Select the malignant samples, i.e. the rows whose 'target' equals 1
malignant = data.loc[data['target'] == 1]
# Build the image file names by appending the '.jpg' extension to each 'image_name'
malignant_image = [name + '.jpg' for name in malignant['image_name']]

In [None]:
# Number of malignant image file names collected above
len(malignant_image)

In [None]:
# Partition the malignant file names by position:
# first 40 -> train, next 12 -> validation, everything after index 52 -> test
train_img, val_img, test_img = (
    malignant_image[:40],
    malignant_image[40:52],
    malignant_image[52:],
)

In [None]:
# Folder prefix prepended to every image file name by load_image (note the trailing slash)
path = '/kaggle/input/siim-isic-melanoma-classification/jpeg/train/'

In [None]:
img_size = 512


def load_image(data_dir):
    """Read the listed image files, resize each one to a square and stack them.

    Parameters
    ----------
    data_dir : sequence of str
        Image file names. Each name is appended to the module-level ``path``
        prefix to form the location that is read.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the resized images, in the same order as
        ``data_dir``. Every image is resized to ``(img_size, img_size)``.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read one of the files (missing or not a valid image).

    Notes
    -----
    The arrays are returned exactly as ``cv2.imread`` produces them; no
    channel reordering is done here.
    NOTE(review): OpenCV documents BGR channel order for colour images, so
    consumers that assume RGB will see red and blue swapped - confirm that
    this is intended before changing it.
    """
    images = []
    for file_name in data_dir:
        full_path = path + file_name
        img_arr = cv2.imread(full_path)
        # cv2.imread reports an unreadable file by returning None instead of
        # raising; fail here with the offending path rather than with an
        # opaque assertion inside cv2.resize.
        if img_arr is None:
            raise FileNotFoundError(f"Could not read image: {full_path}")
        # Resize to the preferred square size
        images.append(cv2.resize(img_arr, (img_size, img_size)))
    return np.array(images)

In [None]:
# Load the pixel data of each split, in train -> val -> test order
x_train, x_val, x_test = [
    load_image(names) for names in (train_img, val_img, test_img)
]

In [None]:
# Shape of the stacked training images returned by load_image
x_train.shape

In [None]:
# Image augmentation using ImageDataGenerator
def make_augmenter():
    """Return a new ImageDataGenerator with the augmentation settings shared by all splits.

    Settings: rotations up to 50 degrees, zoom range 0.3, width/height shifts
    of 0.3, plus random horizontal and vertical flips.
    """
    return ImageDataGenerator(
        rotation_range=50,
        zoom_range=0.3,
        width_shift_range=0.3,
        height_shift_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
    )


# One generator per split, all configured identically.
# .fit() is intentionally not called: per the Keras documentation it is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled, and none of them is used here.
datagen1 = make_augmenter()  # used with x_train
datagen2 = make_augmenter()  # used with x_val
datagen3 = make_augmenter()  # used with x_test

In [None]:
def ensure_dir(name):
    """Create directory ``name`` if it is missing and print what happened.

    Parameters
    ----------
    name : str
        Directory path; missing parent directories are created as well.

    Returns
    -------
    bool
        True when the directory was created by this call, False when it
        already existed.

    Raises
    ------
    FileExistsError
        If ``name`` exists but is not a directory.
    """
    already_there = os.path.isdir(name)
    # exist_ok=True keeps this safe if the directory appears between the
    # check above and the creation call.
    os.makedirs(name, exist_ok=True)
    if already_there:
        print('Directory', name, 'already exists')
    else:
        print('Directory', name, 'created')
    return not already_there


# Output folders that receive the augmented train / val / test images
dir1 = 'train_aug'
dir2 = 'val_aug'
dir3 = 'test_aug'
for out_dir in (dir1, dir2, dir3):
    ensure_dir(out_dir)

In [None]:
AUG_SEED = 42  # fixed seed so the saved augmentations are repeatable across runs


def save_augmented(datagen, images, out_dir, max_count, batch_size=4, seed=AUG_SEED):
    """Draw batches from ``datagen.flow`` so that augmented images are written to ``out_dir``.

    Parameters
    ----------
    datagen : ImageDataGenerator
        Generator whose ``flow`` method produces (and saves) the batches.
    images : numpy.ndarray
        Source images passed to ``flow``.
    out_dir : str
        Folder given to ``flow`` as ``save_to_dir``.
    max_count : int
        The loop stops as soon as the number of drawn batches exceeds this
        value, i.e. after ``max_count + 1`` batches.
    batch_size : int, optional
        Images per batch (default 4).
    seed : int or None, optional
        Forwarded to ``flow``; ``None`` restores unseeded behaviour.

    Returns
    -------
    int
        Number of batches that were drawn.
    """
    batches = datagen.flow(
        images,
        batch_size=batch_size,
        seed=seed,
        save_to_dir=out_dir,
        save_prefix='M',
        save_format='jpg',
    )
    drawn = 0
    # flow() keeps yielding batches, so the loop is ended explicitly
    for _ in batches:
        drawn += 1
        if drawn > max_count:
            break
    return drawn


# Batch counts are kept under the original counter names i / j / k
i = save_augmented(datagen1, x_train, dir1, 250)
j = save_augmented(datagen2, x_val, dir2, 50)
k = save_augmented(datagen3, x_test, dir3, 50)

In [None]:
def renumber_images(folder):
    """Rename every entry of ``folder`` to ``<n>.jpg`` with n = 0, 1, 2, ...

    Entries are numbered in sorted-name order. The rename happens in two
    phases (unique temporary names first, final names second) so that a
    target name such as ``0.jpg`` can never overwrite a file that has not
    been processed yet - which is what happens with a direct rename when
    the cell is run a second time.

    Parameters
    ----------
    folder : str
        Directory whose entries are renamed in place.

    Returns
    -------
    int
        Number of entries renamed.
    """
    import uuid  # local import: only this helper needs it

    # os.listdir returns entries in arbitrary order; sort for a stable numbering
    names = sorted(os.listdir(folder))
    token = uuid.uuid4().hex  # makes the temporary names unique to this call

    # Phase 1: move everything to temporary names that cannot clash with "<n>.jpg"
    staged = []
    for n, name in enumerate(names):
        tmp_path = os.path.join(folder, f"{token}_{n}.tmp")
        os.rename(os.path.join(folder, name), tmp_path)
        staged.append(tmp_path)

    # Phase 2: assign the final sequential names
    for n, tmp_path in enumerate(staged):
        os.rename(tmp_path, os.path.join(folder, str(n) + ".jpg"))
    return len(staged)


collection1 = "train_aug/"
collection2 = "val_aug/"
collection3 = "test_aug/"
for collection in (collection1, collection2, collection3):
    renumber_images(collection)

In [None]:
# Count the files present in each augmented folder.
# Relative folder names are used, matching the cells that create, fill and
# rename these folders, instead of a hard-coded /kaggle/working prefix.
l1 = os.listdir('train_aug')
l2 = os.listdir('val_aug')
l3 = os.listdir('test_aug')
file1, file2, file3 = len(l1), len(l2), len(l3)
# Printed in train, val, test order
for count in (file1, file2, file3):
    print(count)

In [None]:
# Preview the file named 1.jpg from each augmented folder, side by side.
preview_paths = {
    'train_aug': 'train_aug/1.jpg',
    'val_aug': 'val_aug/1.jpg',
    'test_aug': 'test_aug/1.jpg',
}
fig, axes = plt.subplots(1, len(preview_paths), figsize=(15, 5))
previews = []
for ax, (folder, img_path) in zip(axes, preview_paths.items()):
    img = cv2.imread(img_path)
    # cv2.imread returns None for a missing/unreadable file; fail with the path
    # instead of passing None on to imshow.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    previews.append(img)
    # The array is shown exactly as cv2 returned it (no channel reordering),
    # the same way the separate preview cells did.
    # NOTE(review): OpenCV documents BGR order while imshow expects RGB -
    # confirm the displayed colours are the intended ones.
    ax.imshow(img)
    ax.set_title(folder)
    ax.axis('off')
img_read1, img_read2, img_read3 = previews
plt.show()

In [None]:
def zip_directory(zip_name, directory_name):
    """Pack the contents of ``directory_name`` into ``<zip_name>.zip``.

    Parameters
    ----------
    zip_name : str
        Archive path without the ``.zip`` extension.
    directory_name : str
        Folder whose contents are archived.

    Returns
    -------
    str
        Path of the archive, as returned by ``shutil.make_archive``.
    """
    return shutil.make_archive(zip_name, 'zip', directory_name)


# Each entry: (extra folder, archive base name, folder to archive).
# NOTE(review): the 'zip', 'zip1' and 'zip2' folders are created here but no
# visible code writes into them - the archives are created as train.zip,
# val.zip and test.zip, not inside these folders. They are kept so that the
# produced outputs stay the same; confirm whether they can be removed.
archive_jobs = (
    ('zip', 'train', 'train_aug'),
    ('zip1', 'val', 'val_aug'),
    ('zip2', 'test', 'test_aug'),
)
for extra_dir, zip_name, directory_name in archive_jobs:
    if not os.path.exists(extra_dir):
        os.mkdir(extra_dir)
        print('Directory', extra_dir, 'created')
    else:
        print('Directory', extra_dir, 'already exists')
    # Show where each archive was written
    print(zip_directory(zip_name, directory_name))