# Step 1: Reading CSV File

In [None]:
import pandas as pd
# Locations of the training metadata CSV and of the directory holding the
# training JPEGs (paths are relative to the notebook's working directory).
train_csv_path = '../input/siim-isic-melanoma-classification/train.csv'
jpeg_dir = '../input/siim-isic-melanoma-classification/jpeg/train'
# Load the metadata table; later cells read column 0 as the image file stem
# and column 6 as the 'benign' / 'malignant' label.
train_df = pd.read_csv(train_csv_path)
# Last expression of the cell: shows the first rows as a quick sanity check.
train_df.head()

# Step 2: Reading an Image and Showing

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Pick one arbitrary sample row and print its metadata.
row = train_df.iloc[45]
print(row)
# Column 0 is used as the JPEG file stem. Use explicit positional access
# (.iloc): plain `row[0]` on a label-indexed Series is deprecated in pandas
# and will be interpreted as a label lookup in future versions.
# `img` is intentionally left open: a later cell reuses it to preview the
# augmentation.
img = Image.open(f"{jpeg_dir}/{row.iloc[0]}.jpg")
plt.imshow(img)
plt.show()

# Step 3: Create Output folders

In [None]:
import os
# Root of the generated dataset; the trailing slash matters because later
# cells build paths by plain string concatenation.
folderlocation = './Melanoma-JPEG-folders/'

# Number of training subsets (tr0 .. tr{N_data - 1}).
N_data = 53

# Every split (each training subset plus 'validation') gets one sub-folder
# per class. os.makedirs creates the missing parents in one call and
# exist_ok=True makes the cell safely re-runnable without the racy
# "check, then create" pattern.
split_names = ['tr' + str(itern) for itern in range(N_data)] + ['validation']
for split_name in split_names:
    for class_name in ('benign', 'malignant'):
        os.makedirs(folderlocation + split_name + '/' + class_name, exist_ok=True)

# Step 4: Data Transformation

Malignant samples are augmented according to a serial number (1–10); each serial number selects one of the following transformations:
1. Resize-crop
2. Resize
3. Resize-crop fliplr
4. Resize-crop flipud
5. Resize-crop flipud + fliplr
6. Resize fliplr
7. Resize flipud
8. Resize flipud + fliplr
9. Resize-crop + rotate90
10. Resize + rotate90

In [None]:
import numpy as np

def Image_resize_crop512(img):
    """Scale *img* so its shorter side is 512 px, then centre-crop to 512x512.

    The aspect ratio is preserved during the resize; the excess along the
    longer side is trimmed equally from both ends by the crop.

    Args:
        img: A PIL image.

    Returns:
        A new 512x512 PIL image.
    """
    min_dim = 512
    width, height = img.size

    if width >= height:
        # Landscape or square: the height is the short side.
        hsize = min_dim
        basewidth = int(hsize * (width / height))
    else:
        # Portrait: the width is the short side.
        basewidth = min_dim
        hsize = int(basewidth * (height / width))

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name and also exists in older releases.
    img = img.resize((basewidth, hsize), Image.LANCZOS)

    # Centre the crop window on the resized image.
    left = int(img.size[0] / 2 - min_dim / 2)
    upper = int(img.size[1] / 2 - min_dim / 2)
    return img.crop((left, upper, left + min_dim, upper + min_dim))

def Image_resize512(img):
    """Resize *img* to exactly 512x512 pixels without preserving aspect ratio.

    Args:
        img: A PIL image.

    Returns:
        A new 512x512 PIL image (non-square inputs are stretched).
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name and also exists in older releases.
    return img.resize((512, 512), Image.LANCZOS)

def Data_transform(img, serial):
    """Apply the augmentation selected by *serial* and return the result.

    Serials 1, 3, 4, 5 and 9 start from the aspect-preserving resize +
    centre crop; every other serial starts from the plain 512x512 resize.
    On top of that, serials above 8 are rotated by 90 degrees, serials
    3, 5, 6, 8 are flipped left-right and serials 4, 5, 7, 8 are flipped
    up-down.

    Args:
        img: Source PIL image.
        serial: Integer augmentation selector.

    Returns:
        The transformed PIL image (RGB whenever a rotation or flip ran).
    """
    def _as_rgb(pixels):
        # NumPy hands back an ndarray; wrap it into an RGB PIL image again.
        return Image.fromarray(np.uint8(pixels)).convert('RGB')

    if serial in (1, 3, 4, 5, 9):
        img = Image_resize_crop512(img)
    else:
        img = Image_resize512(img)

    if serial > 8:
        img = _as_rgb(np.rot90(img, 1))

    if serial in (3, 5, 6, 8):
        img = _as_rgb(np.fliplr(img))

    if serial in (4, 5, 7, 8):
        img = _as_rgb(np.flipud(img))

    return img

# Visual check: run augmentation serial 10 on the sample image opened in an
# earlier cell and display the result.
img2 = Data_transform(img, 10)
plt.imshow(img2)
plt.show()



# Step 5: Image Conversion and Saving



In [None]:
import csv
import time


# Convert every training image to 512x512 and distribute it over the output
# folders:
#   * malignant, every 10th row  -> validation/malignant (no augmentation)
#   * malignant, all other rows  -> every tr{k}/malignant, each copy with the
#                                   next augmentation serial in the 1..10 cycle
#   * benign                     -> a thinned-out validation/benign sample, or
#                                   exactly one tr{k}/benign folder
since = time.time()
iter1 = 0  # 1-based count of processed rows (stays 0 for an empty table)
iter2 = 0  # running counter that cycles the augmentation serial 1..10
total_rows = len(train_df)

# itertuples() yields plain tuples, so positional row[0] / row[6] access is
# well defined (integer indexing on a label-indexed Series is deprecated) and
# iteration no longer relies on the indexer's implicit __getitem__ fallback.
for iter1, row in enumerate(train_df.itertuples(index=False), start=1):
    image_name = row[0]
    label = row[6]

    if iter1 % 2000 == 1999:
        time_elapsed = time.time() - since
        print('Time from start {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Percentage complete: {:4f}'.format(100*iter1/total_rows))
        # Tip: add a `break` here to smoke-test on a subset of the dataset.

    validation_slot = iter1 % 10 == 9
    folder_serial = iter1 % (N_data + 1)

    # The context manager closes the source file as soon as the row is done.
    with Image.open(f"{jpeg_dir}/{image_name}.jpg") as img:
        if label == 'malignant':
            if validation_slot:
                # Malignant validation images are stored without augmentation.
                out = Image_resize_crop512(img)
                out.save(folderlocation+'validation/'+label+'/'+image_name+'.jpg', 'JPEG')
            else:
                # The same malignant image goes into every training folder,
                # each time with a different augmentation.
                for train_idx in range(N_data):
                    out = Data_transform(img, (iter2 % 10) + 1)
                    iter2 = iter2 + 1
                    out.save(folderlocation+'tr' + str(train_idx) + '/'+label+'/'+image_name+'.jpg', 'JPEG')
            # Skip the benign handling below. Previously the `continue` sat
            # inside the inner loop (a no-op), so the augmented copy in
            # tr{folder_serial} was overwritten by a plain resize-crop.
            continue

        if validation_slot:
            # Benign validation images are thinned out (roughly by a factor
            # of N_data) to limit the class imbalance.
            if folder_serial == N_data and label == 'benign':
                out = Image_resize_crop512(img)
                out.save(folderlocation+'validation/'+label+'/'+image_name+'.jpg', 'JPEG')
        elif folder_serial < N_data:
            # Remaining benign images: each one lands in a single training folder.
            out = Image_resize_crop512(img)
            out.save(folderlocation+'tr' + str(folder_serial) + '/'+label+'/'+image_name+'.jpg', 'JPEG')
    
    
    
    

# Step 6: Zipping

In [None]:
import shutil
# Bundle the generated folder tree into Melanoma-JPEG-512.zip ...
shutil.make_archive(
    base_name='Melanoma-JPEG-512',
    format='zip',
    root_dir=folderlocation,
)
# ... then delete the uncompressed copy so only the archive remains.
shutil.rmtree(folderlocation)