In [None]:
import cv2
import numpy as np
import pandas as pd
import os

### Process data: build X_train, X_test, y_train and y_test as NumPy arrays and save them to `./Data`

In [None]:
def changelabel(label):
    """Encode a diagnosis label as an integer class id.

    Parameters
    ----------
    label : object
        Raw label value; compared against the exact, case-sensitive strings
        'benign' and 'malignant'.

    Returns
    -------
    int or None
        0 for 'benign', 1 for 'malignant', and None for any other value.
    """
    is_benign = label == 'benign'
    if is_benign:
        return 0
    if label == 'malignant':
        return 1
    # Anything else is left unmapped, exactly as an implicit fall-through would.
    return None

In [None]:
# Load the combined training labels and encode them as integers with
# changelabel ("benign" -> 0, "malignant" -> 1).
df = pd.read_csv('./Labels/train_combined_GT.csv')
train_labels = df['benign_malignant'].map(changelabel)

# changelabel returns None for any value other than the two known labels.
# Those entries would otherwise be written to disk silently as NaN (and turn
# the whole array into floats), so fail here and report the offending values.
unmapped = train_labels.isna()
if unmapped.any():
    bad_values = df.loc[unmapped, 'benign_malignant'].unique().tolist()
    raise ValueError(
        f'Unrecognised values in column "benign_malignant": {bad_values}'
    )

# Convert to a NumPy array and save it; create the output folder first so the
# cell also works on a fresh checkout where ./Data does not exist yet.
y_train = train_labels.to_numpy()
os.makedirs('./Data', exist_ok=True)
np.save('./Data/y_train.npy', y_train)

In [None]:
# Load the test labels. The CSV is read without a header row, so columns are
# addressed by position; column 1 holds the label values.
df = pd.read_csv('./Labels/ISBI2016_ISIC_Part3_Test_GroundTruth.csv', header=None)

# Convert the label column to an int32 NumPy array.
y_test = df[1].to_numpy().astype(np.int32)

# Create the output folder first so the cell also works on a fresh checkout
# where ./Data does not exist yet, then save the array.
os.makedirs('./Data', exist_ok=True)
np.save('./Data/y_test.npy', y_test)

In [None]:
def process_x(x_dir, save_name, im_shape=(480, 360)):
    """Convert a folder of images into one float32 array saved under ./Data.

    Every regular file directly inside ``x_dir`` is read as a grayscale image,
    scaled by 1/255, resized to ``im_shape`` and stacked into a single array
    that is written to ``./Data/{save_name}.npy``.

    Parameters
    ----------
    x_dir : str
        Directory containing the image files. Sub-directories are ignored.
    save_name : str
        Base name (without extension) of the ``.npy`` file to write.
    im_shape : tuple of int, optional
        Target size passed to ``cv2.resize`` as ``(width, height)``. With the
        default ``(480, 360)`` each image becomes an array of 360 rows by
        480 columns.

    Returns
    -------
    None
        The array is only written to disk.

    Raises
    ------
    ValueError
        If a file in ``x_dir`` cannot be decoded as an image.

    Notes
    -----
    Paths are sorted lexicographically. That matches numerical order only when
    the numeric part of the file names is zero-padded to a fixed width, so the
    saved rows line up with label files that are in numerical order only under
    that condition.
    """
    # The context manager closes the directory handle as soon as the listing
    # is done; only regular files are kept so sub-folders are not passed to
    # the image reader.
    with os.scandir(x_dir) as entries:
        image_paths = sorted(entry.path for entry in entries if entry.is_file())

    images = []
    for im_file in image_paths:
        image = cv2.imread(im_file, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # name the file rather than failing later on `None / 255`.
            raise ValueError(f'Could not read image file: {im_file}')

        # Normalise first, then resize (same order of operations as before).
        image_rs = cv2.resize(image / 255, im_shape)

        # Cast per image: same values as casting the stacked array at the end,
        # but the full data set is never held in float64.
        images.append(image_rs.astype(np.float32))

    # float32 is what Keras expects; an empty folder yields an empty array.
    x_array = np.array(images, dtype=np.float32)

    # Create the output folder so saving also works on a fresh checkout.
    os.makedirs('./Data', exist_ok=True)
    np.save(f'./Data/{save_name}.npy', x_array)

In [None]:
# Folder that holds the training images
x_dir = './Images/ISBI2016_ISIC_Part3_Training_Data'

# Build the training image array and write it to ./Data/X_train.npy
process_x(x_dir, save_name='X_train')

In [None]:
# Folder that holds the test images
x_dir = './Images/ISBI2016_ISIC_Part3_Test_Data'

# Build the test image array and write it to ./Data/X_test.npy
process_x(x_dir, save_name='X_test')