# Setup

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import cv2
import numpy as np
from matplotlib import pyplot as plt
from skimage.io import imread
import math
from sklearn import model_selection as ms
from numpy import random
random.seed(0)

# In[2]: Define how to read in images
def read_images(arr, index, filepath):
    """Load every file in ``filepath`` as an image into consecutive rows of ``arr``.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, so sorting keeps the image-to-row mapping (and any seeded
    split performed on the array afterwards) reproducible between runs.

    Args:
        arr: Preallocated 4-D array that is written in place; each row
            ``arr[k]`` receives one image.
        index: Row of ``arr`` that receives the first image.
        filepath: Directory whose entries are each read with ``imread``.

    Returns:
        The row index following the last image written (``index`` itself when
        the directory is empty), so successive calls can fill one array.
    """
    filenames = sorted(os.listdir(filepath))
    for offset, name in enumerate(filenames):
        arr[index + offset, :, :, :] = imread(os.path.join(filepath, name))
    return index + len(filenames)

# In[3]: Convert integer pixel values to floats and scale them to the 0-1 range
def prep_pixels(train, val, test):
    """Return float32 copies of the three image sets scaled by 1/255.

    The inputs are left untouched; each one is copied by ``astype`` before
    the division is applied.

    Args:
        train: Training images (anything exposing ``astype``).
        val: Validation images.
        test: Test images.

    Returns:
        A ``(train, val, test)`` tuple of the scaled float32 arrays.
    """
    def _to_unit_range(images):
        # Cast first so the in-place division happens on the float32 copy.
        scaled = images.astype('float32')
        scaled /= 255.0
        return scaled

    return _to_unit_range(train), _to_unit_range(val), _to_unit_range(test)


# Dataset directories. All four live under the same archive root; the two
# "augmented" folders are the ones read below.
_archive_root = '/kaggle/input/monkeypox-images/archive'
mpdatapath = f'{_archive_root}/Augmented Images/Monkeypox_augmented'
mporigpath = f'{_archive_root}/Original Images/Monkey Pox'
otherdatapath = f'{_archive_root}/Augmented Images/Others_augmented'
otherorigpath = f'{_archive_root}/Original Images/Others'

# Read in images and set up labels
# One row per file in each augmented folder.
M = len(os.listdir(mpdatapath))
N = len(os.listdir(otherdatapath))
# Allocate the image buffers as uint8 instead of np.zeros' float64 default:
# the arrays are cast to uint8 before being saved anyway, and float64 needs
# eight times the memory for the same pixels.
# NOTE(review): assumes the source images are 8-bit, 224x224, 3-channel — confirm.
X_mp = np.zeros((M, 224, 224, 3), dtype=np.uint8)
X_ot = np.zeros((N, 224, 224, 3), dtype=np.uint8)
# Class labels: 1 for the monkeypox folder, 0 for the "others" folder.
y_mp = np.ones(M)
y_ot = np.zeros(N)

# Fill each class buffer from its folder, report the file count, and show the
# first 25 images of the class in a 5x5 grid.
for class_images, source_dir, banner, file_count in (
    (X_mp, mpdatapath, "number of mp files: ", M),
    (X_ot, otherdatapath, "number of other files: ", N),
):
    read_images(class_images, 0, source_dir)
    print(banner, file_count)
    for cell in range(1, 26):
        plt.subplot(5, 5, cell)
        # Scale to 0-1 for display.
        plt.imshow(class_images[cell - 1] / 255.0)
    plt.show()

In [None]:
# ndarray.reshape returns a new array rather than modifying the array in
# place, so the result must be assigned back for the labels to actually
# become column vectors of shape (n, 1).
y_mp = y_mp.reshape((-1, 1))
y_ot = y_ot.reshape((-1, 1))

In [None]:
# Hold out 20% of each class as a test set. Both calls use the same fixed
# random_state.
import gc

X_mp_train, X_mp_test, y_mp_train, y_mp_test = ms.train_test_split(
    X_mp,
    y_mp,
    random_state=1,
    test_size=0.2,
)
# Clear up some memory
gc.collect()

X_ot_train, X_ot_test, y_ot_train, y_ot_test = ms.train_test_split(
    X_ot,
    y_ot,
    random_state=1,
    test_size=0.2,
)

In [None]:
# Report the monkeypox split sizes and show three samples from each side,
# taken at fixed positions (200-202 in train, 20-22 in test).
print("mp train size: ", len(X_mp_train))
for position in (200, 201, 202):
    plt.subplot(1, 3, position - 199)
    plt.imshow(X_mp_train[position] / 255.0)
plt.show()

print("mp test size: ", len(X_mp_test))
for position in (20, 21, 22):
    plt.subplot(1, 3, position - 19)
    plt.imshow(X_mp_test[position] / 255.0)
plt.show()

In [None]:
# Report the "others" split sizes and show twelve samples from each side in a
# 3x4 grid, taken at fixed positions (200-211 in train, 20-31 in test).
print("ot train size: ", len(X_ot_train))
for position in range(200, 212):
    plt.subplot(3, 4, position - 199)
    plt.imshow(X_ot_train[position] / 255.0)
plt.show()

print("ot test size: ", len(X_ot_test))
for position in range(20, 32):
    plt.subplot(3, 4, position - 19)
    plt.imshow(X_ot_test[position] / 255.0)
plt.show()

# Save Data

In [None]:
# Cast the image arrays to 8-bit integers before they are handed to
# Image.fromarray further down. Each array is rebound one at a time so the
# previous copy can be released before the next cast.
X_mp_train = X_mp_train.astype(np.uint8)
X_mp_test = X_mp_test.astype(np.uint8)

X_ot_train = X_ot_train.astype(np.uint8)
X_ot_test = X_ot_test.astype(np.uint8)

In [None]:
# Disabled cleanup cell: the statements below are wrapped in a string literal,
# so they do not run. If executed, they would delete the train/, test/ and val/
# directories and data.zip under /kaggle/working.
"""import shutil
shutil.rmtree("/kaggle/working/train")
shutil.rmtree("/kaggle/working/test")
shutil.rmtree("/kaggle/working/val")
os.remove("/kaggle/working/data.zip")"""

In [None]:
# Output locations: everything is written below `out`; the two test folders
# hold the held-out images of each class.
import os
from PIL import Image

out = '/kaggle/working'
test_mp_path = f'{out}/test/mp'
test_ot_path = f'{out}/test/ot'

In [None]:
# Number of folds: the training images are cut into this many chunks and one
# Fold<i> directory (i = 1..SPLITS) is created per chunk.
SPLITS=8

In [None]:
# Create the output tree: Fold<i>/{val,train}/{mp,ot} for every fold, plus a
# shared test/{mp,ot}. os.makedirs raises if a directory already exists, so
# this cell expects a clean /kaggle/working.
for fold_no in range(1, SPLITS + 1):
    for subset in ('val', 'train'):
        for label in ('mp', 'ot'):
            os.makedirs(f'/kaggle/working/Fold{fold_no}/{subset}/{label}')
for label in ('mp', 'ot'):
    os.makedirs(f'/kaggle/working/test/{label}')

In [None]:
# Cut each class's training images into SPLITS consecutive chunks; chunk k is
# used below as the validation data of fold k+1.
mp_split, ot_split = (
    np.array_split(class_images, SPLITS)
    for class_images in (X_mp_train, X_ot_train)
)

In [None]:
# Write every chunk into every fold as JPEG files. For fold number fold+1,
# the chunk whose index equals `fold` goes to val/, all other chunks go to
# train/. File names carry the chunk index and a zero-padded position.
for fold in range(SPLITS):
    for k in range(SPLITS):
        subset = 'val' if fold == k else 'train'
        target = f'{out}/Fold{fold+1}/{subset}'
        for j, pixels in enumerate(mp_split[k]):
            Image.fromarray(pixels).save(f'{target}/mp/M_{k}_{j:04d}.jpg')
        for j, pixels in enumerate(ot_split[k]):
            Image.fromarray(pixels).save(f'{target}/ot/NM_{k}_{j:04d}.jpg')

In [None]:
# Write the held-out test images of both classes as JPEG files with
# zero-padded running numbers.
for j, pixels in enumerate(X_mp_test):
    Image.fromarray(pixels).save(f'{out}/test/mp/M_{j:04d}.jpg')
for j, pixels in enumerate(X_ot_test):
    Image.fromarray(pixels).save(f'{out}/test/ot/NM_{j:04d}.jpg')

In [None]:
# Sanity check of the written folds: for every fold, subset and class, print
# how many files the folder holds and display the listing entries at
# positions 10-12.
for fold_no in range(1, SPLITS + 1):
    for subset in ('train', 'val'):
        for label in ('mp', 'ot'):
            folder = f'{out}/Fold{fold_no}/{subset}/{label}'
            datafiles = os.listdir(folder)
            print(f'Fold{fold_no} {subset} {label} data: ', len(datafiles))
            for column in (1, 2, 3):
                plt.subplot(1, 3, column)
                plt.imshow(plt.imread(os.path.join(folder, datafiles[column + 9])))
            plt.show()


# Sanity check of the written test folders: print the file count of each
# class and display the first twelve listing entries in a 3x4 grid.
for caption, folder in (
    ('mp test data: ', test_mp_path),
    ('ot test data: ', test_ot_path),
):
    datafiles = os.listdir(folder)
    print(caption, len(datafiles))
    for cell in range(12):
        plt.subplot(3, 4, cell + 1)
        plt.imshow(plt.imread(os.path.join(folder, datafiles[cell])))
    plt.show()

In [None]:
# IPython shell escape: recursively zip /kaggle/working into premade8foldholdout.zip.
!zip -r premade8foldholdout.zip '/kaggle/working'