In [None]:
import numpy as np
import pandas as pd
import PIL
import PIL.Image
import tensorflow as tf
from scipy.io import loadmat, savemat

In [None]:
#Load images and convert to numpy arrays
# Train.csv is read positionally: column 0 is used as the image id and
# column 1 as the class label.
train_annotations = np.array(pd.read_csv('Train.csv'))
# Use the builtin `str`: the `np.str` alias was removed in NumPy 1.24.
img_ids = train_annotations[:, 0].astype(str)
img_labels = train_annotations[:, 1].astype(str)
img_directory = 'Train_Images/'
classdict = {'fruit_brownspot':0, 'fruit_healthy':1, 'fruit_woodiness':2}
# An id may appear on several annotation rows. Keep each id once, together
# with the row index of its first occurrence, so the label lookup in the loop
# below does not need to rescan the whole id column for every image.
unique_ids, first_rows = np.unique(img_ids, return_index=True)

nimages = unique_ids.shape[0]
# Allocate directly as float32 instead of creating a float64 buffer and copying.
All_imgs = np.zeros([nimages, 256, 256, 3], dtype=np.float32)
All_target = np.zeros(nimages)
for i in range(nimages):
    if i%100 == 0:
        pdone = i/nimages*100
        print('Percentage done: %.1f' % pdone)
    fname = img_directory + unique_ids[i] + '.jpg'
    # The context manager closes the underlying file as soon as the pixels are
    # read. convert('RGB') guarantees three channels, so grayscale/CMYK/RGBA
    # files still fit the (256, 256, 3) slot.
    with PIL.Image.open(fname) as img:
        resized = img.convert('RGB').resize([256, 256])
    # Scale 8-bit pixel values to [0, 1].
    All_imgs[i, :, :, :] = np.array(resized)/255
    # Label of the first annotation row for this image, mapped to its class index.
    All_target[i] = classdict[img_labels[first_rows[i]]]

# Persist the full image/label set to a MATLAB-format file.
outdict = {'All_imgs':All_imgs, 'All_target':All_target}
savemat('full_set.mat', outdict)

In [None]:
#Split images into training and validation sets
# Per-class split: within each of the three classes a random 85% of the
# images go to training and the remaining images go to validation.
class1_inds, class2_inds, class3_inds = (
    np.nonzero(All_target == class_value)[0] for class_value in (0, 1, 2)
)

# Fixed seed so the same split is produced on every run.
np.random.seed(123)
for class_inds in (class1_inds, class2_inds, class3_inds):
    np.random.shuffle(class_inds)

# Number of images of each class that go to the training set (rounded down).
class1_intrain, class2_intrain, class3_intrain = (
    int(len(class_inds)*0.85)
    for class_inds in (class1_inds, class2_inds, class3_inds)
)

train_inds = np.concatenate([class1_inds[:class1_intrain],
                             class2_inds[:class2_intrain],
                             class3_inds[:class3_intrain]])
# Mix the classes together so training samples are not grouped by class.
np.random.shuffle(train_inds)

train_imgs = All_imgs[train_inds]
train_target = All_target[train_inds]

# Validation set = every image not selected for training, in original order.
in_val = np.ones(len(All_target), dtype=bool)
in_val[train_inds] = False
val_imgs = All_imgs[in_val]
val_target = All_target[in_val]

# Persist both subsets to a MATLAB-format file.
outdict = {
    'train_imgs': train_imgs,
    'train_target': train_target,
    'val_imgs': val_imgs,
    'val_target': val_target,
}
savemat('split_set.mat', outdict)