In [1]:
from keras.models import Sequential
from ipynb.fs.full.DataGenerator import DataGenerator
from ipynb.fs.full.PreprocessData import Preprocessing

import os
import numpy as np
import matplotlib.pyplot as plt

import imgaug as ia
import imgaug.augmenters as iaa

In [2]:
class DataLoader:
    """Load the NORMAL / PNEUMONIA image splits and expose them as generators.

    On construction the class runs ``Preprocessing("input", ntrain).preprocess()``
    and stores three ``(images, labels)`` tuples: ``train_data``, ``val_data`` and
    ``test_data``. Labels are ``0.0`` for NORMAL and ``1.0`` for PNEUMONIA.

    Parameters
    ----------
    ntrain : int
        Forwarded to ``Preprocessing``; also used when choosing the generator
        batch size. Presumably the number of samples to load -- TODO confirm
        against ``Preprocessing``.
    batch_size : int, default 64
        Requested batch size for the generators.
    augment_data : bool, default True
        Forwarded to the *training* generator only.
    shuffle : bool, default True
        Forwarded to the *training* generator only.
    plot_distribuition : bool, default True
        When true, plot the class distribution of every split after loading.
        (The spelling is kept as-is because it is part of the public interface.)
    """

    # Display names of the two classes, indexed by their numeric label.
    _CLASS_NAMES = ("NORMAL", "PNEUMONIA")

    def __init__(self, ntrain, batch_size=64, augment_data=True, shuffle=True, plot_distribuition=True):
        self.ntrain = ntrain
        self.batch_size = batch_size
        self.augment_data = augment_data
        self.shuffle = shuffle

        preprocess_files = Preprocessing("input", ntrain)
        (train_normal, train_pneumonia,
         test_normal, test_pneumonia,
         val_normal, val_pneumonia) = preprocess_files.preprocess()

        # Each attribute below is a tuple of (images, labels).
        self.train_data = self.load_train_data(train_normal, train_pneumonia)
        # NOTE(review): the arrays unpacked as "test_*" feed the validation split
        # and the "val_*" arrays feed the test split. This may be deliberate
        # (e.g. the on-disk validation folder being too small) or a mix-up --
        # confirm before relying on the split names. Behaviour is unchanged here.
        self.val_data = self.load_val_data(test_normal, test_pneumonia)
        self.test_data = self.load_test_data(val_normal, val_pneumonia)

        if plot_distribuition:
            self.plot_data_distribuition()

    @staticmethod
    def _concat_with_labels(normal, pneumonia):
        """Concatenate both classes and build the matching label vector.

        Returns ``(images, labels)`` where ``images`` is ``normal`` followed by
        ``pneumonia`` and ``labels`` holds ``0.0`` for every NORMAL image and
        ``1.0`` for every PNEUMONIA image, in the same order.
        """
        normal_labels = np.zeros(len(normal))
        pneumonia_labels = np.ones(len(pneumonia))
        images = np.concatenate((normal, pneumonia))
        labels = np.concatenate((normal_labels, pneumonia_labels))

        print("images : ", np.shape(images), "labels : ", np.shape(labels))

        return (images, labels)

    @staticmethod
    def _class_counts(labels):
        """Return ``(n_normal, n_pneumonia)`` for a 0/1 label vector."""
        labels = np.asarray(labels)
        return (int(np.count_nonzero(labels == 0)),
                int(np.count_nonzero(labels == 1)))

    def load_train_data(self, train_normal, train_pneumonia):
        """
        Return ``(images, labels)`` for the training split: the NORMAL images
        followed by the PNEUMONIA images, with labels 0 (NORMAL) and 1 (PNEUMONIA).
        Also prints the shape of each input and of the result.
        """
        print("train_normal : ", np.shape(train_normal))
        print("train_pneumonia : ", np.shape(train_pneumonia))

        return self._concat_with_labels(train_normal, train_pneumonia)

    def load_val_data(self, val_normal, val_pneumonia):
        """
        Return ``(images, labels)`` for the validation split: the NORMAL images
        followed by the PNEUMONIA images, with labels 0 (NORMAL) and 1 (PNEUMONIA).
        """
        return self._concat_with_labels(val_normal, val_pneumonia)

    def load_test_data(self, test_normal, test_pneumonia):
        """
        Return ``(images, labels)`` for the test split: the NORMAL images
        followed by the PNEUMONIA images, with labels 0 (NORMAL) and 1 (PNEUMONIA).
        """
        return self._concat_with_labels(test_normal, test_pneumonia)

    def plot_data_distribuition(self):
        """Draw one bar chart per split showing the NORMAL / PNEUMONIA counts.

        Reads ``self.train_data``, ``self.val_data`` and ``self.test_data``; it
        must therefore be called after they have been loaded.
        """
        splits = (("train", self.train_data),
                  ("validation", self.val_data),
                  ("test", self.test_data))

        fig, axes = plt.subplots(1, len(splits), figsize=(4 * len(splits), 4))
        for ax, (split_name, (_, labels)) in zip(axes, splits):
            ax.bar(self._CLASS_NAMES, self._class_counts(labels))
            ax.set(title="{} split".format(split_name), ylabel="number of images")
        fig.tight_layout()
        plt.show()

    def _build_generator(self, data, shuffle, augment_data):
        """Wrap an ``(images, labels)`` tuple in a ``DataGenerator``."""
        # NOTE(review): max() makes the effective batch size grow with ntrain
        # (and never drop below batch_size). If the intent was to cap the batch
        # at the amount of available data, this should be min() -- confirm
        # against DataGenerator before changing. Behaviour is unchanged here.
        return DataGenerator(*data,
                             batch_size=max(self.batch_size, self.ntrain),
                             shuffle=shuffle,
                             augment_data=augment_data
                            )

    def load_train_generator(self):
        """Return a generator over the training split using the instance's
        ``shuffle`` and ``augment_data`` settings."""
        return self._build_generator(self.train_data,
                                     shuffle=self.shuffle,
                                     augment_data=self.augment_data)

    def load_validation_generator(self):
        """Return a generator over the validation split (no shuffling, no
        augmentation)."""
        return self._build_generator(self.val_data,
                                     shuffle=False,
                                     augment_data=False)

    def load_test_generator(self):
        """Return a generator over the test split (no shuffling, no
        augmentation)."""
        return self._build_generator(self.test_data,
                                     shuffle=False,
                                     augment_data=False)

In [3]:
# samples = 2
# batch_size=64
# image_shape = (224, 244, 3)
# model_architecture = 'custom'
# percent=0.6
# learning_rate=0.001
# epochs = 25

In [4]:
# data_loader = DataLoader(ntrain= samples,
#                          batch_size=batch_size,
#                          augment_data=True,
#                          shuffle=True,
#                          plot_distribuition=False)

In [5]:
# train_data = data_loader.load_train_generator()
# val_data   = data_loader.load_validation_generator()
# test_data  = data_loader.load_test_generator()