In [47]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


from Models.Augment import Augment

In [48]:
THIS_DIR = os.path.abspath('')
DATASETS_DIR = "Datasets/"
dataset_csv = "cells_64x64_it_50.npz"

dataset_csv_filepath = os.path.join(THIS_DIR, DATASETS_DIR, dataset_csv)

In [62]:
def create_train_and_test_files(npz_filepath, test_size = 0.33, augment = True, transformations = ['rotate', 'flip']):
    
    data = np.load(npz_filepath, allow_pickle = True)
    
    features = data["features"]
    target = data["target"]
        
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = test_size, random_state=42)
    
    print("Without augmentation:")
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    
    if augment:
        aug_X_train = []
        aug_y_train = []
        aug = Augment()
        
        for i in range(0, len(X_train)):
            image_to_transform = X_train[i][0]
            target = y_train[i][0]
            transformed_images = aug.TransformImage(image_to_transform, transformations)
              
            for transformed_image in transformed_images:
                aug_X_train.append(transformed_image)
                aug_y_train.append(target)
        
        new_X_train = X_train + aug_X_train
        new_y_train = y_train + aug_y_train
        
        X_train, y_train = shuffle(new_X_train, new_y_train)
        print("With augmentation:")
        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
        
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = create_train_and_test_files(dataset_csv_filepath)

Without augmentation:
(2542,) (2542,) (1253,) (1253,)


ValueError: operands could not be broadcast together with shapes (2542,) (2512,64,64,3) 

In [52]:
X_train.shape

(2542,)

In [20]:
def load_data(dataset_name):
    this_dir = os.path.abspath('')
    datasets_dir = os.path.join(this_dir, DATASETS_DIR)

    return np.load(f"{datasets_dir}{dataset_name}.npz", allow_pickle=True)

In [21]:
data = load_data(dataset_name)

In [23]:
data['y_train']

array(['non_tumor', 'non_tumor', 'non_tumor', ..., 'tumor', 'non_tumor',
       'tumor'], dtype=object)

In [17]:
def generate_tf_training_files(dataset_name, batch_size = 512, shuffle = True):

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_test_data['X_train']},
        y=train_test_data['y_train'],
        num_epochs=None,
        shuffle=True)
        
estimator = generate_tf_training_files(dataset_name)

In [18]:
estimator