In [14]:
import keras
from keras import models
from keras import layers
from keras import optimizers
import os, shutil

Using TensorFlow backend.


In [15]:
train_dir = "data_files/Cactus_Image/training_set"
test_dir = "data_files/Cactus_Image/testing_set"

In [16]:
def keras_cnn(n_neurons=32, n_layers=3, filter_size=(3, 3), activation="relu", 
               input_shape =(64,64,3), max_pooling=(2,2), dense_layer=128, 
               loss="binary_crossentropy",optimizer="adam",metrics="acc"):
    # NOTE: always alter the input_shape to the specific input shape off the problem.

    model = models.Sequential()
    model.add(layers.Conv2D(n_neurons, filter_size, activation=activation,
                           input_shape =input_shape))
    model.add(layers.MaxPooling2D(max_pooling))
    for num in range(n_layers-2):
        model.add(layers.Conv2D(n_neurons, filter_size, activation=activation))
        model.add(layers.MaxPooling2D(max_pooling))
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_layer, activation=activation))
    model.add(layers.Dense(1, activation="sigmoid"))

    model.compile(loss=loss, 
                  optimizer=optimizer,
                  metrics=[metrics])
    
    return model


In [19]:
model = keras_cnn() 
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 29, 29, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               802944    
_________________________________________________________________
dens

In [22]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(64,64),
    batch_size=20,
    class_mode="binary")

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(64, 64),
    batch_size=20,
    class_mode="binary")

Found 13999 images belonging to 2 classes.
Found 3501 images belonging to 2 classes.


In [23]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=250,
    epochs=5,
    validation_data=test_generator,
    validation_steps=50)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
import pathlib
import random
import shutil
from os import path, listdir


def list_dirs(directory):
    """Returns all directories in a given directory
    """
    return [f for f in pathlib.Path(directory).iterdir() if f.is_dir()]


def list_files(directory):
    """Returns all files in a given directory
    """
    return [f for f in pathlib.Path(directory).iterdir() if f.is_file() and not f.name.startswith('.')]


def ratio(input, output="output", seed=1337, ratio=(.8, .1, .1)):
    assert sum(ratio) == 1
    assert len(ratio) in (2, 3)

    for class_dir in list_dirs(input):
        split_class_dir_ratio(class_dir, output, ratio, seed)


def fixed(input, output="output", seed=1337, fixed=(100, 100), oversample=False):
    # make sure its reproducible
    if isinstance(fixed, int):
        fixed = (fixed)

    assert len(fixed) in (1, 2)

    dirs = list_dirs(input)
    lens = []
    for class_dir in dirs:
        lens.append(split_class_dir_fixed(class_dir, output, fixed, seed))

    if not oversample:
        return

    max_len = max(lens)

    for length, class_dir in zip(lens, dirs):
        class_name = path.split(class_dir)[1]
        full_path = path.join(output, 'train', class_name)
        train_files = list_files(full_path)
        for i in range(max_len - length):
            f_orig = random.choice(train_files)
            new_name = f_orig.stem + '_' + str(i) + f_orig.suffix
            f_dest = f_orig.with_name(new_name)
            shutil.copy2(f_orig, f_dest)


def setup_files(class_dir, seed):
    """Returns shuffled files
    """
    # make sure its reproducible
    random.seed(seed)

    files = list_files(class_dir)

    files.sort()
    random.shuffle(files)
    return files


def split_class_dir_fixed(class_dir, output, fixed, seed):
    """Splits one very class folder
    """
    files = setup_files(class_dir, seed)

    if not len(files) > sum(fixed):
        raise ValueError(f'The number of samples in class "{class_dir.stem}" are too few. There are only {len(files)} samples available but your fixed parameter {fixed} requires at least {sum(fixed)} files. You may want to split your classes by ratio.')

    split_train = len(files) - sum(fixed)
    split_val = split_train + fixed[0]

    li = split_files(files, split_train, split_val, len(fixed) == 2)
    copy_files(li, class_dir, output)
    return len(files)


def split_class_dir_ratio(class_dir, output, ratio, seed):
    """Splits one very class folder
    """
    files #= setup_files(class_dir, seed)

    split_train = int(ratio[0] * len(files))
    split_val = split_train + int(ratio[1] * len(files))

    li = split_files(files, split_train, split_val, len(ratio) == 3)
    copy_files(li, class_dir, output)


def split_files(files, split_train, split_val):
    """Splits the files along the provided indices
    """
    files_train = files[:split_train]
    files_val = files[split_train:]

    li = [(files_train, 'train'), (files_val, 'val')]

    return li


def copy_files(files_type, class_dir, output):
    """Copies the files from the input folder to the output folder
    """
    # get the last part within the file
    class_name = path.split(class_dir)[1]
    for (files, folder_type) in files_type:
        full_path = path.join(output, folder_type, class_name)

        pathlib.Path(full_path).mkdir(
            parents=True, exist_ok=True)
        for f in files:
            shutil.copy2(f, full_path)

In [None]:
dir1 = list_dirs("data_files/Cactus_Image/training_set")

In [None]:
dir1

In [None]:
list_files("data_files/Cactus_Image/training_set/cactus")

In [None]:
ratio1 = ratio("data_files/Cactus_Image/training_set",ratio=(0.8, 0.0, 0.2))

In [None]:
fixed1 = fixed("data_files/Cactus_Image/training_set")

In [None]:
files = setup_files("data_files/Cactus_Image/training_set/cactus", seed=1337)

In [None]:
fixed = ((100,100))

In [None]:
split_class_dir_fixed("data_files/Cactus_Image/training_set/cactus",output="output", ratio = (.8, .0, .1), seed=1337)

In [None]:
split_class_dir_ratio("data_files/Cactus_Image/training_set/cactus", "output", (.8, .2), seed=1337)

In [None]:
split_files(files=setup_files())

In [25]:
import pathlib
import random
import shutil
from os import path, listdir

def setup_files(class_dir, seed):
    """Returns shuffled files
    """
    # make sure its reproducible
    random.seed(seed)

    files = list_files(class_dir)

    files.sort()
    random.shuffle(files)
    return files


def split_files(files, split_train, split_val):
    """Splits the files along the provided indices
    """
    files_train = files[:split_train]
    files_val = files[split_train:]

    li = [(files_train, 'train'), (files_val, 'val')]

    return li

def list_files(directory):
    """Returns all files in a given directory
    """
    return [f for f in pathlib.Path(directory).iterdir() if f.is_file() and not f.name.startswith('.')]

def copy_files(files_type, class_dir, output):
    """Copies the files from the input folder to the output folder
    """
    # get the last part within the file
    class_name = path.split(class_dir)[1]
    for (files, folder_type) in files_type:
        full_path = path.join(output, folder_type, class_name)

        pathlib.Path(full_path).mkdir(
            parents=True, exist_ok=True)
        for f in files:
            shutil.copy2(f, full_path)

class_dir = "data_files/Cactus_Image/training_set/no_cactus"
list_files = list_files("data_files/Cactus_Image/training_set/no_cactus")
files = setup_files("data_files/Cactus_Image/training_set/no_cactus",seed=1337)
ratio = (0.8, 0.2)
split_train = int(ratio[0]*len(files))
split_val = split_train + int(ratio[1] * len(files))
li = split_files(files, split_train, split_val)
copy_files(li, class_dir, "data_files/Cactus_Image/training_set/no_cactus_new")

split_class_dir_ratio(class_dir=class_dir,
                      ratio=ratio,
                      output="data_files/Cactus_Image/training_set/train_cactus",
                      seed=1337)

TypeError: 'list' object is not callable

In [None]:
split_class_dir_ratio(class_dir=class_dir,
                      ratio=ratio,
                      output="data_files/Cactus_Image/training_set/train_cactus",
                      seed=1337)