<a href="https://colab.research.google.com/github/mschuessler/two4two/blob/trainKerasExample/examples/two4two_leNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Two4two data training
The notebook demonstrates how to train a modern LeNet CNN on a Dataset pregenerated with the [two4two Module](https://github.com/mschuessler/two4two).

If you open this notebook in Colab please make sure to request a GPU Instance. Training times will be excessively slow otherwise.

In [10]:
import pathlib
import os
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import pandas as pd
from keras_preprocessing.image import ImageDataGenerator
from google.colab import drive


# Downloading the dataset

In [5]:
dataset_url = "https://f001.backblazeb2.com/file/two4two/datasets_models/medVarSpherObjColorBias.tar.gz"
data_dir = keras.utils.get_file(
    origin=dataset_url, fname="medVarSpherObjColorBias", untar=True
)

Downloading data from https://f001.backblazeb2.com/file/two4two/datasets_models/medVarSpherObjColorBias.tar.gz


# Reading dataframe from jsonl

In [11]:
train_dir = os.path.join(data_dir, "train")
train_df = pd.read_json(os.path.join(train_dir, "parameters.jsonl"), lines=True)
train_df["filename"] = train_df["id"] + ".png"

In [12]:
valid_dir = os.path.join(data_dir, "validation")
valid_df = pd.read_json(os.path.join(valid_dir, "parameters.jsonl"), lines=True)
valid_df["filename"] = valid_df["id"] + ".png"

# Creating Datagenerator from dataframes

In [13]:
datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = datagen.flow_from_dataframe(dataframe=train_df, directory=train_dir,
                                              x_col="filename", y_col="obj_name", batch_size=64)
valid_generator = datagen.flow_from_dataframe(dataframe=valid_df, directory=valid_dir,
                                              x_col="filename", y_col="obj_name", batch_size=64)
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size

Found 40000 validated image filenames belonging to 2 classes.
Found 10000 validated image filenames belonging to 2 classes.


# Mounting Google drive to save trained model later

In [14]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [15]:
checkpoint_filepath = "/content/gdrive/My Drive/model_checkpoints_two4two_lenet/epochs:{epoch:03d}-val_acc:{val_acc:.3f}.hdf5"
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=False)

# Defining model architecture

In [16]:
modernLenetModel = keras.models.Sequential([
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(2, activation="softmax"),
    ])

# Compile and train Model

In [17]:
modernLenetModel.compile(loss="categorical_crossentropy",
                             optimizer="adam", metrics=["accuracy"])

In [19]:
modernLenetModel.fit(train_generator,
                     steps_per_epoch=STEP_SIZE_TRAIN,
                     validation_data=valid_generator,
                     validation_steps=STEP_SIZE_VALID,
                     epochs=1, #45,
                     callbacks = [model_checkpoint_callback]
                     )



KeyError: ignored