[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nidolow/image-classification/blob/master/notebooks/train_exp.ipynb)

# Initialize
## Import packages

In [1]:
import os
import json
import hashlib
import pandas as pd
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Select data location

In [2]:
# Pick data/output locations depending on where the notebook runs.
# On Google Colab the `google.colab` package is importable, so Drive is
# mounted and both the training data and the outputs live under
# 'My Drive/train/'. Anywhere else the import fails and repository-relative
# paths are used instead.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: fall back to the local project layout.
    DATA_PATH = '../data/train/'
    OUTPUT_DIR = '../models/'
else:
    # Only a missing package triggers the local fallback; a failed mount
    # (e.g. denied authorization) is raised instead of being silently ignored.
    drive.mount('/content/drive')
    DATA_PATH = '/content/drive/My Drive/train/'
    OUTPUT_DIR = '/content/drive/My Drive/train/'

## Set up constants

In [3]:
# Experiment configuration shared by the cells below.
#   batch       - batch size given to both data iterators
#   max_epochs  - number of epochs passed to model.fit
#   height/width - image size used for resizing and for the model input shape
#   features    - empty here; purpose not shown in this notebook (TODO confirm)
# The whole dict is later hashed to name the saved artifacts and dumped to disk.
CONF = dict(
    batch=128,
    max_epochs=15,
    height=128,
    width=128,
    features={},
)

## Set up GPU

In [4]:
# Optional GPU memory cap.
# MAX_GPU_MEM is passed as `memory_limit` of the virtual device configuration
# (TensorFlow documents this value as megabytes).
MAX_GPU_MEM = 1536
# Flip to True to cap memory on the first GPU; left off because a better GPU
# works fine with no restrictions.
LIMIT_GPU_MEMORY = False

gpus = tf.config.experimental.list_physical_devices('GPU')
# The `gpus` check avoids an IndexError on gpus[0] on CPU-only machines.
if LIMIT_GPU_MEMORY and gpus:
    # Restrict TensorFlow to only allocate limited amount of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MAX_GPU_MEM)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

# Manage input data
## Load data

In [5]:
# Build one table of (relative filename, category) rows, where every
# sub-directory of DATA_PATH is a category and its entries are the images.
records = []
# Sorted listings make the row order - and therefore the seeded split below -
# reproducible; os.listdir returns entries in arbitrary order.
for category in sorted(os.listdir(DATA_PATH)):
    category_dir = os.path.join(DATA_PATH, category)
    # Skip plain files: on Colab OUTPUT_DIR is the same folder as DATA_PATH,
    # so previously saved model files would otherwise be listed as categories
    # and crash the nested os.listdir call.
    if not os.path.isdir(category_dir):
        continue
    print('Loading category:', category)
    records.extend(
        {'filename': os.path.join(category, f), 'category': category}
        for f in sorted(os.listdir(category_dir)))

# Single construction instead of growing the frame with concat in the loop;
# this also yields a unique 0..N-1 index.
df = pd.DataFrame(records, columns=['filename', 'category'])

# Hold out 10% of the rows for validation, with a fixed seed.
train_df, validation_df = train_test_split(df, test_size=0.10, random_state=29)

Loading category: human
Loading category: dog
Loading category: cat


## Prepare data for training

In [6]:
# Both splits get the same preprocessing: pixel values are scaled by 1/255.
train_generator = ImageDataGenerator(rescale=1./255)
validation_generator = ImageDataGenerator(rescale=1./255)

# Options common to the training and validation iterators: filenames are
# resolved relative to DATA_PATH, images are resized to the configured size
# and labels are produced in 'categorical' mode.
flow_options = dict(
    directory=DATA_PATH,
    x_col='filename',
    y_col='category',
    batch_size=CONF['batch'],
    target_size=(CONF['height'], CONF['width']),
    class_mode='categorical',
)

train_data = train_generator.flow_from_dataframe(train_df, **flow_options)
validation_data = validation_generator.flow_from_dataframe(validation_df, **flow_options)

Found 31708 validated image filenames belonging to 3 classes.
Found 3524 validated image filenames belonging to 3 classes.


## Define model

In [7]:
# Small CNN: three Conv2D + MaxPooling2D stages (16, 32, 64 filters),
# then a 512-unit dense layer and a 3-unit softmax output.
image_shape = (CONF['height'], CONF['width'], 3)

# Layers are instantiated in network order so Keras auto-naming is unchanged.
cnn_layers = [
    Conv2D(16, 3, padding='same', activation='relu', input_shape=image_shape),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(3, activation='softmax'),
]
model = Sequential(cnn_layers)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# Store the architecture in CONF so it becomes part of the saved configuration.
CONF['model'] = json.loads(model.to_json())


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 128, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 16384)             0

# Train

In [8]:
# Number of full batches per pass; integer division leaves out a trailing
# partial batch.
train_steps = len(train_df) // CONF['batch']
val_steps = len(validation_df) // CONF['batch']

# Train for the configured number of epochs, validating after each one.
history = model.fit(
    train_data,
    epochs=CONF['max_epochs'],
    steps_per_epoch=train_steps,
    validation_data=validation_data,
    validation_steps=val_steps,
)


  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 247 steps, validate for 27 steps
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Save model

In [9]:
# Persist the run under a name derived from its configuration: the first
# 7 hex characters of the MD5 of CONF serialized with sorted keys.
# Stored in `model_hash` rather than `hash`, so the builtin hash() is not
# shadowed for the rest of the kernel session.
conf_json = json.dumps(CONF, sort_keys=True)
model_hash = hashlib.md5(conf_json.encode('utf-8')).hexdigest()[:7]

# Create the output directory if needed so a finished training run is not
# lost to a missing-directory error on save.
os.makedirs(OUTPUT_DIR, exist_ok=True)
artifact_base = os.path.join(OUTPUT_DIR, 'model-' + model_hash)

# Three artifacts share the base name: weights, training history, config.
model.save_weights(artifact_base + '.mdl')
with open(artifact_base + '.history', 'w') as history_file:
    pd.DataFrame(history.history).to_json(history_file)
with open(artifact_base + '.conf', 'w') as conf_file:
    json.dump(CONF, conf_file)