[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nidolow/image-classification/blob/master/notebooks/train_exp.ipynb)

# Initialize
## Import packages

In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Select data location

In [2]:
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_PATH = '/content/drive/My Drive/train/'
    OUTPUT_DIR = '/content/drive/My Drive/train/'
except:
    DATA_PATH = '../data/train/'
    OUTPUT_DIR = '../models/'

## Set up const

In [3]:
BATCH = 128
EPOCHS = 3
HEIGHT = 128
WIDTH = 128

## Set up GPU

In [None]:
import tensorflow as tf
MAX_GPU_MEM = 1536
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only allocate limited amount of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MAX_GPU_MEM)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

# Manage input data
## Load data

In [4]:
df = pd.DataFrame()
for category in os.listdir(DATA_PATH):
    print('Loading category:', category)
    filenames = [os.path.join(category, f) for f in os.listdir(os.path.join(DATA_PATH, category))]
    df = pd.concat([df,
                    pd.DataFrame({'filename': filenames,
                                  'category': category})])

train_df, validation_df = train_test_split(df, test_size=0.10, random_state=29)

Loading category: cat
Loading category: dog
Loading category: human


## Prepare data for training

In [5]:
train_generator = ImageDataGenerator(rescale=1./255)
validation_generator = ImageDataGenerator(rescale=1./255)

train_data = train_generator.flow_from_dataframe(
    train_df,
    DATA_PATH,
    x_col='filename',
    y_col='category',
    batch_size=BATCH,
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical')

validation_data = validation_generator.flow_from_dataframe(
    validation_df,
    DATA_PATH,
    x_col='filename',
    y_col='category',
    batch_size=BATCH,
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical')

Found 31708 validated image filenames belonging to 3 classes.
Found 3524 validated image filenames belonging to 3 classes.


## Define model

In [6]:
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(HEIGHT, WIDTH, 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(3, activation='softmax')])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 128, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 16384)             0

# Train

In [7]:
history = model.fit(
    train_data,
    steps_per_epoch=len(train_df) // BATCH,
    epochs=EPOCHS,
    validation_data=validation_data,
    validation_steps=len(validation_df) // BATCH)


  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 247 steps, validate for 27 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


## Save model

In [8]:
model.save_weights(os.path.join(OUTPUT_DIR, 'model.mdl'))
with open(os.path.join(OUTPUT_DIR, 'model.history'), 'w') as w:
    pd.DataFrame(history.history).to_json(w)