# Streamlined Model Builder


In [1]:
#Import required packages
import tensorflow as tf
import os
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D, SeparableConv2D

In [2]:
#set data directory relative path (expecting subdirectories of classes)
os.chdir("/Users/andrew/Documents/Research/BiodivInformatics/AI-carabids/")
data_dir = 'JORN'
#find names of classes
classes=os.listdir(data_dir)

In [21]:
#Read in data
train, val = tf.keras.utils.image_dataset_from_directory(data_dir, image_size=(400, 600), validation_split=0.3, subset="both", seed=452198)

Found 254 files belonging to 14 classes.
Using 178 files for training.
Using 76 files for validation.


In [23]:
#scale data
train_scaled = train.map(lambda x,y: (x/255, y))
val_scaled = val.map(lambda x,y: (x/255, y))

In [24]:
#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(64, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(128, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
#number of final possibilities
model.add(Dense(14, activation='softmax'))

In [27]:
#Compile model and set up logs and callbacks
model.compile('adam', loss=tf.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
model.summary()
logdir = 'logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6, verbose=1, restore_best_weights=True)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 200, 300, 64)      1792      
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 100, 150, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 50, 75, 128)       73856     
                                                                 
 conv2d_8 (Conv2D)           (None, 25, 38, 256)       295168    
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 12, 19, 256)       0         
 g2D)                                                            
                                                                 
 global_average_pooling2d_2  (None, 256)              

In [31]:
#Train model
hist = model.fit(train_scaled, epochs=100, validation_data=val_scaled, callbacks=[tensorboard_callback, early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 12: early stopping


### Notes
- padding='same' is important, bumps up how fast the models starts to increase, and jumps the val_accuracy by 10% right away
- average global pooling is a little faster
- number of filters increases computation time. Might not have a huge response to accuracy of 12-sp dataset?
- fewer conv2d layers make epochs faster but require more epochs to 'catch' onto higher accuracy?
- Not sure about removing one of the pooling layers or not. Trying out slightly increasing the filters for some of the layers to see if the val accuracy goes up faster - so far seems like maybe
- removing some layers is definitely the way to go. increasing filters seems to slightly help the model 'catch' but it doesn't seem to make a huge difference on val accuracy in this dataset
- the second to last dense layer seems to work fine at 256 or 512 ... not a major difference in final acuracy or speed to get there, just reduces the model size
- Changing strides from 2 down to 1 creates a 5-fold increase in epoch time (for 3x3 kernal size) - if anything it performed worse over 100 epochs
- increasing second to last dense layer makes hte model train faster (fewer epochs) - unsure of convergence differences

In [32]:
model.save(os.path.join('models','JORN-14.tf'))

INFO:tensorflow:Assets written to: models/JORN-14.tf/assets


INFO:tensorflow:Assets written to: models/JORN-14.tf/assets


In [9]:
train.class_names

['CALPER2',
 'CHLSER',
 'CHLTOM',
 'CICPUN',
 'CYMPUN2',
 'DISROB',
 'EURGRO',
 'HAROBL',
 'HARPEN',
 'HELFER',
 'JUNLEM',
 'PASOBS',
 'SELPLA',
 'TETPAL']

## JORN-12 Previous models

In [None]:
#Model finally saved after 300 epochs, 90-95% val_accuracy

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(32, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(32, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

In [None]:
#Fast epochs, 300 on M2 got up to low 90%'s

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(32, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(32, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(128, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))'

In [None]:
#Fast epochs (10s on M2) ~85% but between 80 and 90, never fully converged

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(16, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(32, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(728, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

In [None]:
# High 80's near 90%, 10s on M2

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(32, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(32, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(728, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

In [None]:
#Fast epochs (15-16sec) took about 200 epochs - 85-90%, training and val did not ever converge to 100%

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(16, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(64, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
#model.add(MaxPooling2D())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

In [None]:
#Fast epochs (15s) but took 250 epochs - 80-85%

#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(16, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
#model.add(Conv2D(16, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(32, (3,3), 2, activation='relu', padding='same'))
#model.add(Conv2D(32, (3,3), 1, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(128, (3,3), 2, activation='relu', padding='same'))
model.add(MaxPooling2D())

#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

In [None]:
#Fairly fast ~65-70%
#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(64, (3,3), 2, activation='relu', input_shape=(400,600,3), padding='same'))
model.add(MaxPooling2D())
#model.add(BatchNormalization())

model.add(Conv2D(128, (3,3), 2, activation='relu', padding='same'))
#model.add(MaxPooling2D())
#model.add(BatchNormalization())

model.add(Conv2D(128, (3,3), 2, activation='relu', padding='same'))
#model.add(MaxPooling2D())
#model.add(BatchNormalization())

model.add(Conv2D(256, (3,3), 2, activation='relu', padding='same'))
#model.add(MaxPooling2D())
#model.add(BatchNormalization())

model.add(Conv2D(512, (3,3), 2, activation='relu', padding='same'))
#model.add(MaxPooling2D())
#model.add(BatchNormalization())

model.add(Conv2D(1028, (3,3), 2, activation='relu'))
#model.add(MaxPooling2D())
model.add(BatchNormalization())


#model.add(Flatten())
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
#model.add(Dropout(0.8))
#number of final possibilities is n-1
model.add(Dense(12, activation='softmax'))

In [None]:
#Original CNN from 5sp test - ~40%
#establish a model - can do rest inside first command or use the add method
model = Sequential()

model.add(Conv2D(16, (5,5), 1, activation='relu', input_shape=(400,600,3)))
model.add(MaxPooling2D())

model.add(Conv2D(32, (5,5), 1, activation='relu'))
model.add(MaxPooling2D())

model.add(Conv2D(64, (5,5), 1, activation='relu'))
model.add(MaxPooling2D())

model.add(Flatten())

model.add(Dense(256, activation='relu'))
#number of final possibilities
model.add(Dense(12, activation='softmax'))

## Xception clone

In [55]:
#Copied from https://keras.io/examples/vision/image_classification_from_scratch/
from tensorflow import keras
from tensorflow.keras import layers
def make_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)

    # Entry block
    #x = layers.Rescaling(1.0 / 255)(inputs)
    x = layers.Conv2D(128, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)


model = make_model(input_shape=(400, 600) + (3,), num_classes=12)
#keras.utils.plot_model(model, show_shapes=True)

In [None]:
# run model
epochs = 25

callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.keras"),
]
model.compile(
    optimizer=keras.optimizers.legacy.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_scaled,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_scaled,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25


2023-08-25 03:29:06.006285: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 63 of 256
2023-08-25 03:29:16.086519: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 134 of 256
2023-08-25 03:29:26.295964: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 203 of 256
2023-08-25 03:29:27.572230: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.


Epoch 4/25


2023-08-25 03:54:08.236416: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 100 of 256
2023-08-25 03:54:18.354160: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 203 of 256
2023-08-25 03:54:19.161916: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.


Epoch 5/25


2023-08-25 04:17:04.300161: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 100 of 256
2023-08-25 04:17:14.443692: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 208 of 256
2023-08-25 04:17:14.925496: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.


Epoch 6/25


2023-08-25 04:39:56.798043: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 100 of 256
2023-08-25 04:40:06.650287: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 203 of 256
2023-08-25 04:40:07.538098: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.


Epoch 7/25


2023-08-25 05:09:49.027251: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 66 of 256
2023-08-25 05:09:59.086182: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 138 of 256
2023-08-25 05:10:09.031479: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 216 of 256
2023-08-25 05:10:09.031543: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.


Epoch 8/25
Epoch 9/25


2023-08-25 06:34:31.370116: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 53 of 256
2023-08-25 06:34:31.452979: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 54 of 256
2023-08-25 06:34:31.495947: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 55 of 256
2023-08-25 06:34:31.496006: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 56 of 256
2023-08-25 06:34:31.651546: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 57 of 256
2023-08-25 06:34:31.651560: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 58 of 256
2023-08-25 06:34:31.651562: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this 

Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25

In [3]:
classes

['NF-PASCAL',
 '.DS_Store',
 'CHLSER',
 'NF-PASELO',
 'JUNLEM',
 'NF-OMUDEJ',
 'CALPER2',
 'NF-PTELAM',
 'CYMPUN2',
 'CICPUN',
 'NF-SCASUB2',
 'DISROB',
 'CHLTOM']