In [1]:
#default_exp training

In [2]:
#export
import numpy as np
import pandas as pd
import os
from deeplearning_image_classification import data_loading

from sklearn import model_selection, metrics
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.mixed_precision import experimental as mixed_precision
import keras.applications


np.random.seed(0)
pd.set_option('display.max_colwidth', 100)

In [3]:
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)
    
#policy = mixed_precision.Policy('mixed_float16')
#mixed_precision.set_policy(policy)

In [4]:
train_csv_path = os.path.join(data_loading.DATA_DIR, 'train_metadata.csv')
test_csv_path = os.path.join(data_loading.DATA_DIR, 'test_metadata.csv')

train_metadata_df = pd.read_csv(train_csv_path)
test_metadata_df = pd.read_csv(test_csv_path)

In [5]:
sample_size = 11000
sample_val_size = 1000
__, sample_train_val_metadata_df = model_selection.train_test_split(train_metadata_df, test_size=sample_size, random_state=2, stratify=train_metadata_df['class'])
sample_train_metadata_df, sample_val_metadata_df = model_selection.train_test_split(sample_train_val_metadata_df, test_size=sample_val_size, random_state=2, stratify=sample_train_val_metadata_df['class'])

In [6]:
sample_train_metadata_df.head()

Unnamed: 0.1,Unnamed: 0,class,name,filename
15008,139046,Verbenaceae,293f237272df889747b628ad5ab4d766c474955b,../data/images_train/data/324932/293f237272df889747b628ad5ab4d766c474955b.jpg
119283,197578,Lauraceae,c221d86a85270537a7b1bcfe0729157e1e697ad6,../data/images_train/data/158370/c221d86a85270537a7b1bcfe0729157e1e697ad6.jpg
133029,13881,Poaceae,a3e74644ac6d90799bf8cd1ae7b4b13da8ec69ae,../data/images_train/data/265835/a3e74644ac6d90799bf8cd1ae7b4b13da8ec69ae.jpg
152296,173446,Rubiaceae,77d096981f4a90ed8d31926dcdf985a24eaa10e8,../data/images_train/data/293108/77d096981f4a90ed8d31926dcdf985a24eaa10e8.jpg
91208,262151,Piperaceae,c23a20c276c9d50e461f5d5eaf60838b0d728df1,../data/images_train/data/253046/c23a20c276c9d50e461f5d5eaf60838b0d728df1.jpg


In [7]:
image_gen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=keras.applications.mobilenet_v2.preprocess_input,
    horizontal_flip=True
)
image_size = (224, 224)


sample_train_image_iterator = image_gen.flow_from_dataframe(sample_train_metadata_df, batch_size=32, target_size=image_size,
    shuffle=False)
sample_val_image_iterator = image_gen.flow_from_dataframe(sample_val_metadata_df, batch_size=32, target_size=image_size, shuffle=False)
test_image_iterator = image_gen.flow_from_dataframe(test_metadata_df, batch_size=64, target_size=image_size, shuffle=False)

n_classes = len(sample_train_image_iterator.class_indices)

Found 10000 validated image filenames belonging to 82 classes.
Found 1000 validated image filenames belonging to 82 classes.
Found 10000 validated image filenames belonging to 82 classes.


# Setting up model

We use pretrained MobileNet model for feature extraction

In [8]:
base_model = keras.applications.MobileNet(include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
model = keras.Sequential(
    [
        base_model,
        keras.layers.AveragePooling2D((4, 4)),
        keras.layers.Flatten(),
        keras.layers.Dense(n_classes),
        keras.layers.Softmax()
    ])
model.compile(
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=['acc', keras.metrics.Precision(), keras.metrics.Recall()])

Using TensorFlow backend.


In [9]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_1.00_224 (Model)   (None, 7, 7, 1024)        3228864   
_________________________________________________________________
average_pooling2d_1 (Average (None, 1, 1, 1024)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 82)                84050     
_________________________________________________________________
softmax_1 (Softmax)          (None, 82)                0         
Total params: 3,312,914
Trainable params: 84,050
Non-trainable params: 3,228,864
_________________________________________________________________


In [10]:
model.fit(sample_train_image_iterator, validation_data=sample_val_image_iterator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f55ed0b49d0>

In [11]:
y_test_pred = model.predict(test_image_iterator).argmax(axis=1)

In [12]:
y_test = np.array([test_image_iterator.class_indices[c] for c in test_metadata_df['class']])

## Test set accuracy

In [13]:
metrics.accuracy_score(y_test, y_test_pred)

0.1983