In [1]:
import os
import random
import numpy as np
import pandas as pd
import glob
from shutil import copyfile

from util import *

In [2]:
# Load the image-id -> breed labels that accompany the raw images.
df = pd.read_csv("Datasets/labels.csv")

# Give every distinct breed an integer id, numbered in the order the breed
# first appears in the CSV.
breeds = df.breed.unique()
dict_breeds = dict(zip(breeds, range(len(breeds))))

# Store the numeric class id next to the breed name. Series.map does a plain
# per-row dictionary lookup, which avoids copying the column and then relying
# on DataFrame.replace to downcast the object column to integers.
df["breed_id"] = df["breed"].map(dict_breeds)

print("Shape\t\t: {}".format(df.shape))
print("Unique labels\t: {}".format(len(breeds)))

Shape		: (10222, 3)
Unique labels	: 120


In [3]:
DIR_TRAIN    = "images/train/"
DIR_VALIDATE = "images/validation/"

def move_files(files, dist_folder="image/train/"):
    """Copy labelled images into ``<dist_folder>/<breed>/<file name>``.

    Despite the name, source files are copied, not moved; the originals are
    left in place.

    Args:
        files: iterable of source image paths. The image id is taken to be
            the file name up to its first ".".
        dist_folder: root of the class-per-folder tree to populate. A
            trailing slash is optional.
            NOTE(review): the default ("image/train/") does not match
            DIR_TRAIN ("images/train/"); it is kept unchanged for
            compatibility, so pass the destination explicitly.

    Files whose id has no row in the module-level ``df`` are reported on
    stdout and skipped.
    """
    # Build the id -> breed lookup once instead of scanning the whole label
    # table for every file. keep="first" mirrors the previous behaviour of
    # taking the first matching row when an id appears more than once.
    breed_by_id = (
        df.drop_duplicates(subset="id", keep="first")
          .set_index("id")["breed"]
          .to_dict()
    )

    for src_file_path in files:
        # read file info
        file_name = os.path.basename(src_file_path)
        img_id    = file_name.split(".")[0]
        label     = breed_by_id.get(img_id)
        if label is None:
            print("LABEL NOT FOUND: {}".format(img_id))
            continue

        # create new folder structure; os.path.join inserts the separator
        # itself, so dist_folder works with or without a trailing slash
        dist_dir = os.path.join(dist_folder, label)
        os.makedirs(dist_dir, exist_ok=True)
        copyfile(src_file_path, os.path.join(dist_dir, file_name))

In [4]:
# Collect every regular file below the raw training folder. glob yields paths
# in an arbitrary, filesystem-dependent order, so sort them first: without a
# fixed starting order the seeded shuffle below would not give the same
# train/validation split on every machine or run.
files = sorted(f for f in glob.glob("Datasets/train" + "/**/*", recursive=True) if not os.path.isdir(f))
print("{} files found!".format(len(files)))


# Split the files 70% / 30% into train and validation sets, using a fixed
# seed so the split is repeatable.
random.seed(42)
random.shuffle(files)
train, validate = np.split(files, [int(len(files)*0.7)])
print("#train: ",len(train))
print("#val: ",len(validate))

10222 files found!
#train:  7155
#val:  3067



```python
idx = 0
img_path = files[idx]
img_file = os.path.basename(img_path)
img_id   = img_file.split(".")[0]
result   = df.loc[df['id'] == img_id]
label    = result.iloc[0]["breed"]
#result.at[0,'breed']

print(img_path)
print(img_file)
print(img_id)
print(label)
```

In [5]:
# Populate the class-per-folder tree only when it is not there yet. The check
# looks at the directories that are actually written (DIR_TRAIN / DIR_VALIDATE)
# rather than a differently-cased "Images" folder, which never matches on
# case-sensitive filesystems and caused the copy to run again on every execution.
if not (os.path.isdir(DIR_TRAIN) and os.path.isdir(DIR_VALIDATE)):
    move_files(train,    DIR_TRAIN)
    move_files(validate, DIR_VALIDATE)


# Test on training

In [6]:
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Flatten, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [7]:
# Report whether TensorFlow sees a GPU. The environment flag is set before the
# first device query (intended to let GPU memory grow on demand).
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("GPU is not detected")
else:
    print('Default GPU Device:{}'.format(gpu_name))

Default GPU Device:/device:GPU:0


In [8]:
# Training configuration used by the generator and model cells.
epochs          = 30             # number of training epochs
lr              = 1e-3           # learning rate passed to the optimizer
batch_size      = 32             # images per generator batch
img_dims        = (128, 128, 3)  # network input shape; first two entries are the resize target
keep_pretrained = False          # True freezes the pre-trained base model

In [9]:
# Training-time pipeline: scale pixel values by 1/255 and apply random
# shear, zoom and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Evaluation-time pipeline: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Batches of augmented images read from the per-breed sub-folders of
# DIR_TRAIN; every image is resized to the first two entries of img_dims.
train_generator = train_datagen.flow_from_directory(
    directory=DIR_TRAIN,
    target_size=img_dims[:2],
    batch_size=batch_size,
    class_mode='categorical',
)

# Same settings for the validation images under DIR_VALIDATE.
validation_generator = test_datagen.flow_from_directory(
    directory=DIR_VALIDATE,
    target_size=img_dims[:2],
    batch_size=batch_size,
    class_mode='categorical',
)

Found 7155 images belonging to 120 classes.
Found 3067 images belonging to 120 classes.


## checking input size
```python
print("#img:",train_generator.samples)
print("#steps per epochs:",train_generator.samples// batch_size)
```


## Using Preprocessing for model

```python
from tensorflow.keras.applications.xception import preprocess_input


train_datagen = ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode = "nearest",
    preprocessing_function = preprocess_input
)


test_datagen = ImageDataGenerator(
    preprocessing_function = preprocess_input
)


train_generator = train_datagen.flow_from_directory(
        DIR_TRAIN,
        target_size=(img_dims[0], img_dims[1]), 
        batch_size=batch_size,
        class_mode='categorical') 

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        DIR_VALIDATE,
        target_size=(img_dims[0], img_dims[1]),
        batch_size=batch_size,
        class_mode='categorical')
```

In [10]:
from tensorflow.keras.applications import Xception

# Name used when saving the training curves; it encodes the input size, batch
# size, epoch count and whether the base weights were frozen.
model_name = "Xception_s{}x{}_bs{}_e{}_w{}".format(img_dims[0], img_dims[1], batch_size, epochs, "Keep" if keep_pretrained else "NoKeep")

# retrieve base model (ImageNet weights, without the original classifier head)
base = Xception(include_top=False, weights='imagenet', input_shape=img_dims)

# freeze pre-trained weights so that only the new head is trained
if (keep_pretrained):
    base.trainable = False

# rebuild output layer: global average pooling followed by a 120-way softmax
# (one unit per breed class found by the generators)
x = base.output
x = GlobalAveragePooling2D()(x)
head = Dense(120, activation='softmax')(x)
model = Model(inputs=base.input, outputs=head)

# Compiling the model. `learning_rate` is the supported keyword; the old `lr`
# alias is deprecated and rejected by current Keras releases.
model.compile(optimizer=Adam(learning_rate=lr),
              loss = 'categorical_crossentropy',
              metrics=['accuracy'])

# sanity check: number of trainable / non-trainable weights
trainable_count = np.sum([K.count_params(w) for w in model.trainable_weights])
non_trainable_count = np.sum([K.count_params(w) for w in model.non_trainable_weights])
print("# trainable\t: {}".format(trainable_count))
print("# non-trainable\t: {}".format(non_trainable_count))

# start training; the floor division means only full batches are counted per
# epoch for both training and validation
H=model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples// batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_generator.samples // batch_size
)

# plot model performance
plot_model_history(H, saving_name="{}".format(model_name))

# trainable	: 21052832
# non-trainable	: 54528
