<a href="https://colab.research.google.com/github/maybachar/getpet-recognition/blob/master/train_model_dog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from keras.models import model_from_json

# Append to the labels list the matching label of each image, using the info from the CSV file,
# and store the breed's number instead of the breed's name (looked up in a dictionary).
def create_lables_list(image_id, lables):
    """Look up the breed number of one image and append it to ``lables``.

    ``image_id`` is an image file path. The text between the last "/" and the
    first "." of its file name is matched against the ``id`` column of the
    module-level ``labels_file`` table; the breed found there is translated to
    its number through the module-level ``breed_num_dic``.

    The result is appended to ``lables`` in place; nothing is returned.
    An ``IndexError`` is raised when no row of ``labels_file`` has that id.
    """
    # keep only the file name, then drop everything from the first dot on.
    file_name = image_id.rsplit("/", 1)[-1]
    bare_id = file_name.partition(".")[0]
    # rows of labels.csv whose id equals this image's id.
    matching_rows = labels_file[labels_file.id == bare_id]
    # the first matching row decides the breed.
    breed_name = matching_rows["breed"].tolist()[0]
    # translate the breed name to its number and record it.
    lables.append(breed_num_dic[breed_name])


def build(size, num_classes):
    """Create the breed classifier on top of a pre-trained MobileNetV2.

    Args:
        size: height and width of the square, 3-channel input images.
        num_classes: number of units in the final softmax layer.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image batch of shape
        ``(size, size, 3)`` to ``num_classes`` probabilities.
    """
    # square colour images: (height, width, 3 channels).
    image_input = Input((size, size, 3))
    # ImageNet-pretrained backbone without its original classification head.
    backbone = MobileNetV2(weights="imagenet", include_top=False,
                           input_tensor=image_input)
    # the whole backbone is fine-tuned together with the new head.
    backbone.trainable = True
    # collapse the spatial feature map to one vector per image.
    features = GlobalAveragePooling2D()(backbone.output)
    # dropout against overfitting, applied before the dense head.
    features = Dropout(0.2)(features)
    # hidden fully-connected layer with ReLU activation.
    hidden = Dense(1024, activation="relu")(features)
    # softmax output: one probability per breed.
    probabilities = Dense(num_classes, activation="softmax")(hidden)
    return tf.keras.Model(image_input, probabilities)


def fix_data_element(path, y):
    """Load one image from disk and pair it with a one-hot label.

    Args:
        path: image file path as ``bytes`` (it is decoded with ``.decode()``).
        y: integer class index of the image, in ``[0, 120)``.

    Returns:
        ``(image, label)`` where ``image`` is a float32 array of shape
        ``(224, 224, 3)`` scaled by 1/255 and ``label`` is an int32 vector of
        length 120 that is all zeros except for a 1 at position ``y``.

    Raises:
        OSError: if OpenCV could not read the file.
        IndexError: if ``y`` is outside ``[0, 120)``.
    """
    num_class = 120
    size = 224
    path = path.decode()
    # loads a color image from the specified file.
    # NOTE(review): OpenCV documents BGR channel order for imread, while the
    # ImageNet weights are usually paired with RGB input - confirm what the
    # inference code does before changing the order here.
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # imread signals failure by returning None instead of raising; fail here
    # with the offending path rather than later inside cv2.resize.
    if image is None:
        raise OSError(f"could not read image file: {path!r}")
    # change the width, height of an image.
    image = cv2.resize(image, (size, size))
    # normalization: scale the pixel values by 1/255.
    image = image / 255.0
    image = image.astype(np.float32)
    # a negative index would silently wrap around, so reject it explicitly.
    if not 0 <= y < num_class:
        raise IndexError(f"label index {y} is outside [0, {num_class})")
    # one hot encoding of the label (all zero except for the place y).
    label = np.zeros(num_class, dtype=np.int32)
    label[y] = 1
    return image, label

def tf_parse(x, y):
    """Turn one (path, class index) element into an (image, one-hot) pair.

    Runs ``fix_data_element`` through ``tf.numpy_function`` and then declares
    the static shapes of its two outputs.

    Args:
        x: image path element of the dataset.
        y: class index element of the dataset.

    Returns:
        ``(image, label)``: a float32 tensor of shape ``(224, 224, 3)`` and an
        int32 tensor of shape ``(120,)``.
    """
    num_class = 120
    size = 224
    # wrap the numpy function as an operation in a TensorFlow function.
    image, label = tf.numpy_function(
        fix_data_element, [x, y], [tf.float32, tf.int32])
    # set_shape does not resize anything (the resizing happens inside
    # fix_data_element); it only declares the static shapes, which
    # numpy_function cannot infer, so the model can be built.
    image.set_shape((size, size, 3))
    # a real 1-tuple: "(num_class)" is just an int in parentheses.
    label.set_shape((num_class,))
    return image, label

def tf_dataset(x, y, batch=8):
    """Build a batched ``tf.data.Dataset`` from image paths and class indices.

    Args:
        x: sequence of image paths.
        y: sequence of class indices, aligned with ``x``.
        batch: number of consecutive elements combined into one batch.

    Returns:
        A dataset whose elements are produced by applying ``tf_parse`` to every
        ``(x, y)`` pair and grouping the results into batches of ``batch``.
    """
    # one dataset element per (path, class index) pair.
    pairs = tf.data.Dataset.from_tensor_slices((x, y))
    # parse every element separately, then group consecutive ones into batches.
    return pairs.map(tf_parse).batch(batch)



if __name__ == "__main__":
    # Training script: read the labels, build the datasets, fine-tune the
    # model and save its architecture (JSON) and weights (H5).

    # parameters
    # image size
    size = 224
    num_breeds = 120
    # learning rate 0.0001.
    lr = 0.0001
    batch = 16
    epochs = 5

    # paths of train data folder and csv file with labels of each pic.
    path = "/content/drive/MyDrive/dog_breed_identification/"
    train_path = os.path.join(path, "train/*")
    labels_path = os.path.join(path, "labels.csv")
    # import the CSV file.
    labels_file = pd.read_csv(labels_path)
    # get all unique breeds in file.
    breed = labels_file["breed"].unique()
    # create dictionary of breed and number.
    breed_num_dic = {name: i for i, name in enumerate(breed)}
    # paths of the photos; work only with the first N of them. The cut is made
    # before the label lookup so no labels are computed for unused images.
    ids = glob(train_path)[:5000]
    # list of breed index by order of image id
    lables = []
    # adding the num of breed - the label of each image - to lables list.
    for image_id in ids:
        create_lables_list(image_id, lables)

    # splitting data to train and validation. Both sequences go through one
    # call so every image is guaranteed to stay paired with its own label.
    train_x, validation_x, train_y, validation_y = train_test_split(
        ids, lables, test_size=0.2, random_state=42)

    # create a CNN model with its layers.
    model = build(size, num_breeds)
    # configures the model for training. optimizer - adam.
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr), metrics=["acc"])
    # dataset
    trainSet = tf_dataset(train_x, train_y, batch=batch)
    validationSet = tf_dataset(validation_x, validation_y, batch=batch)

    # train
    # set of functions to be applied during the training procedure:
    # ModelCheckpoint writes "model.h5" (relative to the working directory)
    # only when the monitored value improves (save_best_only=True).
    # ReduceLROnPlateau reduces the learning rate when a metric has stopped
    # improving.
    callbacks = [
        ModelCheckpoint("model.h5", verbose=1, save_best_only=True),
        ReduceLROnPlateau(factor=0.1, patience=2, min_lr=0.000001)
    ]
    # trains the model for a number of epochs; the validation data is
    # evaluated at the end of each epoch.
    model.fit(trainSet, validation_data=validationSet, epochs=epochs, callbacks=callbacks)
    # save the model architecture to a JSON file ...
    model_json = model.to_json()
    with open(os.path.join(path, "model.json"), "w") as json_file:
        json_file.write(model_json)
    # ... and its weights in H5 format, after the JSON file has been closed.
    # NOTE(review): these are the weights of the last epoch, not the best
    # checkpoint written by ModelCheckpoint - confirm that this is intended.
    model.save_weights(os.path.join(path, "model.h5"))
    print("Saved model to disk")



    

[1, 26, 7, 118, 87, 3, 32, 83, 50, 17, 94, 12, 103, 17, 25, 31, 9, 76, 105, 41, 73, 118, 52, 111, 24, 71, 107, 5, 57, 58, 74, 73, 51, 110, 111, 14, 88, 108, 117, 116, 51, 27, 23, 9, 65, 50, 1, 45, 37, 36, 94, 31, 104, 0, 80, 16, 18, 25, 7, 67, 70, 41, 78, 21, 60, 66, 17, 17, 77, 50, 63, 66, 105, 58, 73, 0, 93, 76, 52, 95, 9, 110, 35, 70, 50, 43, 71, 38, 95, 12, 34, 109, 17, 7, 9, 65, 69, 38, 105, 22, 46, 112, 81, 102, 82, 38, 31, 83, 21, 30, 27, 33, 67, 79, 46, 114, 69, 102, 34, 55, 11, 91, 13, 37, 52, 92, 60, 91, 54, 6, 64, 106, 55, 27, 74, 94, 117, 109, 85, 11, 6, 3, 73, 43, 82, 110, 76, 6, 25, 55, 111, 117, 63, 113, 95, 56, 46, 88, 90, 18, 4, 8, 104, 37, 64, 113, 13, 74, 48, 118, 97, 11, 80, 117, 75, 44, 67, 77, 60, 3, 0, 103, 51, 90, 15, 11, 63, 63, 50, 71, 72, 14, 8, 87, 32, 48, 109, 48, 113, 93, 42, 26, 39, 66, 25, 15, 77, 58, 27, 64, 68, 87, 63, 63, 97, 89, 14, 11, 110, 111, 96, 109, 40, 58, 27, 52, 39, 45, 70, 64, 77, 77, 75, 73, 114, 106, 51, 50, 93, 44, 114, 32, 8, 103, 18, 2



Epoch 2/5

Epoch 00002: val_loss did not improve from 5.01230
Epoch 3/5

Epoch 00003: val_loss did not improve from 5.01230
Epoch 4/5

Epoch 00004: val_loss did not improve from 5.01230
Epoch 5/5

Epoch 00005: val_loss did not improve from 5.01230
Saved model to disk
