In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd

# Load the label table. The backslash is escaped so that "\l" is no longer an
# invalid escape sequence; the resulting path string is unchanged.
df_dog = pd.read_csv("assets\\labels.csv")
df_dog.info()

In [None]:
# Summary statistics of the label table.
df_dog.describe()

In [None]:
import os

# Number of entries in the training image folder.
len(os.listdir("assets\\train\\"))

In [None]:
# Cell output is a tuple: the median number of rows per breed, and a bar chart
# of the first 20 entries of the per-breed counts.
df_dog["breed"].value_counts().median(), df_dog["breed"].value_counts()[:20].plot(kind="bar", figsize=(10,4), xlabel="")

In [None]:
# Build one image path per id in the label table (same order as df_dog).
filenames = ["assets\\train\\" + fname + ".jpg" for fname in df_dog["id"]]

In [None]:
import numpy as np

# Breed names as a NumPy array, in the same row order as df_dog.
labels = np.array(df_dog["breed"])
# Sanity check: the array has one entry per row of the breed column.
len(labels) == len(df_dog["breed"]) 

In [None]:
from IPython.display import Image

# Display the image at index 2 of the path list.
Image(filenames[2])

In [None]:
# Distinct breed names; np.unique returns them sorted.
unique_breeds = np.unique(labels)
# One boolean array per sample, True where unique_breeds equals that sample's breed.
labels_bool = [label == unique_breeds for label in labels]

In [None]:
# Features are the image paths; targets are the boolean breed vectors.
X = filenames
y = labels_bool

# Number of leading samples used for the first experiments (applied as a slice before the split).
IMAGES_NUM = 1000
# Side length, in pixels, that process_image resizes every image to.
IMG_SIZE = 224

In [None]:
from sklearn.model_selection import train_test_split

# Split the first IMAGES_NUM samples 80/20 into training and validation sets;
# random_state is fixed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(X[:IMAGES_NUM], y[:IMAGES_NUM], test_size=0.2, random_state=42)

In [None]:
from matplotlib.pyplot import imread

# Shape of the array read from the image at index 1.
imread(filenames[1]).shape

In [None]:
# Show a single path wrapped as a TensorFlow constant.
tf.constant(filenames[1]) 

In [None]:
def process_image(image_path):
    """Read the JPEG at ``image_path`` and return it as a float32 image tensor.

    The file is decoded with 3 colour channels, converted to ``tf.float32``
    and resized to ``IMG_SIZE`` x ``IMG_SIZE``.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, size=[IMG_SIZE, IMG_SIZE])

In [None]:
def get_img_label(path, label):
    """Return ``(processed image, label)`` for one sample; the label is passed through untouched."""
    return process_image(path), label

In [None]:
BATCH_SIZE = 32

def create_batches(X, y=None, batch_size=BATCH_SIZE, validation_data=False, test_data=False):
    """Turn image paths (and optionally labels) into a batched ``tf.data.Dataset``.

    Parameters:
        X: sequence of image file paths.
        y: sequence of labels aligned with ``X``; not needed when ``test_data`` is True.
        batch_size: number of samples per batch (defaults to ``BATCH_SIZE``).
        validation_data: if True, build an unshuffled dataset of (image, label) pairs.
        test_data: if True, build an unshuffled dataset of images only.

    Returns:
        A batched dataset. Only the training variant (both flags False) is shuffled.

    Raises:
        ValueError: if labels are required but ``y`` is None.
    """
    if test_data:
        # No labels available: the dataset yields processed images only.
        data = tf.data.Dataset.from_tensor_slices(tf.constant(X))
        return data.map(process_image).batch(batch_size=batch_size)

    if y is None:
        raise ValueError("y (labels) is required unless test_data=True")

    data = tf.data.Dataset.from_tensor_slices((tf.constant(X), tf.constant(y)))
    if not validation_data:
        # Shuffle the (path, label) pairs before decoding the images.
        data = data.shuffle(buffer_size=len(X))
    # Honour the batch_size argument instead of always using the global BATCH_SIZE.
    return data.map(get_img_label).batch(batch_size=batch_size)

In [None]:
# Shuffled training batches and unshuffled validation batches.
training_data = create_batches(X_train, y_train)
validation_data = create_batches(X_val, y_val, validation_data=True)

In [None]:
# Inspect the element structure (shapes and dtypes) of both datasets.
training_data.element_spec, validation_data.element_spec 

In [None]:
import matplotlib.pyplot as plt

def show_images(images, labels):
    """Plot up to 25 images in a 5x5 grid, each titled with its breed name.

    Parameters:
        images: indexable collection of images accepted by ``plt.imshow``.
        labels: indexable collection of per-image label vectors; the position of
            the maximum is used as an index into ``unique_breeds``.
    """
    plt.figure(figsize=(8,10))
    # Never index past the end when fewer than 25 images are supplied.
    for i in range(min(25, len(images))):
        ax = plt.subplot(5, 5, i+1)
        ax.tick_params(axis='both', labelsize=0)
        plt.imshow(images[i])
        plt.title(unique_breeds[labels[i].argmax()], fontsize=8)


In [None]:
# Pull the first batch of the training dataset as NumPy arrays and plot it.
train_images, train_labels = next(training_data.as_numpy_iterator()) 

show_images(train_images, train_labels)

In [None]:
# Pull the first batch of the validation dataset as NumPy arrays and plot it.
val_images, val_labels = next(validation_data.as_numpy_iterator())

show_images(val_images, val_labels)

In [None]:
# Model input shape: [batch, height, width, channels], batch dimension left as None.
INPUT_SHAPE = [None, IMG_SIZE, IMG_SIZE, 3] 

# One output unit per distinct breed.
OUTPUT_SHAPE = len(unique_breeds) 

# Model handle passed to hub.KerasLayer in create_model.
MODEL_URL = "https://www.kaggle.com/models/google/mobilenet-v2/TensorFlow2/130-224-classification/1"

In [None]:
import tf_keras as tfk

def create_model(input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE, model_url=MODEL_URL):
    """Build and compile a classifier: a TF Hub layer followed by a softmax Dense layer.

    Parameters:
        input_shape: shape passed to ``model.build`` (defaults to ``INPUT_SHAPE``).
        output_shape: number of units in the final Dense layer (defaults to ``OUTPUT_SHAPE``).
        model_url: handle of the TF Hub model loaded by ``hub.KerasLayer``.

    Returns:
        The compiled and built ``tfk.Sequential`` model.
    """
    # Use the arguments rather than the module-level constants so callers can
    # actually override the hub model, class count and input shape.
    model = tfk.Sequential([
        hub.KerasLayer(model_url),
        tfk.layers.Dense(units=output_shape, activation="softmax")
    ])

    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=["accuracy"]
    )

    model.build(input_shape)

    return model

In [None]:
# Build a model with the default settings and print its layer summary.
model = create_model()
model.summary()

In [None]:
%load_ext tensorboard

In [None]:
import datetime

def create_tensorboard_cb():
    """Return a TensorBoard callback that writes to a new timestamped log folder."""
    # One folder per call, named after the current time.
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    run_dir = os.path.join('assets\\logs', timestamp)
    return tfk.callbacks.TensorBoard(run_dir)

In [None]:
# Early-stopping callback monitoring 'val_accuracy' with a patience of 3.
early_stopping = tfk.callbacks.EarlyStopping('val_accuracy', patience=3)

In [None]:
# Value passed as `epochs` to model.fit.
NUM_EPOCHS = 100

In [None]:
def train_model():
    """Create a fresh model, fit it on ``training_data`` and return it.

    Validation runs on ``validation_data``; a new TensorBoard callback and the
    shared ``early_stopping`` callback are attached.
    """
    fresh_model = create_model()
    callbacks = [create_tensorboard_cb(), early_stopping]
    fresh_model.fit(
        x=training_data,
        epochs=NUM_EPOCHS,
        validation_data=validation_data,
        validation_freq=1,  # compute validation metrics every epoch
        callbacks=callbacks,
    )
    return fresh_model

In [None]:
# Train on the subset; this rebinds `model` to the fitted model.
model = train_model()

In [None]:
%tensorboard --logdir assets/logs

In [None]:
# Predict on the validation dataset and display the result.
predictions = model.predict(validation_data, verbose=1)
predictions 

In [None]:
# Inspect one prediction vector.
index = 42
print(np.max(predictions[index])) # largest value in the vector
print(np.sum(predictions[index])) # sum of all values in the vector
print(np.argmax(predictions[index])) # position of the largest value
print(unique_breeds[np.argmax(predictions[index])]) # breed name at that position

In [None]:
def get_pred_label(prediction_probilities):
    """Return the entry of ``unique_breeds`` at the position of the largest probability."""
    best_index = np.argmax(prediction_probilities)
    return unique_breeds[best_index]

# Example: predicted breed for the first prediction vector.
pred_label = get_pred_label(predictions[0])
pred_label

In [None]:
# Flatten the batched validation dataset into per-sample Python lists.
images_un = []
labels_un = []

for image, label in validation_data.unbatch().as_numpy_iterator():
    images_un.append(image)
    labels_un.append(label)

# Label of the first validation sample.
labels_un[0]

In [None]:
def unbatch_images(dataset):
    """Flatten a batched (image, label) dataset into two parallel lists.

    Returns:
        ``(images, labels)`` where each label is the ``unique_breeds`` entry at
        the position of the maximum of that sample's label vector.
    """
    samples = list(dataset.unbatch().as_numpy_iterator())
    images = [picture for picture, _ in samples]
    labels = [unique_breeds[np.argmax(encoded)] for _, encoded in samples]
    return images, labels

# NOTE: rebinds val_images / val_labels (previously the first validation batch)
# to per-sample lists covering the whole validation dataset; labels are breed names here.
val_images, val_labels = unbatch_images(validation_data)

In [None]:
def plot_pred(pred_probilities, labels, images, n=1):
    """Show image ``n`` titled with its predicted breed and top probability.

    The title is green when the predicted label equals ``labels[n]`` and red otherwise.
    """
    probs = pred_probilities[n]
    actual = labels[n]
    picture = images[n]
    predicted = get_pred_label(probs)

    plt.imshow(picture)
    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])

    title_color = "green" if predicted == actual else "red"
    plt.title(f"{predicted} {np.max(probs)*100:2.0f}%", color=title_color, fontsize=10)

In [None]:
# Plot the prediction for validation sample 42.
plot_pred(predictions, val_labels, val_images,42)

In [None]:
def plot_pred_top(pred_probilities, labels, n=1):
    """Bar-plot the 10 highest prediction probabilities for sample ``n``.

    Bars are grey; if the true label ``labels[n]`` is among the top 10, its bar
    is coloured green.
    """
    pred_prob, true_label = pred_probilities[n], labels[n]

    # Indices of the 10 largest probabilities, highest first.
    top_10_indexes = pred_prob.argsort()[-10:][::-1]
    top_10_values = pred_prob[top_10_indexes]
    top_10_labels = unique_breeds[top_10_indexes]

    top_plot = plt.bar(np.arange(len(top_10_labels)), top_10_values, color="grey")

    plt.xticks(np.arange(len(top_10_labels)), top_10_labels, rotation="vertical", fontsize=8)

    # Highlight the true label only when it made the top 10.
    if np.isin(true_label, top_10_labels):
        top_plot[np.argmax(top_10_labels == true_label)].set_color("green")

In [None]:
# Top-10 probability chart for validation sample 42.
plot_pred_top(predictions, val_labels, 42)

In [None]:
# Offset of the first validation sample shown in the grid.
i_multiply = 10
num_rows = 3
num_cols = 2
num_images = num_rows*num_cols
plt.figure(figsize=(6*num_rows, 6*num_cols))

# The grid has num_rows rows and 2*num_cols columns: every sample takes two
# adjacent cells (image on the left, top-10 bar chart on the right).
for i in range(num_images):
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_pred(predictions, val_labels, val_images, i+i_multiply)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_pred_top(predictions, val_labels, i+i_multiply)
plt.tight_layout(h_pad=1)
plt.show()

In [None]:
def save_model(model, suffix=None):
    """Save ``model`` as a timestamped ``.h5`` file in the models folder.

    Parameters:
        model: object exposing ``save(path)``.
        suffix: optional text appended to the timestamp as ``-<suffix>``.

    Returns:
        The path the model was saved to.
    """
    modeldir = os.path.join('assets\\models', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
    # With the default suffix=None the original "-" + suffix raised TypeError;
    # simply omit the suffix part in that case.
    if suffix is None:
        model_path = modeldir + ".h5"
    else:
        model_path = modeldir + "-" + suffix + ".h5"
    model.save(model_path)
    return model_path

In [None]:
def load_model(model_path):
    """Load and return the saved model stored at ``model_path``."""
    # hub.KerasLayer is supplied as a custom object under the name "KerasLayer".
    return tfk.models.load_model(
        model_path,
        custom_objects={"KerasLayer": hub.KerasLayer},
    )

In [None]:
# save_model writes "<timestamp>-<suffix>.h5" and returns that path, so keep the
# returned path and load from it instead of a hard-coded file name that is never written.
model_path_1000 = save_model(model, suffix="1000-images-mobilnetv2-Adam")

loaded_model_1000 = load_model(model_path_1000)

In [None]:
# Evaluate the reloaded model on the validation dataset.
loaded_model_1000.evaluate(validation_data)

In [None]:
# Shuffled training batches built from every labelled image (no validation split).
full_data = create_batches(X, y)

In [None]:
# Fresh, untrained model for the full-data run.
full_model = create_model()

In [None]:
# New TensorBoard callback (own timestamped log folder) for the full-data run.
full_model_cb = create_tensorboard_cb()
# This callback monitors "accuracy" rather than 'val_accuracy'.
full_model_earlystop = tfk.callbacks.EarlyStopping("accuracy", patience=3)

In [None]:
# Fit on the full dataset; no validation data is passed to this call.
full_model.fit(full_data, epochs=NUM_EPOCHS, callbacks=[full_model_cb, full_model_earlystop])

In [None]:
# save_model writes "<timestamp>-<suffix>.h5" and returns that path, so keep the
# returned path and load from it instead of a hard-coded file name that is never written.
model_path_full = save_model(full_model, suffix="full-images-mobilnetv2-Adam")

load_model_full = load_model(model_path_full)

In [None]:
# Paths of all entries in the test folder, in the order os.listdir returns them.
test_path = "assets\\test\\"
filenames_test = [test_path + fname for fname in os.listdir(test_path)]

In [None]:
# Image-only batches for the test files (no labels, no shuffling).
test_data = create_batches(filenames_test, test_data=True)

In [None]:
# Predict on the test batches with the reloaded full model.
predictions_test = load_model_full.predict(test_data, verbose=1) 

In [None]:
# Write the raw prediction array to a comma-separated text file.
np.savetxt("assets\\predictions.csv", predictions_test, delimiter=",") 

In [None]:
# Read the saved predictions back to check that the file loads.
np.loadtxt("assets\\predictions.csv", delimiter=",") 

In [None]:
# Table with an "id" column followed by one probability column per breed.
df_test = pd.DataFrame(columns=["id"] + list(unique_breeds))

# Take the ids from filenames_test, the exact list the predictions were computed
# from, instead of listing the folder a second time: strip the folder prefix and
# the file extension so every row lines up with its prediction.
df_test["id"] = [os.path.splitext(path[len(test_path):])[0] for path in filenames_test]
df_test[list(unique_breeds)] = predictions_test

df_test.to_csv("assets\\predictions_df.csv", index=False)

In [None]:
def predict_image(pred_probilities, images, n=0):
    """Show image ``n`` (read from its path) with the predicted breed and probability as title.

    Parameters:
        pred_probilities: indexable collection of prediction vectors.
        images: indexable collection of image file paths.
        n: index of the sample to display.
    """
    plt.figure(figsize=(6,6))
    # The original bare `plt.subplot` expression did nothing and has been removed.
    plt.imshow(plt.imread(images[n]))
    plt.title(f"{unique_breeds[np.argmax(pred_probilities[n])]} {np.max(pred_probilities[n])*100:2.0f}%",
                fontsize=10, y=1, pad=-14, backgroundcolor="yellow")
    plt.xticks([])
    plt.yticks([])

In [None]:
# Show the test image at index 3232 with its predicted breed.
predict_image(predictions_test, filenames_test, 3232)

In [None]:
# Run the full model on the images found in the custom folder.
custom_path = "assets\\custom\\"
filenames_custom = [custom_path + fname for fname in os.listdir(custom_path)]
# Image-only batches (no labels), same pipeline as the test set.
custom_data = create_batches(filenames_custom, test_data=True)
predictions_custom = load_model_full.predict(custom_data, verbose=1)

In [None]:
def predict_image_custom(pred_probilities, images, n=0, folder=False):
    '''
    Show predicted breed and probability on top of the given image(s).

    With folder=True every image in `images` is drawn side by side in one row;
    otherwise only the image at index `n` is drawn.
    '''
    def _draw(index, title_size):
        # Draw one image with its prediction as a yellow-backed title and no ticks.
        probs = pred_probilities[index]
        plt.imshow(plt.imread(images[index]))
        plt.title(f"{unique_breeds[np.argmax(probs)]} {np.max(probs)*100:2.0f}%",
                  fontsize=title_size, y=1, pad=-14, backgroundcolor="yellow")
        plt.xticks([])
        plt.yticks([])

    if not folder:
        plt.figure(figsize=(6,6))
        _draw(n, 10)
        return

    plt.figure(figsize=(10,10))
    total = len(images)
    for position in range(total):
        plt.subplot(1, total, position+1)
        _draw(position, 8)

In [None]:
# Show every custom image with its predicted breed in a single row.
predict_image_custom(predictions_custom, filenames_custom, folder=True)