# Computer Vision - Image Classification example

###### Links: [Dog Breed (Kaggle)](https://www.kaggle.com/competitions/dog-breed-identification/overview)  |  [Article ()]()

### 0 - Setup

###### Import packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

## for data
import os
import cv2
import pandas as pd
import numpy as np
from tqdm.notebook  import tqdm

## for plotting
import matplotlib.pyplot as plt
import seaborn as sns

## for metrics
from sklearn import metrics

## for cnn
from tensorflow.keras import models, layers, utils, callbacks #(2.6.0)

## for vit
import transformers

ModuleNotFoundError: No module named 'cv2'

###### Import data

In [None]:
## breeds kept for this example
labels = ["scottish_deerhound", "maltese_dog", "afghan_hound", "entlebucher", "bernese_mountain_dog"]

## read the labels file, keep only the selected breeds, order the rows by image id
dtf = pd.read_csv("dogs_labels.csv")
dtf = dtf.rename(columns={"breed":"label"})
is_selected = dtf["label"].isin(labels)
dtf = dtf[is_selected].sort_values("id").reset_index(drop=True)

## encode the label as an integer (codes follow the sorted labels) and keep the map code -> label
dtf["y"] = pd.factorize(dtf["label"], sort=True)[0]
dtf_y_label = dtf[['y','label']].drop_duplicates().sort_values('y')
dic_y_mapping = dict(zip(dtf_y_label['y'], dtf_y_label['label']))
print(dic_y_mapping)
dtf

In [None]:
# one-off cleanup (destructive, kept disabled): delete the image files whose id is not in dtf
#dirpath = "data_dogs"
#for file in tqdm(os.listdir(dirpath)):
#    filename = os.path.splitext(file)[0]
#    if filename not in dtf["id"].values:
#        os.remove(os.path.join(dirpath, file))

In [None]:
def utils_load_img(file, ext=('.png','.jpg','.jpeg','.JPG')):
    '''
    Load a single image with opencv.
    :parameter
        :param file: string - path of the image file
        :param ext: list/tuple of strings - accepted file extensions (case sensitive)
    :return
        the image array (converted with COLOR_BGR2RGB when it has more than 2 dimensions),
        or None if the file extension is not in ext
    :raise
        ValueError if opencv cannot read the file
    '''
    if not file.endswith(tuple(ext)):
        print("file extension unknown")
        return None

    img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
    ## imread doesn't raise on a missing/corrupted file, it just returns None:
    ## fail here with a clear message instead of crashing later on img.shape
    if img is None:
        raise ValueError("cannot read image: " + str(file))
    if len(img.shape) > 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

In [None]:
def utils_plot_img(img, mask=None, rect=None, title=None, figsize=(5,3)):
    '''
    Plot a single image with pyplot.
    :parameter
        :param img: image array
        :param mask: image array - resized to the size of img and applied with bitwise_and
        :param rect: list of tuples - [(x1,y1), (x2,y2)], drawn on the image as a rectangle
        :param title: string
        :param figsize: tuple - plot setting
    '''
    plot_img = img.copy()
    if mask is not None:
        ## cv2.resize wants dsize=(width, height), while the array shape is (height, width, ...)
        mask = cv2.resize(mask, (img.shape[1],img.shape[0]), interpolation=cv2.INTER_LINEAR)
        plot_img = cv2.bitwise_and(plot_img, mask)
    if rect is not None:
        plot_img = cv2.rectangle(plot_img, rect[0], rect[1], (255,0,0), 4)
    fig, ax = plt.subplots(figsize=figsize)
    fig.suptitle(title, fontsize=20)
    if len(img.shape) > 2:
        ax.imshow(plot_img)
    else:
        ## 2d array (single channel) -> binary colormap
        ax.imshow(plot_img, cmap=plt.cm.binary)

In [None]:
def plot_imgs(lst_imgs, lst_titles=None, figsize=(20,13)):
    '''
    Plot n images in (1 row) x (n columns).
    :parameter
        :param lst_imgs: list (or any iterable, i.e. pandas Series) of image arrays
        :param lst_titles: list (or iterable) of titles - 1 element: title of the whole figure,
                           more elements: one title per image. Default: no titles
        :param figsize: tuple - plot setting
    '''
    ## work on plain lists: the inputs may be pandas Series, where [i] looks up the index
    ## label instead of the position
    lst_imgs = list(lst_imgs)
    lst_titles = [] if lst_titles is None else list(lst_titles)
    if len(lst_imgs) == 0:
        ## nothing to plot (subplots would fail with 0 columns)
        return None

    ## squeeze=False -> ax is always an array, even when there is a single image
    fig, ax = plt.subplots(nrows=1, ncols=len(lst_imgs), sharex=False, sharey=False,
                           figsize=figsize, squeeze=False)
    ax = ax.ravel()
    if len(lst_titles) == 1:
        fig.suptitle(lst_titles[0], fontsize=20)
    for i,img in enumerate(lst_imgs):
        ax[i].imshow(img)
        ## images without a matching title are left untitled
        if 1 < len(lst_titles) and i < len(lst_titles):
            ax[i].set(title=lst_titles[i])
    plt.show()

In [None]:
# try one: load a sample image and plot it, with the shape of its array as title
img = utils_load_img(file="data_dogs/0042188c895a2f14ef64a918ed9c7b64.jpg")
utils_plot_img(img=img, title="shape: "+str(img.shape))

In [None]:
# load all
dirpath = "data_dogs"
ext=['.png','.jpg','.jpeg','.JPG']

## index the image files by id (= file name without extension): every row of dtf gets its own
## image, regardless of the order of the files and of extra/missing files in the folder
dic_files = {os.path.splitext(file)[0]: file for file in sorted(os.listdir(dirpath))
             if file.endswith(tuple(ext))}

lst_imgs = []
errors = 0
for img_id in tqdm(dtf["id"].values):
    try:
        ## KeyError if there is no file for this id
        img = utils_load_img(file=os.path.join(dirpath, dic_files[img_id]), ext=ext)
        if img is None:
            raise ValueError("image not loaded")
        lst_imgs.append(img)
    except Exception as e:
        ## keep a NaN placeholder, so that lst_imgs stays aligned with the rows of dtf
        print("failed on:", img_id, "| error:", repr(e))
        errors += 1
        lst_imgs.append(np.nan)

dtf["img"] = lst_imgs
dtf = dtf[["id","img","label","y"]]
print("check:", len(lst_imgs), "=", len(dtf), " |  Nas:", errors, "=", dtf["img"].isna().sum())
dtf.head()

In [None]:
## first images of the dataset, titled with their label
plot_imgs(lst_imgs=dtf["img"].head(), lst_titles=dtf["label"].head())

### 1 - Data Analysis

###### Target

In [None]:
## number of images per class
y_counts = dtf["y"].value_counts()
y_counts.plot(kind="barh", title="Y", figsize=(5,3)).grid(axis='x')
plt.show()

###### Size

In [None]:
## image arrays are (rows, columns, channels): shape[0] is the height, shape[1] is the width
height = [img.shape[0] for img in dtf["img"]]
width = [img.shape[1] for img in dtf["img"]]

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15,5))

## same scatter on both panels: [0] all the images, [1] zoom on the 100-700 range
lst_settings = [
    {"title":"Size distribution"},
    {"xlim":[100,700], "ylim":[100,700], "title":"Zoom"},
]
for axis, settings in zip(ax, lst_settings):
    axis.scatter(x=width, y=height, color="black")
    axis.set(xlabel='width', ylabel="height", **settings)
    axis.grid()

plt.show()

In [None]:
img_size = (500,500)

## bring every image to the same size, then check the first ones (titled with their class code)
dtf["img"] = list(map(lambda img: cv2.resize(img, img_size), dtf["img"]))
plot_imgs(lst_imgs=dtf["img"].head(), lst_titles=dtf["y"].head())

###### Color

In [None]:
## input shape of the models: image size + 3 color channels
img_shape = (*img_size, 3)
img_shape

### 2 - Preprocessing

###### Scaling

In [None]:
## divide every image array by 255
dtf["img"] = dtf["img"].div(255)

###### Partitioning

In [None]:
## first n_train rows for training, all the remaining ones for testing
## (no overlap and no dropped rows, whatever the number of images)
n_train = 500

## .copy() -> independent frames: new columns (i.e. the predictions) can be added safely
dtf_train = dtf.iloc[:n_train].copy()
dtf_test = dtf.iloc[n_train:].copy()
dtf_test.head()

###### Data Augmentation

In [None]:
## img_size is used as (width, height) by cv2.resize, Resizing wants (height, width)
data_augmentation = models.Sequential(
    [
        layers.Normalization(),
        layers.Resizing(img_size[1], img_size[0]),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# Compute the mean and the variance of the training data for normalization.
data_augmentation.layers[0].adapt(np.array([x for x in dtf_train["img"].values]))

### 3 - Baseline (CNN)

###### Model Design

In [None]:
## Input: one tensor of shape img_shape per image
x_in = layers.Input(name="x_in", shape=img_shape)

## 1st block: Conv (32 filters, 3x3 kernel, relu) + MaxPool (2x2)
x_conv2d = layers.Conv2D(name="x_conv2d", filters=32, kernel_size=(3,3), activation="relu")(x_in)
x_maxpool = layers.MaxPooling2D(name='x_maxpool', pool_size=(2,2))(x_conv2d)

## 2nd block: same Conv + MaxPool, applied on the output of the 1st block
x_conv2d2 = layers.Conv2D(name="x_conv2d2", filters=32, kernel_size=(3,3), activation="relu")(x_maxpool)
x_maxpool2 = layers.MaxPooling2D(name='x_maxpool2', pool_size=(2,2))(x_conv2d2)

## Flat + Dense: flatten the feature maps and feed a hidden layer of 128 units
flat = layers.Flatten(name="flat")(x_maxpool2)
dense = layers.Dense(name="dense", units=128, activation='relu')(flat)

## Output: one softmax unit for each class found in the training set
y_out = layers.Dense(name="y_out", units=dtf_train["y"].nunique(), activation="softmax")(dense) # if binary -> units=1 + sigmoid

## Compile: sparse loss because y holds integer class codes (not one-hot vectors)
model = models.Model(inputs=x_in, outputs=y_out, name="CNN")
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) # if binary -> binary_crossentropy
model.summary()

In [None]:
# Sequential: the CNN declared as a plain stack of layers
lst_layers = [
    ## Conv + MaxPool (the first layer also declares the input shape)
    layers.Conv2D(name="x_conv2d", input_shape=img_size+(3,), filters=32, kernel_size=(3,3), activation="relu"),
    layers.MaxPooling2D(name='x_maxpool', pool_size=(2,2)),
    ## Conv + MaxPool
    layers.Conv2D(name="x_conv2d2", filters=32, kernel_size=(3,3), activation="relu"),
    layers.MaxPooling2D(name='x_maxpool2', pool_size=(2,2)),
    ## Flat + Dense
    layers.Flatten(name="flat"),
    layers.Dense(name="dense", units=128, activation='relu'),
    ## Output: one softmax unit per class of the training set (if binary -> 1 unit + sigmoid)
    layers.Dense(name="y_out", units=dtf_train["y"].nunique(), activation="softmax"),
]
model = models.Sequential(name="CNN", layers=lst_layers)

## sparse loss: y holds integer class codes (if binary -> binary_crossentropy)
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [None]:
## draw the architecture (layer names and shapes) and save it as model.png
utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)

###### Train / Test

In [None]:
def utils_plot_keras_training(training):
    '''
    Plot loss and metrics of keras training.
    :parameter
        :param training: object returned by model.fit - only its .history dict is used
                         (keys like "loss", "accuracy", "val_loss", "val_accuracy")
    Two panels are drawn: training (left) and validation (right). The validation panel
    is left empty if the history has no "val_loss" (training without validation data).
    '''
    history = training.history
    ## metrics other than the loss (named lst_metrics to not hide the sklearn metrics module)
    lst_metrics = [k for k in history.keys() if ("loss" not in k) and ("val" not in k)]
    fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(15,3))

    ## same plot for both panels, the validation keys just have the "val_" prefix
    for axis, (title, prefix) in zip(ax, [("Training",""), ("Validation","val_")]):
        axis.set(title=title)
        if prefix+"loss" not in history:
            continue
        ## loss on the left axis, scores on a twin right axis
        axis_score = axis.twinx()
        axis.plot(history[prefix+"loss"], "o-", color='black')
        axis.set_xlabel('Epochs')
        axis.set_ylabel('Loss', color='black')
        plotted = False
        for metric in lst_metrics:
            if prefix+metric in history:
                axis_score.plot(history[prefix+metric], "o-", label=metric)
                plotted = True
        axis_score.set_ylabel("Score", color='steelblue')
        if plotted:
            axis_score.legend()
    plt.show()

In [None]:
# train
## stop when the validation loss hasn't improved for 2 epochs
early_stopping = callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
training = model.fit(
    x=np.array(list(dtf_train["img"].values)),
    y=dtf_train["y"].values,
    validation_split=0.2,
    shuffle=True,
    epochs=10,
    batch_size=64,
    verbose=0,
    callbacks=[early_stopping],
)
model = training.model
utils_plot_keras_training(training)

In [None]:
# test
## probabilities of each class -> predicted class = the one with the highest probability
predicted_prob = model.predict(np.array(list(dtf_test["img"].values)))
predicted = list(np.argmax(predicted_prob, axis=1))

## store the predicted code and the corresponding label
dtf_test["yhat"] = predicted
dtf_test["pred"] = [dic_y_mapping[yhat] for yhat in predicted]
dtf_test.head()

###### Evaluate

In [None]:
def evaluate_multi_classif(y_test, predicted, predicted_prob, figsize=(15,5)):
    '''
    Evaluates a model performance: prints accuracy, auc (one-vs-rest) and the classification
    report, then plots the confusion matrix, the roc curves and the precision-recall curves.
    :parameter
        :param y_test: array - true classes
        :param predicted: array - predicted classes
        :param predicted_prob: array - predicted probabilities, indexed as [row, class]
        :param figsize: tuple - plot setting (roc and precision-recall figure)
    '''
    classes = np.unique(y_test)
    ## one dummy column per class, used for the curves of each class
    y_test_array = pd.get_dummies(y_test, drop_first=False).values
    label_format = '{0} (area={1:0.2f})'

    ## Accuracy, Precision, Recall
    accuracy = metrics.accuracy_score(y_test, predicted)
    auc = metrics.roc_auc_score(y_test, predicted_prob, multi_class="ovr")
    print("Accuracy:",  round(accuracy,2))
    print("Auc:", round(auc,2))
    print("Detail:")
    print(metrics.classification_report(y_test, predicted))

    ## Plot confusion matrix
    cm = metrics.confusion_matrix(y_test, predicted)
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap=plt.cm.Blues, cbar=False)
    ax.set(xlabel="Pred", ylabel="True", xticklabels=classes, yticklabels=classes, title="Confusion matrix")
    plt.yticks(rotation=0)

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=figsize)
    ax_roc, ax_pr = ax[0], ax[1]

    ## Plot roc (one curve per class + the diagonal as reference)
    for i, cls in enumerate(classes):
        fpr, tpr, _ = metrics.roc_curve(y_test_array[:,i], predicted_prob[:,i])
        area = metrics.auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=3, label=label_format.format(cls, area))
    ax_roc.plot([0,1], [0,1], color='navy', lw=3, linestyle='--')
    ax_roc.set(xlim=[-0.05,1.0], ylim=[0.0,1.05], xlabel='False Positive Rate',
               ylabel="True Positive Rate (Recall)", title="Receiver operating characteristic")
    ax_roc.legend(loc="lower right")
    ax_roc.grid(True)

    ## Plot precision-recall curve (one curve per class)
    for i, cls in enumerate(classes):
        precision, recall, _ = metrics.precision_recall_curve(y_test_array[:,i], predicted_prob[:,i])
        area = metrics.auc(recall, precision)
        ax_pr.plot(recall, precision, lw=3, label=label_format.format(cls, area))
    ax_pr.set(xlim=[0.0,1.05], ylim=[0.0,1.05], xlabel='Recall', ylabel="Precision", title="Precision-Recall curve")
    ax_pr.legend(loc="best")
    ax_pr.grid(True)
    plt.show()

In [None]:
## evaluate the predictions on the test set
evaluate_multi_classif(
    y_test=dtf_test["y"].values,
    predicted=dtf_test["yhat"].values,
    predicted_prob=predicted_prob,
    figsize=(15,5),
)

### 4 - Model Design & Testing (Transfer Learning)

### 5 - Model Design & Testing (ViT)

###### Load model

In [None]:
## feature extractor of the pretrained ViT checkpoint, tried on the first image of the dataset
vit_checkpoint = 'google/vit-base-patch16-224-in21k'
vit = transformers.ViTFeatureExtractor.from_pretrained(vit_checkpoint)
vit(images=dtf["img"][0])

In [None]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import requests

## download a sample image (timeout: don't hang forever if the server doesn't answer)
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

## the pretrained weights must be loaded with from_pretrained: the class constructor
## expects a config object, not the name of a checkpoint
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

## return_tensors="pt" -> pytorch tensors, as required by the forward pass of the model
inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

In [None]:
## same checkpoint loaded through the Auto classes: feature extractor + classification model
vit_name = "google/vit-base-patch16-224"
extractor = transformers.AutoFeatureExtractor.from_pretrained(vit_name)
model = transformers.AutoModelForImageClassification.from_pretrained(vit_name)

In [None]:
## the class is never imported on its own in this notebook: reach it through the transformers module
transformers.TFViTModel