 # Animal face classification

 This is a simple prototype that classifies animal face images as cat, dog, or wild.

In [1]:
from zipfile import ZipFile
import pickle
import numpy as np
from deepfeatx.image import ImageFeatureExtractor as imgExt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import gradio as gd


 ## Unzipping file
 It assumes that the file __data.zip__ is present in the same folder.

 This file comes from [this](https://www.kaggle.com/andrewmvd/animal-faces) Kaggle dataset.

In [2]:
# Unpack the dataset archive into the current working directory.
with ZipFile('data.zip', mode='r') as archive:
    archive.extractall()


 ## Loading training data
 Images are loaded from disk and converted to feature vectors using a pre-trained convolutional neural network.

 Along with the feature vector, the image label and the image path are also returned.

In [3]:
# Extract one feature row per image found under data/train, keeping the
# class name of each image alongside its features.
training = imgExt().extract_features_from_directory(
    'data/train',
    export_class_names=True,
)

Found 14630 images belonging to 3 classes.


In [4]:
# Preview the first five rows of the extracted feature table.
training.head(n=5)


Unnamed: 0,filepaths,classes,0,1,2,3,4,5,6,7,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,data/training\cat\flickr_cat_000002.jpg,cat,0.017109,0.152434,0.0,0.193805,0.524241,0.071501,0.123126,0.10783,...,0.0,0.053416,0.304579,0.661124,0.0,0.07044,1.919095,0.047214,0.091598,0.109597
1,data/training\cat\flickr_cat_000003.jpg,cat,0.359051,0.66902,0.002149,0.285272,0.651593,0.684492,0.818901,0.06035,...,0.400328,0.096731,0.0,0.837292,0.041907,0.005843,1.734416,0.49665,0.133781,0.096114
2,data/training\cat\flickr_cat_000004.jpg,cat,0.294523,0.854209,0.297969,0.272979,0.575808,0.554063,0.473147,0.051065,...,0.067802,0.0,0.342157,0.62331,0.197095,0.0,1.469772,0.404472,0.165518,0.409393
3,data/training\cat\flickr_cat_000005.jpg,cat,0.265239,0.638123,0.046134,0.166363,0.240085,0.141123,0.50479,0.037237,...,0.091863,0.0,0.14526,0.087608,0.077971,0.0,1.520948,0.168698,0.340189,0.257377
4,data/training\cat\flickr_cat_000006.jpg,cat,0.534863,0.686893,0.118877,0.201426,0.814117,0.295828,0.731437,0.447083,...,0.20505,0.237526,0.04188,1.111808,0.018326,0.0,1.3715,0.125551,0.249915,0.057478


 ## Preprocessing
 Here the training set is split into training and validation sets. The features (X) and the labels (y) are also separated.

 Train and validation sets have the same sample proportion for each label.

In [5]:
# Labels are the class names; features are every remaining column once the
# file path and class columns are removed.
y = training['classes']
x = training.drop(columns=['filepaths', 'classes'])

# Hold out 20% for validation, stratified on the label so both subsets keep
# the same class proportions; the fixed seed makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [6]:
# Report the shapes of both subsets, one line per subset.
print(
    f'x_train shape: {x_train.shape}; y_train shape: {y_train.shape}',
    f'x_val shape: {x_val.shape}; y_val shape: {y_val.shape}',
    sep='\n',
)


x_train shape: (11704, 2048); y_train shape: (11704,)
x_val shape: (2926, 2048); y_val shape: (2926,)


In [7]:
# Relative frequency of each class among the training labels; the split was
# stratified on y, so these should match the validation proportions closely.
y_train.value_counts(normalize=True)


cat     0.352187
wild    0.323906
dog     0.323906
Name: classes, dtype: float64

In [8]:
# Relative frequency of each class among the validation labels; the split was
# stratified on y, so these should match the training proportions closely.
y_val.value_counts(normalize=True)


cat     0.352358
dog     0.323992
wild    0.323650
Name: classes, dtype: float64

 ## Training a model
 A logistic regression model is trained on the extracted features; it is used later to make predictions.

In [9]:
# Fit the classifier on the training features (fit() returns the estimator).
logisticRegression = LogisticRegression(n_jobs=-1).fit(x_train, y_train)

# Mean accuracy on the data the model was fitted on and on the held-out split.
train_accuracy = logisticRegression.score(x_train, y_train)
validation_accuracy = logisticRegression.score(x_val, y_val)
print(f'train accuracy: {train_accuracy}')
print(f'validation accuracy: {validation_accuracy}')


train accuracy: 1.0
validation accuracy: 0.9982911825017088


 ## Testing the model
Here, new data is loaded to test the model.

In [10]:
# Extract features for the images under data/val, which serve as the test set.
testing = imgExt().extract_features_from_directory(
    'data/val',
    export_class_names=True,
    batch_size=64,
)

# Same feature/label separation as for the training data.
y_test = testing['classes']
x_test = testing.drop(columns=['filepaths', 'classes'])


Found 1500 images belonging to 3 classes.


In [14]:
# Mean accuracy of the fitted model on the test features.
test_accuracy = logisticRegression.score(x_test, y_test)
print(f'test accuracy: {test_accuracy}')


test accuracy: 0.9986666666666667


 ## Storing the model
 Finally, the model is stored so that it can be reused in any application.

In [12]:
# Persist the fitted classifier to disk. The file handle gets its own name so
# that `model` is not left bound to a closed file object in the notebook
# namespace (that name is used for the classifier itself elsewhere).
with open('model.pkl', 'wb') as model_file:
    pickle.dump(logisticRegression, model_file)


 ## Live demo
 Here there's a simple interface to submit new images and see the results.

In [13]:
# Read the stored classifier back from disk.
# NOTE: unpickling executes code embedded in the file; only load pickles
# that were produced by a trusted source (here, this notebook).
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Cached feature extractor, created on first use and then shared by every call.
_feature_extractor = None


def _get_feature_extractor():
    """Return the shared feature extractor, creating it on first use."""
    global _feature_extractor
    if _feature_extractor is None:
        # Constructing the extractor presumably sets up the pre-trained
        # network, so it is built once rather than on every prediction.
        _feature_extractor = imgExt()
    return _feature_extractor


def classify(img):
    """Predict the class of a single image and report the confidence.

    Parameters
    ----------
    img : image object or None
        The image to classify, as handed over by the interface
        (a PIL image when the input component uses type='pil').
        None means that no image was submitted.

    Returns
    -------
    str
        'class: <label>; confidence: <percent>' for the most probable class,
        or 'no image provided' when img is None.
    """
    # An empty submission carries no image; answer without running the model.
    if img is None:
        return 'no image provided'

    features = _get_feature_extractor().img_to_vector(img)
    # Only the first row of probabilities is used (one image per call).
    probs = model.predict_proba(features)[0]
    idx = np.argmax(probs)
    # classes_ is ordered like the probability columns, so idx selects the label.
    pred = model.classes_[idx]
    prob = probs[idx] * 100.0
    return f'class: {pred}; confidence: {prob:.2f}'

# Image upload widget that hands the picture to the callback as a PIL image.
gd_input = gd.inputs.Image(type='pil')

# Wire the classifier to a text output and start the local web interface.
interface = gd.Interface(
    fn=classify,
    inputs=gd_input,
    outputs='text',
)
interface.launch()


Running locally at: http://127.0.0.1:7860/
To create a public link, set `share=True` in `launch()`.
Interface loading below...


(<Flask 'gradio.networking'>, 'http://127.0.0.1:7860/', None)