In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
from IPython.display import clear_output
from tqdm import tqdm
import cv2

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import imagenet_utils
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.xception import Xception

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Read the labels csv
labels_df = pd.read_csv('dataset/labels.csv')
labels_df = shuffle(labels_df, random_state=1)

# Find the number of classes
num_classes = len(labels_df.loc[:,'breed'].unique())

# Encode the breed labels
le = LabelEncoder()
y = le.fit_transform(list(labels_df.breed))

In [3]:
#Initialize the train x and y
x = []

#Define the image size (for resizing)
im_size = 256

# Read the images and labels and append to the train lists
for img_id in tqdm(list(labels_df.id)):
    # Load the image and resize it
    img = load_img('dataset/train/{}.jpg'.format(img_id), target_size=(im_size, im_size))
    #Convert the image to a numpy array
    img = img_to_array(img)
    # Expand dimensions so that the images can be stacked together
    img = np.expand_dims(img, axis=0)
    # Mean (RGB) subtraction preprocessing
    img = imagenet_utils.preprocess_input(img)
    # Add the image to the training set
    x.append(img)

100%|██████████| 10222/10222 [00:35<00:00, 285.86it/s]


In [4]:
# Vertically stack the images from [N x (256,256,3)] to (N, 256, 256, 3)
x_arr = np.vstack(x)

In [5]:
x_arr.shape

(10222, 256, 256, 3)

In [6]:
# Call the pre-trained model without the top layer(classes) and use the predictions as features
model = VGG16(weights='imagenet', include_top=False)
features = model.predict(x_arr)

In [7]:
features.shape

(10222, 8, 8, 512)

In [8]:
# Reshape the features to (N, 8x8x512) instead of (N, 8, 8, 512)
new_shape = features.shape[1]*features.shape[2]*features.shape[3]
features = features.reshape((features.shape[0], new_shape))

In [9]:
features.shape

(10222, 32768)

In [10]:
# Use the train_test_split function or split the dataset manually, depending on the size
# You might run into memory error with the function
#x_train, x_test, y_train, y_test = train_test_split(features, y, test_size=0.3, random_state=1)

In [20]:
# Define the train-test split mark
i = int(features.shape[0]*0.7)

# Create the train and test set
x_train = features[:i]
y_train = y[:i]
x_test = features[i:]
y_test = y[i:]

In [15]:
#
params = {"C": [0.1]}
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs')
model = GridSearchCV(lr, params, cv=3)
model.fit(x_train, y_train)

GridSearchCV(cv=3, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1, param_grid={'C': [0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [21]:
# Evalaute the model
preds = model.predict(x_test)
print(classification_report(y_test, preds, target_names=le.classes_))

                                precision    recall  f1-score   support

                 affenpinscher       0.82      0.67      0.73        27
                  afghan_hound       0.78      0.86      0.82        29
           african_hunting_dog       0.91      0.91      0.91        34
                      airedale       0.70      0.76      0.73        25
american_staffordshire_terrier       0.48      0.48      0.48        25
                   appenzeller       0.45      0.48      0.47        21
            australian_terrier       0.73      0.53      0.62        30
                       basenji       0.85      0.79      0.81        28
                        basset       0.76      0.50      0.60        32
                        beagle       0.50      0.62      0.55        29
            bedlington_terrier       0.91      1.00      0.95        21
          bernese_mountain_dog       0.80      0.82      0.81        34
       black-and-tan_coonhound       0.79      0.85      0.81  

In [18]:
y_test.shape

(7155,)

In [19]:
preds.shape

(3067,)

In [22]:
params = {"C": [1]}
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs')
model = GridSearchCV(lr, params, cv=3)
model.fit(x_train, y_train)
# Evalaute the model
preds = model.predict(x_test)
print(classification_report(y_test, preds, target_names=le.classes_))

                                precision    recall  f1-score   support

                 affenpinscher       0.77      0.63      0.69        27
                  afghan_hound       0.81      0.86      0.83        29
           african_hunting_dog       0.91      0.94      0.93        34
                      airedale       0.69      0.72      0.71        25
american_staffordshire_terrier       0.48      0.44      0.46        25
                   appenzeller       0.38      0.52      0.44        21
            australian_terrier       0.74      0.57      0.64        30
                       basenji       0.87      0.71      0.78        28
                        basset       0.72      0.56      0.63        32
                        beagle       0.49      0.59      0.53        29
            bedlington_terrier       0.81      1.00      0.89        21
          bernese_mountain_dog       0.77      0.79      0.78        34
       black-and-tan_coonhound       0.72      0.81      0.76  

In [24]:
from sklearn.svm import LinearSVC
model = LinearSVC()
model.fit(x_train, y_train)
# Evalaute the model
preds = model.predict(x_test)
print(classification_report(y_test, preds, target_names=le.classes_))

                                precision    recall  f1-score   support

                 affenpinscher       0.68      0.70      0.69        27
                  afghan_hound       0.81      0.86      0.83        29
           african_hunting_dog       0.89      0.91      0.90        34
                      airedale       0.53      0.72      0.61        25
american_staffordshire_terrier       0.47      0.36      0.41        25
                   appenzeller       0.50      0.33      0.40        21
            australian_terrier       0.67      0.47      0.55        30
                       basenji       0.64      0.57      0.60        28
                        basset       0.75      0.47      0.58        32
                        beagle       0.42      0.62      0.50        29
            bedlington_terrier       0.72      1.00      0.84        21
          bernese_mountain_dog       0.63      0.85      0.72        34
       black-and-tan_coonhound       0.61      0.73      0.67  

In [25]:
from sklearn.linear_model import SGDClassifier
model = SGDClassifier()
model.fit(x_train, y_train)
# Evalaute the model
preds = model.predict(x_test)
print(classification_report(y_test, preds, target_names=le.classes_))



                                precision    recall  f1-score   support

                 affenpinscher       0.67      0.67      0.67        27
                  afghan_hound       0.75      0.83      0.79        29
           african_hunting_dog       0.83      0.88      0.86        34
                      airedale       0.44      0.64      0.52        25
american_staffordshire_terrier       0.26      0.40      0.31        25
                   appenzeller       0.35      0.38      0.36        21
            australian_terrier       0.58      0.37      0.45        30
                       basenji       0.37      0.50      0.42        28
                        basset       0.62      0.41      0.49        32
                        beagle       0.41      0.59      0.49        29
            bedlington_terrier       0.95      0.86      0.90        21
          bernese_mountain_dog       0.76      0.74      0.75        34
       black-and-tan_coonhound       0.61      0.42      0.50  