The objective of the Datathon is to build a Deep Learning model that would allow us to predict animal species from a real-world animal image dataset!

In [None]:
import glob
import sys

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
from google_drive_downloader import GoogleDriveDownloader as gdd

# Fetch the zipped animal dataset from Google Drive.
# unzip=True asks the downloader to extract the archive once the download
# has finished.
gdd.download_file_from_google_drive(
    file_id='176E-pLhoxTgWsJ3MeoJQV_GXczIA6g8D',
    dest_path='/content/animals.zip',
    unzip=True,
)

Downloading 176E-pLhoxTgWsJ3MeoJQV_GXczIA6g8D into /content/animals.zip... Done.
Unzipping...Done.


In [None]:
# Settings shared by the training and validation generators. Both generators
# read the same directory and must use the same validation split, otherwise
# the "training" and "validation" subsets are no longer complementary.
TRAIN_DIR = "animal_dataset_intermediate/train"
IMAGE_SIZE = (320, 320)   # target size every image is resized to
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2    # fraction of the images held out for validation
SEED = 42                 # fixes shuffling order and random augmentations

# Training images: rescale pixel values to [0, 1] and apply random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255.,
    validation_split=VALIDATION_SPLIT,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images: rescaling only, no augmentation.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255.,
    validation_split=VALIDATION_SPLIT,
)

# class_mode='sparse' yields integer class labels, which is what the
# 'sparse_categorical_crossentropy' loss expects.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    subset="training",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    seed=SEED,
)

val_generator = val_datagen.flow_from_directory(
    TRAIN_DIR,
    subset="validation",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    seed=SEED,
)

Found 6558 images belonging to 5 classes.
Found 1638 images belonging to 5 classes.


In [None]:
# Shape of the images fed to the network (320 x 320 pixels, 3 channels).
INPUT_SHAPE = (320, 320, 3)

# Pretrained convolutional base: InceptionResNetV2 with ImageNet weights and
# without its original classification head (include_top=False).
# A VGG19 base (tf.keras.applications.vgg19.VGG19 with the same arguments)
# is an alternative that is currently disabled.
InceptionResNetV2_layers = tf.keras.applications.InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=INPUT_SHAPE,
)

In [None]:
# Fine-tune the whole backbone: mark every layer of the pretrained base as
# trainable so its weights are updated during training.
for backbone_layer in InceptionResNetV2_layers.layers:
    backbone_layer.trainable = True

In [None]:
# Full classifier: the pretrained InceptionResNetV2 base followed by a small
# fully connected head.
model = tf.keras.models.Sequential([
    # pretrained convolutional base
    InceptionResNetV2_layers,
    # flatten the feature maps into a single vector per image
    tf.keras.layers.Flatten(),
    # two dense blocks, each followed by dropout for regularisation
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(rate=0.25),
    # output layer: one softmax unit per animal class (5 classes)
    tf.keras.layers.Dense(5, activation='softmax'),
])

# The sparse loss pairs with the integer labels produced by
# class_mode='sparse'.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    metrics=['accuracy'],
)

# Print the layer-by-layer overview.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_resnet_v2 (Functio (None, 8, 8, 1536)        54336736  
_________________________________________________________________
flatten (Flatten)            (None, 98304)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               50332160  
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 2

In [None]:
# Maximum number of training epochs.
EPOCHS = 100

# Stop training once val_loss has failed to improve for 3 consecutive epochs
# and restore the weights from the best epoch seen.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    restore_best_weights=True,
)

In [None]:
# Train the model on the augmented training generator and evaluate on the
# validation generator after every epoch.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    # Use the EPOCHS constant instead of repeating the literal; the
    # EarlyStopping callback may end training before this limit is reached.
    epochs=EPOCHS,
    callbacks=[es_callback],
    verbose=2,  # one summary line per epoch
)

Epoch 1/100
205/205 - 570s - loss: 0.3055 - accuracy: 0.8977 - val_loss: 0.0760 - val_accuracy: 0.9762
Epoch 2/100
205/205 - 560s - loss: 0.1263 - accuracy: 0.9614 - val_loss: 0.0396 - val_accuracy: 0.9878
Epoch 3/100
205/205 - 560s - loss: 0.0865 - accuracy: 0.9712 - val_loss: 0.0353 - val_accuracy: 0.9884
Epoch 4/100
205/205 - 560s - loss: 0.0606 - accuracy: 0.9814 - val_loss: 0.0285 - val_accuracy: 0.9902
Epoch 5/100
205/205 - 560s - loss: 0.0596 - accuracy: 0.9823 - val_loss: 0.0368 - val_accuracy: 0.9884
Epoch 6/100
205/205 - 559s - loss: 0.0474 - accuracy: 0.9841 - val_loss: 0.0419 - val_accuracy: 0.9884
Epoch 7/100
Restoring model weights from the end of the best epoch.
205/205 - 560s - loss: 0.0360 - accuracy: 0.9881 - val_loss: 0.0527 - val_accuracy: 0.9890
Epoch 00007: early stopping


## Predictions

In [None]:
# Test-set manifest; its 'filename' column lists the images to classify.
test_data = pd.read_csv("animal_dataset_intermediate/Testing_set_animals.csv")

# Number of test images.
test_data['filename'].size

910

In [None]:
# Pre-allocate one 320x320x3 slot per test image.
# float32 instead of NumPy's default float64 halves the memory footprint of
# this array and matches the precision the network computes in.
test_arr = np.zeros((len(test_data['filename']), 320, 320, 3), dtype=np.float32)

In [None]:
# Load every test image listed in the CSV into the pre-allocated array.
for i, img in enumerate(test_data['filename']):
  img_path = "animal_dataset_intermediate/test/{}".format(img)
  bgr_image = cv2.imread(img_path)
  # cv2.imread signals failure by returning None rather than raising.
  if bgr_image is None:
    raise FileNotFoundError("Could not read test image: {}".format(img_path))
  # OpenCV decodes images as BGR, whereas the Keras generators used for
  # training load RGB; convert so the channel order matches training.
  rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
  test_arr[i] = cv2.resize(rgb_image, (320, 320))

In [None]:
# Scale pixel values from [0, 255] to [0, 1], mirroring the rescaling applied
# by the training generators.
# NOTE: this overwrites test_arr, so re-running the cell rescales it again.
test_arr = np.divide(test_arr, 255.)

In [None]:
# Run the trained model on the test images; the softmax output layer gives
# one score per class for each image.
predictions = model.predict(x=test_arr)

In [None]:
# Pick the highest-scoring class index for every test image.
prediction_labels = predictions.argmax(axis=1)

# Preview the first 50 predicted class indices.
prediction_labels[:50]

array([1, 1, 1, 1, 1, 1, 0, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 3, 3, 0, 0,
       3, 3, 3, 3, 0, 1, 1, 1, 3, 3, 3, 3, 3, 0, 3, 3, 3, 1, 1, 3, 1, 3,
       3, 0, 0, 0, 0, 0])

In [None]:
# Class index -> class name.
# NOTE(review): the indices follow the alphabetical order of the names,
# which is presumably how flow_from_directory numbered the class folders;
# verify against train_generator.class_indices.
label_dict = dict(enumerate(['elefante', 'farfalla', 'mucca', 'pecora', 'scoiattolo']))

In [None]:
# Translate each predicted class index into its class name.
prediction_labels_names = []
for class_index in prediction_labels:
    prediction_labels_names.append(label_dict[class_index])

In [None]:
# Lift NumPy's print-truncation threshold so arrays are printed in full.
# `sys` is imported in the notebook's import cell at the top.
# NOTE: this setting only affects how NumPy arrays are printed;
# prediction_labels_names is a plain Python list and is shown in full
# regardless.
np.set_printoptions(threshold=sys.maxsize)

# Display every predicted class name.
prediction_labels_names

['farfalla',
 'farfalla',
 'farfalla',
 'farfalla',
 'farfalla',
 'farfalla',
 'elefante',
 'pecora',
 'pecora',
 'pecora',
 'pecora',
 'elefante',
 'elefante',
 'elefante',
 'pecora',
 'farfalla',
 'elefante',
 'elefante',
 'pecora',
 'pecora',
 'elefante',
 'elefante',
 'pecora',
 'pecora',
 'pecora',
 'pecora',
 'elefante',
 'farfalla',
 'farfalla',
 'farfalla',
 'pecora',
 'pecora',
 'pecora',
 'pecora',
 'pecora',
 'elefante',
 'pecora',
 'pecora',
 'pecora',
 'farfalla',
 'farfalla',
 'pecora',
 'farfalla',
 'pecora',
 'pecora',
 'elefante',
 'elefante',
 'elefante',
 'elefante',
 'elefante',
 'elefante',
 'farfalla',
 'farfalla',
 'farfalla',
 'pecora',
 'farfalla',
 'pecora',
 'elefante',
 'farfalla',
 'pecora',
 'pecora',
 'farfalla',
 'farfalla',
 'farfalla',
 'pecora',
 'farfalla',
 'pecora',
 'farfalla',
 'farfalla',
 'pecora',
 'elefante',
 'farfalla',
 'pecora',
 'pecora',
 'pecora',
 'farfalla',
 'elefante',
 'elefante',
 'farfalla',
 'farfalla',
 'farfalla',
 'farfalla'