In [None]:
import os
import cv2
import math
import json

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [None]:
# Tabular metadata for the labelled (train) and unlabelled (test) pets.
train_df = pd.read_csv(
    '../input/petfinder-adoption-prediction/train/train.csv'
)
test_df = pd.read_csv(
    '../input/petfinder-adoption-prediction/test/test.csv'
)

In [None]:
# Row/column counts of both frames (one per line), then a preview of the training rows.
print(train_df.shape, test_df.shape, sep='\n')
train_df.head()

In [None]:
# Map each PetID to its AdoptionSpeed (the target variable).
name_target_dict = pd.Series(
    train_df['AdoptionSpeed'].values,
    index=train_df['PetID'],
).to_dict()

# File names of every image shipped with the train and test sets.
train_image_names = os.listdir('../input/petfinder-adoption-prediction/train_images')
test_image_names = os.listdir('../input/petfinder-adoption-prediction/test_images')

In [None]:
# name_target_dict

In [None]:
# Display a grid of training photos, each titled with its AdoptionSpeed label.
def display_samples(columns=4, rows=4):
    """Plot the first ``columns * rows`` training images in a grid.

    The images are taken in the order of ``train_image_names`` (no random
    sampling is performed). Each subplot is titled with the label looked up
    in ``name_target_dict`` using the part of the file name before the
    first '-'.

    Args:
        columns: Number of grid columns.
        rows: Number of grid rows.
    """
    fig = plt.figure(figsize=(5*columns, 3*rows))

    # Slicing (instead of indexing) tolerates having fewer images than grid cells.
    sample_names = train_image_names[:columns*rows]

    for position, image_path in enumerate(sample_names, start=1):
        label = name_target_dict[image_path.split('-')[0]]
        img = cv2.imread(f'../input/petfinder-adoption-prediction/train_images/{image_path}')

        ax = fig.add_subplot(rows, columns, position)
        ax.set_title(str(label))

        # cv2.imread returns None when the file cannot be decoded; leave the
        # titled subplot empty instead of crashing the whole figure.
        if img is None:
            continue

        # OpenCV loads pixels as BGR while matplotlib expects RGB; convert so
        # the colours are displayed correctly.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

display_samples()

Each image is titled with its `AdoptionSpeed` label.  


##### DataGenerator from dataframe

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Build the table consumed by the generator: one row per training image.
# The label is looked up with the part of the file name before the first '-'
# (i.e. without the per-pet image numbering).
generator_dict = {
    'filename': list(train_image_names),
    'class': [name_target_dict[name.split('-')[0]] for name in train_image_names],
}

generator_df = pd.DataFrame(generator_dict)
# Labels are stored as strings -- presumably because the categorical
# class_mode used later expects string class names (confirm against Keras docs).
generator_df['class'] = generator_df['class'].astype(str)

generator_df.head()

##### Create datagen object

In [None]:
# Image pipeline shared by the training and validation subsets.
# NOTE(review): because both subsets are drawn from this one object, the
# random augmentations below are also applied to the validation images.
datagen = ImageDataGenerator(
    # --- pixel scaling and hold-out split ---
    rescale=1/255.,          # scale pixel values by 1/255
    validation_split=0.1,    # reserve 10% of the images for validation
    # --- random geometric augmentations ---
    rotation_range=15,       # rotate by up to 15 degrees
    width_shift_range=0.1,   # horizontal shift, as a fraction of the width
    height_shift_range=0.1,  # vertical shift, as a fraction of the height
    shear_range=0.1,         # shear intensity
    zoom_range=0.1,          # zoom range
    horizontal_flip=True,    # random left/right flips
)

In [None]:
def create_generator(subset, target_size=(100, 100), batch_size=128):
    """Create a batch iterator over the training images for one data subset.

    Images are read from the train_images directory using the 'filename' and
    'class' columns of ``generator_df`` and passed through ``datagen``.

    Args:
        subset: Which part of ``datagen``'s validation split to iterate over;
            this notebook uses 'training' and 'validation'.
        target_size: (height, width) every image is resized to. Defaults to
            (100, 100), the value previously hard-coded here.
        batch_size: Number of images per batch. Defaults to 128, the value
            previously hard-coded here.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``; batches are
        shuffled with a fixed seed (42) and labels are one-hot
        ('categorical').
    """
    return datagen.flow_from_dataframe(
        generator_df,
        '../input/petfinder-adoption-prediction/train_images',
        x_col='filename',
        y_col='class',
        target_size=target_size,
        color_mode='rgb',
        class_mode='categorical',
        batch_size=batch_size,
        shuffle=True,
        seed=42,
        subset=subset
    )

train_generator = create_generator('training')
val_generator = create_generator('validation')

### Model

In [None]:
import keras  # kept from the original cell; not referenced by the visible cells
# Import the callback from tensorflow.keras, the same package that provides
# the model, instead of the standalone `keras` package: mixing the two
# implementations is not guaranteed to work.
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications import DenseNet121

# DenseNet121 trained from scratch (no pretrained weights), with its built-in
# classification head sized for the 5 classes and 100x100 RGB inputs.
model = DenseNet121(
    weights=None,
    include_top=True,
    classes=5,
    input_shape=(100, 100, 3)
)

# One-hot labels from the generators -> categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

In [None]:
# Save the full model (not just the weights) to model.h5 whenever validation
# accuracy improves.
# The model is compiled with metrics=['accuracy'], which TF 2.x tf.keras logs
# as 'val_accuracy'; monitoring the old 'val_acc' key would never find the
# metric, so no checkpoint would be written.
# NOTE(review): assumes TF 2.x -- older tf.keras releases log 'val_acc'.
checkpoint = ModelCheckpoint(
    'model.h5',
    monitor='val_accuracy',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='max'  # higher accuracy is better; stated explicitly rather than inferred
)

In [None]:
# Train for 10 epochs with the checkpoint callback attached.
# Step counts are taken from the generators (len() is the number of batches
# in one pass) instead of the previous hard-coded, fractional 75000 / 128, so
# each epoch covers the training and validation subsets exactly once.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    callbacks=[checkpoint],
    validation_data=val_generator,
    validation_steps=len(val_generator),
    use_multiprocessing=True,
    workers=2,
    verbose=1
)

### Evaluation

In [None]:
# model.load_weights('model.h5')

# Score the in-memory model (state after the last epoch) on the validation
# subset. Uses model.evaluate, which accepts generators directly, rather than
# the deprecated evaluate_generator -- consistent with model.fit above.
val_scores = model.evaluate(
    val_generator,
    steps=len(val_generator),
    use_multiprocessing=True,
    verbose=1,
    workers=2
)

# val_scores follows the compile() order: [loss, accuracy].
print('\nValidation loss:', val_scores[0])
print('Validation accuracy:', val_scores[1])

### Submission

In [None]:
# Sample submission file; used later as the template for the final submission.
sample_submission_df = pd.read_csv(
    '../input/petfinder-adoption-prediction/test/sample_submission.csv'
)
print(sample_submission_df.shape)
sample_submission_df.head()

In [None]:
# One row per test image: the pet it belongs to (file-name prefix before the
# first '-') and the image file name, mirroring the training table.
submission_dict = {
    'PetID': [name.split('-')[0] for name in test_image_names],
    'filename': list(test_image_names),
}

submission_df = pd.DataFrame(submission_dict)
submission_df.head()

In [None]:
# Generator for prediction: only rescaling (no augmentation), fixed order
# (shuffle=False) so predictions line up with the rows of submission_df.
test_generator = ImageDataGenerator(rescale=1/255.).flow_from_dataframe(
    dataframe=submission_df,
    directory='../input/petfinder-adoption-prediction/test_images',
    x_col='filename',     # the default column name, spelled out for clarity
    class_mode=None,      # yield images only; there are no labels for the test set
    target_size=(100, 100),
    color_mode='rgb',
    batch_size=256,
    shuffle=False,
)

In [None]:
# Predict class probabilities for every test image. Uses model.predict, which
# accepts generators directly, rather than the deprecated predict_generator --
# consistent with model.fit above.
test_predictions = model.predict(
    test_generator,
    workers=2,
    use_multiprocessing=True,
    verbose=1
)

In [None]:
# Attach the per-image predictions (one integer-named column per class) by
# row position, then discard the file name, which is no longer needed.
submission_df = (
    submission_df
    .join(pd.DataFrame(test_predictions))
    .drop(columns=['filename'])
)
print(submission_df.shape)
submission_df.head()

In [None]:
# A pet can have several pictures, so average the class probabilities over
# all of its images to get one row per PetID.

submission_df = submission_df.groupby('PetID').mean().reset_index()
print(submission_df.shape)
submission_df.head()

In [None]:
# Keep only the most probable class per pet: columns 1..5 hold the averaged
# class probabilities (column 0 is PetID), so the argmax across them is the
# predicted AdoptionSpeed. The probability columns are then dropped.

submission_df['AdoptionSpeed'] = np.argmax(submission_df.iloc[:, 1:6].values, axis=1)
submission_df = submission_df.drop(columns=[0, 1, 2, 3, 4])
print(submission_df.shape)
submission_df.head()

In [None]:
# Align predictions with the sample submission: its placeholder AdoptionSpeed
# is removed and ours is left-joined on PetID, so pets without a prediction
# get NaN.
submission_df = (
    sample_submission_df
    .drop(columns=['AdoptionSpeed'])
    .merge(submission_df, how='left', on='PetID')
)

In [None]:
# Count missing values per column (pets that received no prediction).
submission_df.isnull().sum()

In [None]:
# Pets left without a prediction by the left merge fall back to class 4.
submission_df = submission_df.fillna(value=4.0)

In [None]:
# Re-check missing values after the fill; every count should now be zero.
submission_df.isnull().sum()

In [None]:
# The fill value (and any NaN introduced by the merge) made the column float;
# cast the class ids back to integers.
submission_df = submission_df.astype({'AdoptionSpeed': int})

In [None]:
# Write the final submission file, without the DataFrame index.
submission_df.to_csv(path_or_buf='submission.csv', index=False)