In [None]:
import os
import glob
import zipfile
import seaborn as sb
import numpy as np # linear algebra
import pandas as pd # data processing 
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.models import Model,Sequential
from keras.layers import Conv2D, GlobalAveragePooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from sklearn.metrics import confusion_matrix,classification_report
from keras.optimizers import Adam


# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))


In [None]:
# Extract every zip archive of the dataset into the local "datasets" folder.
archive_paths = glob.glob('/kaggle/input/dogs-vs-cats/*.zip')  # every *.zip in the input folder
for archive_path in archive_paths:
    # the context manager closes the archive as soon as extraction finishes
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall("datasets")
        

In [None]:
  def prepare_data(dir_path):
    """List the entries of ``dir_path`` and derive a binary label for each one.

    The label is taken from the part of the file name before the first dot:
    ``'dog'`` maps to 1 and every other prefix maps to 0.

    Args:
        dir_path: Directory whose entries are listed.

    Returns:
        A ``(filenames, labels)`` tuple of two lists of equal length, with the
        file names in sorted order.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the listing
    # (and any positional slicing done on it later) reproducible across runs.
    filenames = sorted(os.listdir(dir_path))
    # Anything that is not prefixed with 'dog' falls into class 0.
    labels = [1 if filename.split('.')[0] == 'dog' else 0 for filename in filenames]
    return filenames,labels

# Index the extracted training and test folders.
# Labels follow prepare_data's rule, so they are only meaningful where the file
# names carry a 'dog'/'cat' prefix.
train_filenames, train_labels = prepare_data('datasets/train')
test_filenames, test_labels = prepare_data('datasets/test1')

# Wrap each listing in a two-column DataFrame: 'filename' and 'class'.
df_train, df_test = (
    pd.DataFrame({'filename': names, 'class': classes})
    for names, classes in (
        (train_filenames, train_labels),
        (test_filenames, test_labels),
    )
)

In [None]:
# Preview the first five rows of the training listing.
df_train.head(5)

In [None]:
# Preview the first five rows of the test listing.
df_test.head(5)

In [None]:
# (rows, columns) of the training and test listings.
(df_train.shape, df_test.shape)

In [None]:
# Bar chart of how many training images fall into each class.
class_count_ax = sb.countplot(data=df_train, x='class')
class_count_ax.set_title("Data Distribution")

In [None]:
# The dataset is pretty balanced. Since we use a pretrained model we do not need
# every image, so we keep only the first 3000 samples of each class.
samples_per_class = 3000
cats = df_train[df_train['class']==0][:samples_per_class]
dogs = df_train[df_train['class']==1][:samples_per_class]
# concatenate the two subsets and shuffle the rows; the fixed seed makes the
# row order identical on every Restart & Run All
df = pd.concat([cats,dogs]).sample(frac=1, random_state=42)
df.shape

In [None]:
DIR_PATH = 'datasets/train/'
# Show the first nine images of the shuffled dataframe in a 3x3 grid.
fig, axis = plt.subplots(3, 3, figsize=(20, 20))
for position, panel in enumerate(axis.flat):
    picture = load_img(f"{DIR_PATH}{df['filename'].iloc[position]}")
    panel.imshow(picture)
    panel.axis('off')  # hide ticks and frame, only the picture matters here

In [None]:
# set some parameters
image_shape = (224,224)
epochs = 5
batch_size = 32
# to avoid type errors in the generators we cast both columns to string objects
# (rebinding df instead of mutating it column by column keeps the cell re-runnable)
df = df.astype({'filename': 'str', 'class': 'str'})
# split data into train and validation sets; the fixed seed makes the split
# reproducible and stratifying keeps the class balance equal in both parts
train, validation = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['class'])
# reset indexes (drop=True so the old index is not kept as an extra 'index' column)
train = train.reset_index(drop=True)
validation = validation.reset_index(drop=True)

# data augmentation, applied to the training images only
# NOTE(review): the ImageNet ResNet50 weights are normally paired with
# `preprocess_input` (imported at the top of the notebook but never used) rather
# than a plain 1/255 rescale. Switching would also change what the augmentation
# preview cell displays, so it is left as-is here -- confirm and change together.
train_gen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    width_shift_range=0.1,
    height_shift_range=0.1)

# train generator
train_generator = train_gen.flow_from_dataframe(
    train,
    directory = DIR_PATH,
    x_col='filename',
    y_col="class",
    class_mode="categorical",
    target_size=image_shape,
    batch_size=batch_size)
# validation generator: rescaling only, no augmentation
validation_gen = ImageDataGenerator(
    rescale=1./255)

# shuffle=False keeps the batches in dataframe order, so predictions made from
# this generator line up with the rows (and labels) of `validation`
val_generator = validation_gen.flow_from_dataframe(
    validation,
    directory = DIR_PATH,
    x_col='filename',
    y_col="class",
    class_mode="categorical",
    target_size=image_shape,
    batch_size=batch_size,
    shuffle=False)

In [None]:
# Pick one random training image and display nine augmented variants of it.
sample_image = train.sample(n=1).reset_index(drop=True)
sample_generator = train_gen.flow_from_dataframe(
    sample_image,
    directory=DIR_PATH,
    x_col='filename',
    y_col='class')
plt.figure(figsize=(12, 12))
for slot in range(1, 10):
    plt.subplot(3, 3, slot)
    # every draw from the generator re-applies the random augmentation
    X_batch, Y_batch = next(sample_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

In [None]:
# Load ResNet50 with ImageNet weights as the base network; include_top=False
# leaves out the original classification head.
resnet = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# print the architecture of the base model
resnet.summary()
# report how many layers the ResNet network has
layers = resnet.layers
print('Number of Layers: {} '.format(len(layers)))

In [None]:
# number of samples for each set
size_train = train.shape[0]
size_val = validation.shape[0]
# early stopping: the model is compiled with metrics=['accuracy'], so the
# validation metric is logged as 'val_accuracy'. Monitoring 'val_acc' would
# reference a metric that is never logged and early stopping would never trigger.
callbacks = EarlyStopping(patience=3, monitor='val_accuracy', mode='max')

# build the classification head on top of the ResNet base
inputs = resnet.input
# add an average pooling layer
x = resnet.output
x = GlobalAveragePooling2D()(x)
# first dense layer
x = Dense(512, activation='relu')(x)
# dropout
x = Dropout(0.5)(x)
# output layer: two softmax units, one per class
outputs = Dense(2, activation ='softmax')(x)
# this is the model we will train
model = Model(inputs=inputs, outputs=outputs)

# freeze all convolutional ResNet layers so only the new head is trained
# (iterating resnet.layers directly avoids depending on the `layers` variable
# created in the summary cell)
for layer in resnet.layers:
    layer.trainable = False
# compile the model (`learning_rate` replaces the deprecated `lr` argument)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# train the model on the new data for a few epochs
# (Model.fit accepts generators directly; fit_generator is deprecated)
history = model.fit(
    train_generator,
    epochs= epochs,
    validation_data = val_generator,
    validation_steps = size_val//batch_size,
    steps_per_epoch = size_train//batch_size,
    callbacks = [callbacks])

In [None]:
# Plot training vs validation curves side by side: loss on the left, accuracy on
# the right. (No separate plt.figure() call: plt.subplots already creates the
# figure, and an extra call would only emit an empty one.)
sb.set_style('whitegrid')
fig,(ax1, ax2)=plt.subplots(1,2,figsize=(19,8))
ax1.plot(history.history['loss'])
ax1.plot(history.history['val_loss'])
ax1.legend(['Training','Validation'])
ax1.set_title('Loss')
ax1.set_xlabel('#epochs')
## plot training accuracy vs validation accuracy
ax2.plot(history.history['accuracy'])
ax2.plot(history.history['val_accuracy'])
ax2.legend(['Training','Validation'])
ax2.set_title('Accuracy')
ax2.set_xlabel('#epochs')
plt.show()

In [None]:
# Evaluate on the validation generator. Model.evaluate accepts generators
# directly; evaluate_generator is deprecated. With a single 'accuracy' metric the
# result unpacks as (loss, accuracy).
loss, acc = model.evaluate(val_generator)
print("Validation Accuracy = %f \nValidation Loss = %f " % (acc, loss))

In [None]:
# Persist the trained weights to an HDF5 file in the working directory.
weights_path = "resnet_model.h5"
model.save_weights(weights_path)
print("Model Saved.")

In [None]:
# Predict from a dedicated, non-shuffled generator so that the i-th prediction
# belongs to the i-th label. A shuffling generator would return predictions in a
# random order that does not match the label order, making the matrix meaningless.
eval_generator = validation_gen.flow_from_dataframe(
    validation,
    directory=DIR_PATH,
    x_col='filename',
    y_col='class',
    class_mode='categorical',
    target_size=image_shape,
    batch_size=batch_size,
    shuffle=False)
# true class indices in the exact order the generator yields the images
labels = np.asarray(eval_generator.classes)
# Model.predict accepts generators directly; predict_generator is deprecated
predict = model.predict(eval_generator)
# the predicted class is the index of the largest softmax output
predictions = np.argmax(predict, axis=1)
# compute the confusion matrix
conf_matrix = confusion_matrix(labels, predictions)
# plot the confusion matrix (cells hold integer counts, hence fmt='d')
fig,ax = plt.subplots(figsize=(8, 8))
sb.heatmap(conf_matrix, annot=True, linewidths=0.01,cmap="Greens",linecolor="gray", fmt='d',ax=ax)
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
plt.title("Confusion Matrix")
plt.show()

To be continued...