In [1]:
import os
from os import listdir
from os.path import isfile, join

In [2]:
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from keras.preprocessing.image import load_img, img_to_array
from keras.optimizers import Adam
from keras.applications import resnet50
from keras.utils import to_categorical
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# ImageNet-pretrained ResNet50 without its top classification layers, built for 64x64 RGB inputs.
resnet_model = resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(64, 64, 3),
)

Instructions for updating:
Colocations handled automatically by placer.




In [4]:
parent_dir = './dataset'
# The first tuple yielded by os.walk describes parent_dir itself;
# its second element is the list of immediate sub-directory names.
top_level = next(os.walk(parent_dir))
sub_dirs = top_level[1]
print(sub_dirs)

['motorbike', 'aeroplane', 'dog', 'bottle', 'boat', 'pottedplant', 'cow', 'horse', 'person', 'car', 'cat', 'diningtable', 'bus', 'bird', 'bicycle', 'chair']


We have 16 folders, one for each of the categories listed above.  
Now we will create a dataframe containing each image's category, its file path, and the integer encoding of the category.

In [5]:
# Build one record per non-empty file, labelled with the name of its parent folder.
files_details = []
for class_name in sub_dirs:
    class_dir = join(parent_dir, class_name)
    for entry in listdir(class_dir):
        image_path = join(class_dir, entry)
        # Skip anything that is not a regular file, and files with zero bytes.
        if not isfile(image_path) or os.path.getsize(image_path) == 0:
            continue
        files_details.append({'class': class_name, 'path': image_path})

# Tabulate the records and add an integer code for each class name.
df = pd.DataFrame(files_details)
lbl = LabelEncoder()
df['class_encoded'] = lbl.fit_transform(df['class'])

In [6]:
# Preview the first rows to check the class / path / class_encoded columns
df.head()

Unnamed: 0,class,path,class_encoded
0,motorbike,./dataset/motorbike/2008_008246.jpg,13
1,motorbike,./dataset/motorbike/2008_007739.jpg,13
2,motorbike,./dataset/motorbike/2008_002926.jpg,13
3,motorbike,./dataset/motorbike/2008_007485.jpg,13
4,motorbike,./dataset/motorbike/2008_007054.jpg,13


In [7]:
# (rows, columns) of the dataframe, together with the number of distinct categories
df.shape, df['class'].nunique()

((793, 3), 16)

It is a small dataset: 793 images are present, spread across 16 categories.

In [8]:
# Number of images per category
df['class'].value_counts()

car            50
motorbike      50
bird           50
aeroplane      50
bicycle        50
chair          50
boat           50
cow            50
dog            50
person         50
cat            50
diningtable    50
bus            50
horse          50
bottle         50
pottedplant    43
Name: class, dtype: int64

Every category has 50 images, except `pottedplant`, which has 43.

In [9]:
# helper that turns an image path from the dataframe into an image array
def get_np_images(x):
    """Load the image file at path ``x`` and return it as an array.

    The file is read with ``load_img`` using ``target_size=(64, 64)`` and the
    result is converted with ``img_to_array``. No ResNet-specific
    preprocessing is applied inside this function.
    """
    return img_to_array(load_img(x, target_size=(64, 64)))

In [10]:
# Load every image listed in the 'path' column and keep the array alongside its row.
df['image'] = df['path'].apply(get_np_images)

In [11]:
# mark every layer of the pretrained resnet model as non-trainable
for base_layer in resnet_model.layers:
    base_layer.trainable = False

In [12]:
# add our own layers: a small classification head on top of the resnet output
x = resnet_model.output
x = Flatten()(x)
# A Dense layer applies no activation unless one is given. Without a
# non-linearity the two hidden layers are purely linear and add no
# expressive power over a single linear layer, so use ReLU on both.
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.4)(x)
# 16 softmax outputs, one per category folder in the dataset
preds = Dense(16, activation='softmax')(x)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [13]:
# Assemble the resnet input and the new softmax head into one model, then list its layers.
model = Model(inputs=resnet_model.input, outputs=preds)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 70, 70, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 32, 32, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 32, 32, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [14]:
# divide the dataframe into training and validation dataframe
df = df.sample(frac=1, random_state=12).reset_index(drop=True)
# Seed the split so the same rows land in each part on every run, and stratify
# on the class code so each category keeps the same share in both parts.
train, valid = train_test_split(
    df,
    test_size=0.2,
    random_state=12,
    stratify=df['class_encoded'],
)
# Re-number the rows from 0; assign the result instead of mutating the
# frames returned by train_test_split in place.
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)
train.shape, valid.shape

((634, 4), (159, 4))

In [15]:
# Inputs are the image arrays; targets are the integer class codes.
X_train = train['image']
y_train = train['class_encoded']
X_test = valid['image']
y_test = valid['class_encoded']

In [16]:
# combine each Pandas series of per-image arrays into one numpy ndarray
X_train = np.array(list(X_train))
X_test = np.array(list(X_test))

In [17]:
# preprocess the images for feeding into the resnet model
# NOTE: the inputs are overwritten with the preprocessed arrays, so running this
# cell a second time would preprocess already-preprocessed data.
X_train = resnet50.preprocess_input(X_train)
X_test = resnet50.preprocess_input(X_test)

In [18]:
# one hot encode the target variables
# Fix the one-hot width to the number of classes the label encoder was fitted
# on. When left out, to_categorical infers the width from the largest label in
# the array it is given, so a split that happens to lack the highest class code
# would produce fewer columns than the other split and than the model's output.
num_classes = len(lbl.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [19]:
# Adam optimizer with lr=0.001; categorical cross-entropy loss, reporting accuracy.
opt = Adam(lr=0.001)
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

In [20]:
# Train for 10 epochs in batches of 32, evaluating on the validation split during training.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

Instructions for updating:
Use tf.cast instead.
Train on 634 samples, validate on 159 samples
Epoch 1/10

KeyboardInterrupt: 