In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.preprocessing import image
from keras.layers import Dense, InputLayer, Conv2D, MaxPool2D, Flatten
    
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Randomness consistency: one seed shared by every seeded step in the notebook.
seed = 101
# Dedicated generator for code that accepts an explicit RandomState.
rng = np.random.RandomState(seed)
# `rng` is independent of NumPy's global generator, so seed the global one too
# for libraries that draw from np.random.* directly.
# NOTE(review): TensorFlow's own RNG is not seeded here, so weight
# initialisation may still differ between runs.
np.random.seed(seed)

In [3]:
# Training metadata: one row per image (file name and age label).
train_csv_path = "train.csv"
data = pd.read_csv(train_csv_path)

In [4]:
# Preview the first five rows of the training metadata.
data.head(n=5)

Unnamed: 0,image_names,age_labels
0,16620.jpg,50
1,9507.jpg,36
2,16666.jpg,43
3,2536.jpg,49
4,9189.jpg,26


In [5]:
from keras.preprocessing.image import load_img, img_to_array, array_to_img

In [6]:
# Load every training image, resized to 64x64, as a NumPy array.
# target_size is (height, width); the channel count is determined by the
# colour mode, not by target_size, so no third element is passed.
X = [
    img_to_array(load_img("images/" + name, target_size=(64, 64)))
    for name in data.image_names
]

In [7]:
# Combine the per-image arrays into a single array and show its shape
# (images, height, width, channels).
X = np.array(X)
X.shape

(13911, 64, 64, 3)

#### Preprocess the images
Scale the image pixel values to the [0, 1] range.

In [8]:
# Scale pixel intensities to [0, 1] with a fixed constant instead of the
# data-dependent X.max(), so that any other image set can be put on exactly
# the same scale. Assumes 8-bit images (values 0-255) — TODO confirm.
# Run once per load: re-running this cell would rescale X again.
X = X / 255.0

In [9]:
# Regression target: the age label of each training image.
y = data["age_labels"]
y.shape

(13911,)

#### Train/validation split

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 30% of the samples for validation; the shared seed keeps the split repeatable.
x_train, x_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=seed,
)

#### Image Augmentations

In [12]:
from keras.preprocessing.image import ImageDataGenerator

In [13]:
# Random geometric augmentations: horizontal/vertical shifts, shear and zoom.
augment_options = dict(
    width_shift_range=0.5,
    height_shift_range=0.5,
    shear_range=0.5,
    zoom_range=0.5,
)
image_augments = ImageDataGenerator(**augment_options)

#### Model the network

In [14]:
from keras.layers import InputLayer, Dense, Convolution2D, MaxPooling2D

In [23]:
from keras.layers import BatchNormalization, GlobalAveragePooling2D, Dropout

In [24]:
def _conv_stage(filters, kernel_sizes, drop_rate):
    """Build the layers of one convolutional stage.

    Creates one 'same'-padded ReLU Conv2D per entry in ``kernel_sizes`` (each
    with ``filters`` filters), followed by max-pooling with pool size 2,
    batch normalisation and dropout with rate ``drop_rate``.

    Returns the layers as a list, in the order they should be added.
    """
    stage = [
        Conv2D(filters=filters, kernel_size=size, activation='relu', padding='same')
        for size in kernel_sizes
    ]
    stage.extend([MaxPooling2D(pool_size=2), BatchNormalization(), Dropout(drop_rate)])
    return stage


# One entry per stage: (filters, kernel size of each convolution, dropout rate).
stage_specs = [
    (32, [(1, 1), (5, 5)], 0.3),
    (64, [(3, 3), (3, 3)], 0.3),
    (128, [(3, 3), (3, 3)], 0.3),
    (512, [(3, 3), (3, 3)], 0.3),
    (1024, [(2, 2), (2, 2)], 0.5),
]

model = Sequential()
model.add(InputLayer(input_shape=(64, 64, 3)))

# Convolutional feature extractor: each stage halves the spatial resolution.
for n_filters, kernels, rate in stage_specs:
    for layer in _conv_stage(n_filters, kernels, rate):
        model.add(layer)

# Regression head: pooled features -> dense layer -> single linear output (age).
model.add(GlobalAveragePooling2D())
model.add(Dense(units=2048, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='linear'))

# Mean squared error on the predicted age, optimised with Adam.
model.compile(loss='mean_squared_error', optimizer="adam")

In [25]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 64, 64, 32)        128       
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 64, 64, 32)        25632     
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_15 (Batc (None, 32, 32, 32)        128       
_________________________________________________________________
dropout_15 (Dropout)         (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 32, 32, 64)        36928     
__________

In [26]:
from keras.callbacks import  EarlyStopping

In [27]:
# Stop training once val_loss has not improved by at least 0.001 for 20 epochs in a row.
# restore_best_weights rolls the model back to the epoch with the best val_loss;
# without it the model keeps the weights of the last epoch run, i.e. 20 epochs
# past the best one, and those are the weights used for prediction.
# NOTE(review): restore_best_weights requires Keras >= 2.2.3 — confirm the installed version.
estop = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=20,
    mode='min',
    verbose=1,
    restore_best_weights=True,
)

In [28]:
batch_size = 128
epochs = 200

# Augmented batches drawn from the training split.
train_batches = image_augments.flow(x_train, y_train, batch_size=batch_size)
# Number of whole batches per epoch (a trailing partial batch is not counted).
steps = x_train.shape[0] // batch_size

# Train with early stopping, validating on the un-augmented validation split.
perf_hist = model.fit_generator(
    train_batches,
    epochs=epochs,
    steps_per_epoch=steps,
    validation_data=(x_valid, y_valid),
    callbacks=[estop],
    shuffle=False,
    workers=5,
    use_multiprocessing=True,
)

Epoch 1/200




Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 00048: early stopping


In [29]:
# Drop the reference to the full training image array; it is not used again
# in the cells below. Re-running this cell on its own raises NameError.
del X

In [30]:
# Test metadata: the file names of the images to predict ages for.
test_csv_path = "test.csv"
test_data = pd.read_csv(test_csv_path)

In [31]:
# Preview the first five rows of the test metadata.
test_data.head(n=5)

Unnamed: 0,image_names
0,3950.jpg
1,1272.jpg
2,14384.jpg
3,18501.jpg
4,3165.jpg


In [32]:
# Load every test image, resized to 64x64 to match the model's input shape,
# and stack them into one array.
# target_size is (height, width); the channel count is determined by the
# colour mode, not by target_size, so no third element is passed.
test_images = np.array([
    img_to_array(load_img("images/" + name, target_size=(64, 64)))
    for name in test_data.image_names
])

In [33]:
# Sanity check: (number of test images, height, width, channels).
test_images.shape

(5962, 64, 64, 3)

In [34]:
# Scale pixel intensities to [0, 1] with a fixed constant. Dividing by the
# test set's own maximum would tie the scale to whatever happens to be in the
# test images instead of to the pixel range the model was trained on.
# Assumes 8-bit images (values 0-255) — TODO confirm.
# Run once per load: re-running this cell would rescale the array again.
test_images = test_images / 255.0

In [35]:
# Run the trained network on the test images; the single linear output unit
# yields one predicted age per image.
predictions = model.predict(x=test_images)

In [36]:
# Start from an empty frame; the submission columns are attached afterwards.
result = pd.DataFrame()

In [37]:
# Attach the submission columns: each test image name with its predicted age.
result = result.assign(
    image_names=test_data['image_names'],
    age_labels=predictions,
)

In [38]:
# Write the submission file without the DataFrame index column.
result.to_csv('result.csv', index=False)