In [0]:
import os
import numpy as np
import pandas as pd
import cv2
from zipfile import ZipFile 

In [2]:
!wget -nv "http://he-s3.s3.amazonaws.com/media/hackathon/hackerearth-deep-learning-challenge-auto-tag-images-gala/auto-tag-images-of-the-gala-9e47fb31/9d34462453e311ea.zip"

2020-03-21 03:06:57 URL:http://he-s3.s3.amazonaws.com/media/hackathon/hackerearth-deep-learning-challenge-auto-tag-images-gala/auto-tag-images-of-the-gala-9e47fb31/9d34462453e311ea.zip [30062440/30062440] -> "9d34462453e311ea.zip.1" [1]


In [0]:
file_name = "9d34462453e311ea.zip"
with ZipFile(file_name, 'r') as zip:
  zip.extractall()

In [4]:
path = '/dataset'

train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv('dataset/test.csv')

train_df.head()

Unnamed: 0,Image,Class
0,image7042.jpg,Food
1,image3327.jpg,misc
2,image10335.jpg,Attire
3,image8019.jpg,Food
4,image2128.jpg,Attire


In [5]:
class_map = {
    'Food': 0,
    'Attire': 1,
    'Decorationandsignage': 2,
    'misc': 3
}

inverse_class_map = {
    0: 'Food',
    1: 'Attire',
    2: 'Decorationandsignage',
    3: 'misc'
}

train_df['Class'] = train_df['Class'].map(class_map).astype(np.uint8)
train_df.head()

Unnamed: 0,Image,Class
0,image7042.jpg,0
1,image3327.jpg,3
2,image10335.jpg,1
3,image8019.jpg,0
4,image2128.jpg,1


In [0]:
train_path = './dataset/Train Images/'
test_path = './dataset/Test Images/'

h, w = 224, 224

train_images, train_labels = [], []

for i in range(len(train_df.Image)):
    train_image = cv2.imread(train_path + str(train_df.Image[i]))
    train_image = cv2.resize(train_image, (h, w))
    train_images.append(train_image)
    train_labels.append(train_df.Class[i])

test_images = []

for i in range(len(test_df.Image)):
    test_image = cv2.imread(test_path + str(test_df.Image[i]))
    test_image = cv2.resize(test_image, (h, w))
    test_images.append(test_image)

train_images = np.array(train_images)
test_images = np.array(test_images)

In [7]:
from tensorflow.keras.utils import to_categorical

cat_train_labels = to_categorical(train_labels)

In [0]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [9]:
base_model = ResNet50(
    weights='imagenet',
    include_top=False, 
    input_shape=(h, w, 3), 
    pooling='avg'
)
base_model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
__________

In [10]:
base_model.trainable = False

model = Sequential([
  base_model,
  Dropout(0.2),
  Dense(4, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 4)                 8196      
Total params: 23,595,908
Trainable params: 8,196
Non-trainable params: 23,587,712
_________________________________________________________________


In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint('best_model.h5',  # model filename
                             monitor='val_loss', # quantity to monitor
                             verbose=1, # verbosity - 0 or 1
                             save_best_only= True, # The latest best model will not be overwritten
                             mode='auto') # The decision to overwrite model is made 
                                          # automatically depending on the quantity to monitor

In [0]:
batch_size = 32
epochs = 10

In [13]:
datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)


# history = model.fit_generator(datagen.flow(train_images, train_labels, batch_size=batch_size), validation_data= (train_images, train_labels),
#                     steps_per_epoch=len(train_images) / batch_size, epochs=epochs)

training_generator = datagen.flow(train_images, cat_train_labels,
                                  batch_size=batch_size)

history = model.fit_generator(
         training_generator,
         steps_per_epoch= training_generator.n//training_generator.batch_size,
         callbacks=[checkpoint],
         epochs= epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
test_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = test_datagen.flow(train_images,cat_train_labels, batch_size=batch_size)
test_generator = test_datagen.flow(test_images, batch_size=batch_size)

In [15]:
model.evaluate_generator(generator=validation_generator,
                            steps=validation_generator.n//validation_generator.batch_size)

[1.7230307440603934, 0.38071236]

In [16]:
labels = model.predict(test_images)
print(labels[:4])

[[3.88511956e-01 1.59577221e-01 1.02961545e-04 4.51807857e-01]
 [6.83434606e-01 7.97305536e-03 1.74066998e-04 3.08418274e-01]
 [8.61798227e-01 1.28552865e-03 1.64692159e-04 1.36751488e-01]
 [9.99120176e-01 4.54428971e-07 6.62589002e-07 8.78631836e-04]]


In [17]:
label = [np.argmax(i) for i in labels]
print(label[:20])

[3, 0, 0, 0, 1, 3, 3, 1, 3, 1, 3, 2, 2, 0, 0, 3, 1, 3, 3, 1]


In [18]:
class_label = [inverse_class_map[x] for x in label]
print(class_label[:3])

['misc', 'Food', 'Food']


In [19]:
submission = pd.DataFrame({ 'Image': test_df.Image, 'Class': class_label })
submission.head()

Unnamed: 0,Image,Class
0,image6245.jpg,misc
1,image10409.jpg,Food
2,image8692.jpg,Food
3,image10517.jpg,Food
4,image2580.jpg,Attire


In [0]:
submission.to_csv('sub.csv', index=False)