# Submission

---

## 1. Building business-photos correspondence
We build a hash table to easily access the photos of a given business. This information is contained in the `test_photo_to_biz.csv` file.

In [1]:
import numpy as np
import pandas as pd
from common import *

# Mapping from each test photo id to the business it belongs to.
# header=0 consumes the CSV's own header row; `names` supplies the column labels used below.
csv_path = 'data/test_photo_to_biz.csv'
photo2biz = pd.read_csv(
    csv_path,
    header=0,
    names=['photo', 'business'],
)

# Preview the first ten rows
photo2biz.head(10)

Using TensorFlow backend.


Unnamed: 0,photo,business
0,317818,003sg
1,30679,003sg
2,455084,003sg
3,371381,003sg
4,86224,003sg
5,36076,003sg
6,46999,003sg
7,74896,003sg
8,169399,003sg
9,110581,003sg


In [2]:
# Group the photo ids by business and collect each group into a plain list,
# giving a Series indexed by business id whose values are lists of photo ids.
photos_grouped = photo2biz.groupby('business')['photo']
biz2photos = photos_grouped.apply(list)

# Preview the first ten businesses
biz2photos.head(10)

business
003sg    [317818, 30679, 455084, 371381, 86224, 36076, ...
00er5    [220529, 239591, 398090, 315725, 444173, 35412...
00kad    [96324, 333815, 101340, 398801, 465446, 123159...
00mc6    [219849, 327514, 189070, 366342, 227137, 15566...
00q7x    [207951, 44259, 25772, 256585, 375771, 284229,...
00v0t    [98656, 289068, 356683, 356072, 384160, 257167...
00y7p    [354534, 91842, 264321, 337598, 425924, 318190...
019fg    [329682, 293765, 151022, 310278, 214887, 41965...
019r1    [235703, 330900, 97541, 334820, 318846, 70608,...
01i5j    [159653, 186559, 210259, 104371, 230924, 39826...
Name: photo, dtype: object

In [3]:
# Turn the business -> photo-list Series into a plain dict (hash table).
# The guard keeps this cell idempotent: on a second run `biz2photos` is already
# a dict, and calling .to_dict() on it would raise AttributeError.
if isinstance(biz2photos, pd.Series):
    biz2photos = biz2photos.to_dict()

# Business ids, in the dict's iteration order; the prediction loop walks this list.
biz = list(biz2photos)

print('There are %d businesses in the test dataset.' % len(biz))

There are 10000 businesses in the test dataset.


## 2. Load Model
We load the model that will be used to make predictions.

In [4]:
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, MaxPooling2D
from keras.layers import Conv2D, Dense, Dropout, BatchNormalization
from keras.regularizers import l2

# Classifier head applied to pre-computed (1, 1, 2048) feature maps.
# Layers are listed in the order they are stacked; the final layer emits nine
# sigmoid outputs, i.e. one independent score in (0, 1) per label.
model = Sequential([
    # Collapse the 1x1 spatial dimensions into a flat 2048-vector.
    GlobalAveragePooling2D(input_shape=(1, 1, 2048)),

    # Three L2-regularised dense stages of decreasing width; dropout is applied
    # after the first two only.
    Dense(1024, activation='relu', kernel_regularizer=l2(1e-4)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(512, activation='relu', kernel_regularizer=l2(1e-4)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu', kernel_regularizer=l2(1e-4)),
    BatchNormalization(),

    Dense(9, activation='sigmoid'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
batch_normalization_2 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
__________

In [5]:
# Load previously saved weights into `model` from the HDF5 file on disk.
# NOTE(review): the file name suggests these weights were trained on ResNet50
# features, matching the extractor used in the prediction cell — confirm.
model.load_weights('data/saved_models/weights_resnet50.hdf5')

## 3. Predictions

In [6]:
def id2files(ids):
    """Return the JPEG path of every test photo id, preserving input order.

    Each id is converted with ``str`` and wrapped as
    ``data/test_photos/<id>.jpg``.
    """
    prefix, suffix = 'data/test_photos/', '.jpg'
    paths = []
    for photo_id in ids:
        paths.append(prefix + str(photo_id) + suffix)
    return paths

def files2tensors(img_paths):
    tensors = [path_to_tensor(img_path) for img_path in img_paths]
    return np.vstack(tensors)

In [None]:
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import ResNet50

# ResNet50 without its classification top; only used here to produce features
# that are then fed to `model`.
resnet50 = ResNet50(include_top=False)

n_biz = len(biz)
yhat = []

for i in range(n_biz):
    b = biz[i]

    # Progress message every 500 businesses.
    if i % 500 == 0:
        print("%d/%d" % (i, n_biz))

    # photo ids -> file paths -> stacked image tensors -> ResNet50 preprocessing
    tensors = preprocess_input(files2tensors(id2files(biz2photos[b])))

    # image tensors -> ResNet50 features -> per-photo label scores
    predictions = model.predict(resnet50.predict(tensors))

    # One row per business: the mean of its photos' scores.
    yhat.append(predictions.mean(axis=0))

yhat = np.array(yhat)

In [None]:
# Baseline labels: one shared 0.5 cut-off for every label.
yhat_unique = (yhat >= 0.5).astype(int)

# Per-label cut-offs, one entry per output column of `yhat`.
# NOTE(review): the origin of these values is not shown in this notebook —
# presumably tuned on validation data; confirm.
threshold = np.array([0.43, 0.55, 0.535, 0.525, 0.545, 0.54, 0.55, 0.47, 0.5])

# Broadcasting compares column j of `yhat` against threshold[j] for all rows at
# once, replacing the element-by-element nested Python loops.
yhat_custom = (yhat >= threshold).astype(int)