In [1]:
%matplotlib inline
import os, sys
import numpy as np
from utils_data_exploration import *
import gc

### Directory Structure

In [2]:
%pwd # verify you are in the correct folder

'/home/javier/Documents/YelpRestaurantPhotoClassification/nbs'

In [3]:
# Name of the dataset-specific sub-folder; it is appended to the results path
# when the directory constants are built.
dataset_folder = 'food101/'

In [4]:
# Assign Relevant General Paths, i.e. dataset invariant paths
LESSON_HOME_DIR = os.getcwd() # path tho the notebook
DATA_DIR = LESSON_HOME_DIR + '/../data/'
TRAIN_PATH = DATA_DIR + '/train_photos/'
VALID_PATH = DATA_DIR + '/valid_photos/'
SHARED_PATH = DATA_DIR + 'shared/'

# Assign Specific Paths, i.e. dataset specific paths
RESULTS_PATH = DATA_DIR + '/results/' + dataset_folder

FEATURES_PATH = RESULTS_PATH + 'features/'
CLASSIFIERS_PATH = RESULTS_PATH + 'classifiers/'

### Start with the list of businesses and the FC1 business features (see Extract_F1_Features.ipynb)

In [5]:
# Shuffled list of training businesses; later cells use each entry as a row
# key into df_train_labels.
businesses = np.load(SHARED_PATH+'/businesses_shuffled.npy')
# One feature row per business (later cells index it by business position and
# feed it to a 4096-wide input layer).
# NOTE(review): assumes the rows are in the same order as `businesses` — confirm.
businesses_fc1_features = np.load(FEATURES_PATH + 'businesses_fc1_blueprint.npy')

### 4-fold Cross Validation

In [6]:
nb_folds = 4

# The folds hold positional indices into `businesses` (not business ids).
total_restaurants = len(businesses)
businesses_idx = np.arange(total_restaurants)

# Nominal (floor) fold size, kept because other cells may still read it.
restaurants_per_fold = total_restaurants // nb_folds

# np.array_split produces nb_folds contiguous chunks that together cover every
# index, even when total_restaurants is not a multiple of nb_folds (the first
# chunks are then one element longer). Slicing with the floor fold size would
# silently drop the trailing `total_restaurants % nb_folds` businesses from
# both training and validation.
folds = dict()
for fold, fold_idx in enumerate(np.array_split(businesses_idx, nb_folds)):
    folds[fold] = fold_idx

### Retrieve the binary labels (applies / doesn't apply) for every attribute

In [7]:
attributes = 9 # number of possible restaurant attributes
nb_businesses = len(businesses)

# One float vector per attribute: entry i is 1 when the attribute is listed in
# the 'labels' of the i-th business and 0 otherwise.
labels_dict = {attribute: np.zeros(nb_businesses) for attribute in range(attributes)}

for i, business in enumerate(businesses):
    # Look the business up once and test all attributes against its labels.
    business_labels = df_train_labels.loc[business]['labels']
    for attribute in range(attributes):
        if attribute in business_labels:
            labels_dict[attribute][i] = 1

In [8]:
# Display the per-attribute label arrays as a quick sanity check.
labels_dict

{0: array([ 1.,  0.,  1., ...,  1.,  0.,  0.]),
 1: array([ 1.,  1.,  0., ...,  0.,  1.,  1.]),
 2: array([ 0.,  1.,  1., ...,  0.,  1.,  0.]),
 3: array([ 1.,  1.,  0., ...,  1.,  1.,  0.]),
 4: array([ 0.,  1.,  0., ...,  0.,  0.,  0.]),
 5: array([ 1.,  1.,  0., ...,  0.,  1.,  0.]),
 6: array([ 1.,  1.,  1., ...,  0.,  1.,  0.]),
 7: array([ 0.,  1.,  0., ...,  0.,  0.,  0.]),
 8: array([ 0.,  0.,  1., ...,  1.,  0.,  1.])}

# Train classifiers on top of the blueprint bottleneck features per restaurant

In [9]:
from keras import backend as K
from keras import applications
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

Using TensorFlow backend.


### Create a copy of the VGG16 classifier(image net) -> adding dropout and binarizing the output

In [10]:
def get_binary_vgg16_classifier(drop_rate = 0.5):
    """Build and compile a one-unit sigmoid classifier over 4096-wide inputs.

    A 101-unit 'predictions' layer is created first, the food-101 weight file
    is loaded into it by layer name, and the layer is then popped and replaced
    by a single sigmoid unit. The result is compiled with Adam (lr=1e-04),
    binary cross-entropy loss and an accuracy metric.

    Args:
        drop_rate: currently unused; no Dropout layer is added to the model.

    Returns:
        The compiled keras Sequential model.

    NOTE(review): the only layer that receives the loaded weights is removed
    before the model is returned, so the pre-trained weights do not appear to
    reach the final model. Confirm whether an intermediate layer and/or a
    Dropout(drop_rate) layer was intended.
    """
    food101_weights = DATA_DIR+'/weights/food101/'+'vgg16_food101_model.h5'

    classifier = Sequential()

    # Head with the same name as in the weight file so by_name loading matches it
    food101_head = Dense(101, activation='relu', name='predictions', input_shape=(4096,))
    classifier.add(food101_head)
    classifier.load_weights(food101_weights, by_name=True)

    # Swap the 101-way head for a binary applies / doesn't-apply neuron
    classifier.pop()
    binary_head = Dense(1, activation='sigmoid', input_shape=(4096,))
    classifier.add(binary_head)

    classifier.compile(optimizer=Adam(lr=1e-04), loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# Train 4 classifiers per label, one for each fold

In [11]:
import gc

In [12]:
# Print, for every attribute, the share of businesses in its majority class.
for k, v in labels_dict.items():
    # minlength=2 guarantees one count for class 0 and one for class 1. Without
    # it an all-zero label vector yields a single bin, max and min coincide,
    # and the reported proportion is 0.5 where it should be 1.0.
    pos_neg_num = np.bincount(v.astype(np.int32), minlength=2)
    majority = pos_neg_num.max()
    minority = pos_neg_num.min()
    print(str(k) + 'proportion:' + str(majority/float(majority + minority)))

0proportion:0.663827655311
1proportion:0.50250501002
2proportion:0.514028056112
3proportion:0.50250501002
4proportion:0.725951903808
5proportion:0.625751503006
6proportion:0.681362725451
7proportion:0.713426853707
8proportion:0.620240480962


In [13]:
# Train one classifier per (attribute, validation fold) pair.
#
# The weights are written to CLASSIFIERS_PATH + 'weights/', which is where the
# prediction cells load them from. Saving them directly under RESULTS_PATH, as
# before, left those cells pointing at files that were never written.
weights_dir = CLASSIFIERS_PATH + 'weights/'
if not os.path.isdir(weights_dir):
    os.makedirs(weights_dir)

for attribute in range(attributes):
    for val_fold in range(len(folds)):

        # Reset the Keras backend session and collect garbage before
        # building the next model.
        K.clear_session()
        gc.collect()

        # Fresh, compiled binary classifier for this attribute/fold.
        classifier_model = get_binary_vgg16_classifier()

        # Training rows are all folds except the held-out one.
        train_idx = np.concatenate(
            [folds[i] for i in sorted(folds) if i != val_fold]).astype(np.int32)
        valid_idx = np.asarray(folds[val_fold], dtype=np.int32)
        X_train = businesses_fc1_features[train_idx, :]; y_train = labels_dict[attribute][train_idx]
        X_valid = businesses_fc1_features[valid_idx, :]; y_valid = labels_dict[attribute][valid_idx]

        # First 100 epochs.
        h = classifier_model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100)
        np.save(RESULTS_PATH+'/history_prediction_attribute_%d_fold_%d'%(attribute, val_fold), h.history)

        # Re-compile with a newly constructed Adam optimizer and train another
        # 100 epochs (200 in total, matching the 'ft_200' file name).
        # NOTE(review): the two history files are named 'prediction' and 'fc2'
        # although both phases train the same single-layer model — confirm the
        # intended naming.
        classifier_model.compile(optimizer=Adam(lr=1e-04), loss='binary_crossentropy', metrics=['accuracy'])
        h = classifier_model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100)
        np.save(RESULTS_PATH+'/history_fc2_attribute_%d_fold_%d'%(attribute, val_fold), h.history)

        classifier_model.save_weights(weights_dir+'ft_200_attribute_%d_fold_%d.h5' % (attribute, val_fold))
        
        

Train on 1497 samples, validate on 499 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
  32/1497 [..............................] - ETA: 0s - loss: 0.4628 - acc: 0.7188

KeyboardInterrupt: 

## Predict Probabilities of each attribute of the given training restaurants

In [14]:
from keras import backend as K

In [15]:
nb_businesses = len(businesses)
nb_attributes = 9

# Output buffer: one row per business, one column per attribute.
# np.empty leaves the contents uninitialised until the prediction loop fills them.
proba_preds = np.empty((nb_businesses, nb_attributes))

In [None]:
# Out-of-fold predictions: every business is scored by the classifier for which
# its fold was the validation fold.
for attribute in range(nb_attributes):
    # NaN-initialised so a business not covered by any fold stays visibly
    # missing and never picks up uninitialised memory.
    proba_attribute = np.full(nb_businesses, np.nan)
    for val_fold in range(len(folds)):

        K.clear_session()
        gc.collect()

        # grab the binary model and load the trained weights
        classifier_model = get_binary_vgg16_classifier()
        classifier_model.load_weights(CLASSIFIERS_PATH+'weights/ft_200_attribute_%d_fold_%d.h5' % (attribute, val_fold))

        # rows of the held-out fold for this classifier
        val_idx = folds[val_fold]
        X_val = businesses_fc1_features[val_idx]

        # predict once and write the result back at the fold's own positions.
        # Appending fold outputs in order only lines up when the folds are
        # contiguous, ordered, and cover every business.
        predicted_proba = classifier_model.predict_proba(X_val).flatten()
        print(predicted_proba.shape)
        proba_attribute[val_idx] = predicted_proba

    proba_preds[:, attribute] = proba_attribute
np.save(CLASSIFIERS_PATH+'/probas/'+'train_attributes_probas.npy', proba_preds)

## Test Set

### Start with the list of businesses and the FC1 business features (see Extract_F1_Features.ipynb)

In [5]:
# NOTE: from here on `businesses` and `businesses_fc1_features` are rebound to
# the test-set files; the training arrays loaded earlier are no longer
# reachable under these names.
businesses = np.load(SHARED_PATH+'/test_businesses_shuffled.npy')
# NOTE(review): assumes the rows are in the same order as `businesses` — confirm.
businesses_fc1_features = np.load(FEATURES_PATH + 'test_businesses_fc1_blueprint.npy')

# Load classifiers and make predictions

In [6]:
from keras import backend as K
from keras import applications
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

Using TensorFlow backend.


### Create a copy of the VGG16 classifier(image net) -> adding dropout and binarizing the output

In [7]:
def get_binary_vgg16_classifier(drop_rate = 0.5):
    """Build and compile a one-unit sigmoid classifier over 4096-wide inputs.

    A 101-unit 'predictions' layer is created first, the food-101 weight file
    is loaded into it by layer name, and the layer is then popped and replaced
    by a single sigmoid unit. The result is compiled with Adam (lr=1e-04),
    binary cross-entropy loss and an accuracy metric.

    Args:
        drop_rate: currently unused; no Dropout layer is added to the model.

    Returns:
        The compiled keras Sequential model.

    NOTE(review): the only layer that receives the loaded weights is removed
    before the model is returned, so the pre-trained weights do not appear to
    reach the final model. Confirm whether an intermediate layer and/or a
    Dropout(drop_rate) layer was intended.
    """
    food101_weights = DATA_DIR+'/weights/food101/'+'vgg16_food101_model.h5'

    classifier = Sequential()

    # Head with the same name as in the weight file so by_name loading matches it
    food101_head = Dense(101, activation='relu', name='predictions', input_shape=(4096,))
    classifier.add(food101_head)
    classifier.load_weights(food101_weights, by_name=True)

    # Swap the 101-way head for a binary applies / doesn't-apply neuron
    classifier.pop()
    binary_head = Dense(1, activation='sigmoid', input_shape=(4096,))
    classifier.add(binary_head)

    classifier.compile(optimizer=Adam(lr=1e-04), loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

## Predict Probabilities of each attribute of the given test restaurants

In [14]:
from keras import backend as K

In [15]:
nb_businesses = len(businesses)
nb_attributes = 9
# Number of fold classifiers stored per attribute (the prediction loop loads one per fold).
nb_folds = 4

# Output buffer: one row per business, one column per attribute.
# np.empty leaves the contents uninitialised until the prediction loop fills them.
proba_preds = np.empty((nb_businesses, nb_attributes))

In [16]:
# Test-set prediction: for each attribute, average the probabilities of the
# classifiers trained on the different folds.

# grab the businesses_fc1_features in the test set (identical for every
# classifier, so bound once outside the loops)
X_test = businesses_fc1_features

for attribute in range(nb_attributes):
    proba_attribute = np.zeros(nb_businesses) # running sum over the fold classifiers
    for val_fold in range(nb_folds):

        K.clear_session()
        gc.collect()

        # grab the binary model and load the trained weights
        classifier_model = get_binary_vgg16_classifier()
        classifier_model.load_weights(CLASSIFIERS_PATH+'weights/ft_200_attribute_%d_fold_%d.h5' % (attribute, val_fold))

        # predict the probability of the attribute for the given classifier
        predicted_proba = classifier_model.predict_proba(X_test).flatten()
        print(predicted_proba.shape)
        proba_attribute += predicted_proba

    # Divide by the number of classifiers actually summed; a hard-coded 4
    # would give wrong averages as soon as nb_folds changes.
    proba_preds[:, attribute] = proba_attribute / nb_folds
np.save(CLASSIFIERS_PATH+'/probas/'+'test_attributes_probas.npy', proba_preds)

(10000,)
(10000,)
(10000,)
(10000,)


In [21]:
# Spot-check: probabilities of all attributes for the business in row 2.
proba_preds[2, :]

array([ 0.09096343,  0.89325167,  0.90448537,  0.46618379,  0.3133437 ,
        0.84506671,  0.99356112,  0.37737395,  0.54179413])

In [22]:
# Spot-check: probabilities of all attributes for the business in row 0.
proba_preds[0, :]

array([ 0.13187651,  0.64056262,  0.75980102,  0.43684884,  0.18323648,
        0.8241327 ,  0.9741966 ,  0.17336291,  0.78725651])

In [23]:
# Spot-check: probabilities of all attributes for the business in row 1.
proba_preds[1, :]

array([ 0.24460572,  0.41693479,  0.84464622,  0.7328427 ,  0.46463527,
        0.90677592,  0.95867929,  0.43787192,  0.51530321])