# Train Final Classifier On Top of train_fc1_features

In [1]:
%matplotlib inline
import os, sys
import numpy as np
from utils_data_exploration import *
import gc

### Directory Structure

In [2]:
%pwd # verify you are in the correct folder

'/home/javier/Documents/YelpRestaurantPhotoClassification/nbs'

In [3]:
# Path constants: every location is derived from the notebook's working directory
LESSON_HOME_DIR = os.getcwd()
DATA_DIR = '{}/../data/'.format(LESSON_HOME_DIR)

# sub-folders of the data directory
TRAIN_PATH, VALID_PATH, WEIGHTS_PATH = [
    '{}/{}/'.format(DATA_DIR, folder)
    for folder in ('train_photos', 'valid_photos', 'weights')
]
SHARED_PATH = '{}shared/'.format(DATA_DIR)

# root folder for the final classifier's outputs
FINAL_DIR = '{}/results/final_classifier/'.format(DATA_DIR)

### Grab the list of shuffled businesses

In [4]:
# Business ids in the shuffled order stored on disk. The target matrix Y is later filled
# row-by-row in this order; the stored prediction files are presumably in the same order — confirm.
# NOTE(review): the test-set counterpart of this file is displayed further down with dtype=object;
# if this array is also an object array, recent NumPy needs allow_pickle=True here — confirm.
businesses = np.load(SHARED_PATH+'/businesses_shuffled.npy')

### Create the attribute predictions matrix in the following order [food101_preds, imagenet_preds, places_preds]

In [5]:
dataset_folders = ['food101', 'imagenet', 'places']
nb_businesses = len(businesses)
nb_attributes = 9
total_attributes = nb_attributes * 3

# load one block of attribute probabilities per representation, in dataset_folders order
representation_blocks = []
for dataset_folder in dataset_folders:
    results_dir = DATA_DIR + '/results/' + dataset_folder
    representation_preds = np.load(results_dir + '/classifiers/probas/train_attributes_probas.npy')
    representation_blocks.append(representation_preds)

# the leading empty (nb_businesses, 0) block fixes the expected row count and float dtype;
# the loaded blocks are then stacked side by side as columns
X = np.concatenate([np.zeros((nb_businesses, 0))] + representation_blocks, axis=1)

### retrieve the targets matrix

In [6]:
# Binary target matrix: one row per business, one column per attribute
Y = np.zeros((nb_businesses, nb_attributes))
for i, business in enumerate(businesses):
    business_attributes = df_train_labels.loc[business]['labels']
    # 1.0 where the attribute id appears among this business' labels, 0.0 otherwise
    Y[i, :] = [float(attribute in business_attributes) for attribute in range(nb_attributes)]

In [7]:
Y.shape

(1996, 9)

### Create a 4-fold split on the already shuffled businesses

In [8]:
# Split the business row indices into nb_folds contiguous folds: folds[k] is a list of row indices.
folds = {}
nb_folds = 4
instances_per_fold = nb_businesses // nb_folds  # smallest fold size
train_idx = list(range(nb_businesses))

# np.array_split gives the first (nb_businesses % nb_folds) folds one extra index each, so no
# business is left out of every fold when nb_businesses is not a multiple of nb_folds.
# Fixed-size slicing would drop those trailing indices. When the count divides evenly the
# folds are exactly the plain equal-sized slices.
for fold, fold_idx in enumerate(np.array_split(np.arange(nb_businesses), nb_folds)):
    folds[fold] = fold_idx.tolist()

### Train linear regressors on top of the representations' predictions -> evaluate via cross-validation

In [9]:
Y.shape

(1996, 9)

In [10]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras import regularizers

Using TensorFlow backend.


KeyboardInterrupt: 

In [None]:
input_attributes = 27
output_classes = 9
def get_linear_regressor_model(input_dim=input_attributes, nb_outputs=output_classes, l1_penalty=1e-03):
    """Build and compile the linear model used as the final classifier.

    The model is a single Dense layer with no activation and no bias (a plain linear map),
    with an L1 penalty on its kernel. It is compiled with mean-squared-error loss and SGD
    with Nesterov momentum.

    Args:
        input_dim: number of input features (defaults to input_attributes).
        nb_outputs: number of output units (defaults to output_classes).
        l1_penalty: L1 regularization factor applied to the kernel.

    Returns:
        The compiled keras Sequential model, with freshly initialized weights.
    """
    lr_model = Sequential()
    lr_model.add(Dense(units=nb_outputs, activation=None, use_bias=False,
                       kernel_regularizer=regularizers.l1(l1_penalty), input_shape=(input_dim,)))
    lr_model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True, decay=0.001))
    return lr_model

In [None]:
nb_attributes = 9
total_attributes = 27
epochs = 250

# Create the output folder up front: otherwise a missing folder is only discovered at
# save_weights time, after a whole fold has been trained.
weights_dir = FINAL_DIR + '/weights/'
os.makedirs(weights_dir, exist_ok=True)

for val_fold in range(len(folds)):
    # every fold except val_fold is used for training, val_fold for validation
    train_idx = np.array([idx for fold, fold_idx in folds.items() if fold != val_fold for idx in fold_idx])
    val_idx = np.array(folds[val_fold])
    # select training and validation data
    X_train, Y_train = X[train_idx, :], Y[train_idx, :]
    X_valid, Y_valid = X[val_idx, :], Y[val_idx, :]

    lr_model = get_linear_regressor_model()
    # train one epoch at a time so that each epoch sees a freshly jittered copy of the inputs
    for epoch in range(epochs):
        # data augmentation: Gaussian noise (std 0.05) drawn to match X_train's shape,
        # rather than a hard-coded column count
        random_jittering = np.random.randn(*X_train.shape) * 0.05
        X_augmented = X_train + random_jittering

        lr_model.fit(X_augmented, Y_train, batch_size=16, verbose=1, validation_data=(X_valid, Y_valid), shuffle=True, epochs=1)

    # one weights file per validation fold
    lr_model.save_weights(weights_dir + 'final_classifier_weights_val_fold_%d.h5' % (val_fold))

In [24]:
lr_model = get_linear_regressor_model()

In [42]:
lr_model.load_weights(FINAL_DIR+'/weights/'+'final_classifier_weights_val_fold_%d.h5'%(0))

In [None]:
# less regularization exp-3

### Evaluate performance of the final classifier

In [124]:
nb_attributes = 9
total_attributes = 27
tot_val_acc = np.zeros(9)
val_acc = np.zeros((4, 9))

for val_fold in range(len(folds)):
    # rows and labels of the held-out fold
    val_idx = np.array(folds[val_fold])
    X_valid, Y_valid = X[val_idx, :], Y[val_idx, :]
    nb_valid = X_valid.shape[0]

    # rebuild the model and restore the weights saved for this fold
    lr_model = get_linear_regressor_model()
    lr_model.load_weights(FINAL_DIR+'/weights/'+'final_classifier_weights_val_fold_%d.h5'%(val_fold))

    # model outputs -> 0/1 decisions with a 0.5 cut-off
    proba_preds = lr_model.predict(X_valid)
    classes_preds = (proba_preds > 0.5).astype(int).reshape(nb_valid, 9)

    # per-attribute accuracy on this fold, stored per fold and accumulated for the average
    fold_acc = np.mean(classes_preds == Y_valid, axis=0)
    val_acc[val_fold, :] = fold_acc
    tot_val_acc += fold_acc
print(tot_val_acc/4)

[ 0.81563126  0.83717435  0.87975952  0.69438878  0.88977956  0.86422846
  0.9243487   0.86523046  0.87174349]


In [16]:
# Column index of the stacked predictions -> "<dataset>_<attribute description>".
# Columns come in three groups of 9: food101, imagenet, places.
datasets = ['food101_', 'imagenet_', 'places_']
preds_desc_dict = {
    column: datasets[column // 9] + label_desc_dict[column % 9]
    for column in range(27)
}

## Good for Lunch

### Calculate preds accuracy per representation

In [12]:
# Threshold each representation's block of probability columns at 0.5 to get 0/1 predictions.
# The comparison is vectorized, so the result keeps X's own row count instead of relying on a
# hard-coded number of businesses.
food_preds = (X[:, :9] > 0.5).astype(int)
imagenet_preds = (X[:, 9:18] > 0.5).astype(int)
places_preds = (X[:, 18:] > 0.5).astype(int)

# per-attribute accuracy of each representation against the targets
print(np.mean(food_preds == Y, axis=0))
print(np.mean(imagenet_preds == Y, axis=0))
print(np.mean(places_preds == Y, axis=0))

[ 0.81012024  0.84168337  0.87274549  0.68136273  0.89178357  0.85370741
  0.91983968  0.85921844  0.86723447]
[ 0.80160321  0.8256513   0.87124248  0.65631263  0.8747495   0.84569138
  0.91883768  0.86573146  0.85921844]
[ 0.79809619  0.83567134  0.86823647  0.65731463  0.87775551  0.84819639
  0.89278557  0.85420842  0.85821643]


In [13]:
label_desc_dict

{0: 'good_for_lunch',
 1: 'good_for_dinner',
 2: 'takes_reservations',
 3: 'outdoor_seating',
 4: 'restaurant_is_expensive',
 5: 'has_alcohol',
 6: 'has_table_service',
 7: 'ambience_is_classy',
 8: 'good_for_kids'}

# Make Predictions on The Test Set

### Grab the list of shuffled businesses

In [50]:
# NOTE: this rebinds `businesses` from the training ids to the test ids.
# The stored ids are an object-dtype array, which np.load only reads with allow_pickle=True
# on current NumPy. Pickle can execute code, so only load files you produced yourself.
businesses = np.load(SHARED_PATH+'/test_businesses_shuffled.npy', allow_pickle=True)

### Create the attribute predictions data matrix in the following order [food101_preds, imagenet_preds, places_preds]

In [52]:
dataset_folders = ['food101', 'imagenet', 'places']
nb_businesses = len(businesses)
nb_attributes = 9
total_attributes = nb_attributes * 3

# load one block of test-set attribute probabilities per representation, in dataset_folders order
representation_blocks = []
for dataset_folder in dataset_folders:
    results_dir = DATA_DIR + '/results/' + dataset_folder
    representation_preds = np.load(results_dir + '/classifiers/probas/test_attributes_probas.npy')
    representation_blocks.append(representation_preds)

# the leading empty (nb_businesses, 0) block fixes the expected row count and float dtype;
# the loaded blocks are then stacked side by side as columns
X_test = np.concatenate([np.zeros((nb_businesses, 0))] + representation_blocks, axis=1)

### Predict on the test set with the final classifier (average of the per-fold models)

In [54]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras import regularizers

In [61]:
input_attributes = 27
output_classes = 9
def get_linear_regressor_model(input_dim=input_attributes, nb_outputs=output_classes, l1_penalty=1e-03):
    """Build and compile the linear model used as the final classifier.

    The model is a single Dense layer with no activation and no bias (a plain linear map),
    with an L1 penalty on its kernel. It is compiled with mean-squared-error loss and SGD
    with Nesterov momentum.

    Args:
        input_dim: number of input features (defaults to input_attributes).
        nb_outputs: number of output units (defaults to output_classes).
        l1_penalty: L1 regularization factor applied to the kernel.

    Returns:
        The compiled keras Sequential model, with freshly initialized weights.
    """
    lr_model = Sequential()
    lr_model.add(Dense(units=nb_outputs, activation=None, use_bias=False,
                       kernel_regularizer=regularizers.l1(l1_penalty), input_shape=(input_dim,)))
    lr_model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True, decay=0.001))
    return lr_model

In [63]:
nb_attributes = 9
total_attributes = 27
nb_folds = 4
nb_businesses = len(businesses)

# running sum of the per-fold model outputs on the test set
proba_preds = np.zeros((nb_businesses, nb_attributes))

for val_fold in range(nb_folds):
    # rebuild the model and restore the weights saved for this fold
    lr_model = get_linear_regressor_model()
    lr_model.load_weights(FINAL_DIR+'/weights/'+'final_classifier_weights_val_fold_%d.h5'%(val_fold))

    proba_preds += lr_model.predict(X_test)

# average over exactly the number of models that were summed
proba_preds /= nb_folds
# 0/1 decisions with a 0.5 cut-off; integer dtype so the columns can be passed to np.bincount
classes_preds = (proba_preds > 0.5).astype(int)

In [69]:
label_desc_dict

{0: 'good_for_lunch',
 1: 'good_for_dinner',
 2: 'takes_reservations',
 3: 'outdoor_seating',
 4: 'restaurant_is_expensive',
 5: 'has_alcohol',
 6: 'has_table_service',
 7: 'ambience_is_classy',
 8: 'good_for_kids'}

In [154]:
def write_submission(classes_preds, fname, businesses):
    """Write predictions to a submission CSV.

    The file starts with the header ``business_id,labels``. Each following row holds one
    business id and the space-separated column indices whose prediction equals 1. The
    labels field is empty when no column is set.

    Args:
        classes_preds: 2-D 0/1 predictions, one row per business (numpy array or nested lists).
        fname: path of the CSV file to create or overwrite.
        businesses: business ids, in the same order as the rows of classes_preds.
    """
    with open(fname, 'w') as f:
        f.write('business_id,labels\n')
        for business, labels in zip(businesses, classes_preds):
            # np.asarray makes the comparison element-wise even for plain-list rows
            # (a bare ``list == 1`` is just False and would yield no labels at all)
            positive = np.where(np.asarray(labels) == 1)[0]
            # no space after the comma, matching the header row
            f.write('{},{}\n'.format(business, ' '.join(map(str, positive))))

In [155]:
write_submission(classes_preds, 'restaurant_preds2.csv', businesses)

In [100]:
%pwd

'/home/javier/Documents/YelpRestaurantPhotoClassification/nbs'

In [134]:
label_desc_dict

{0: 'good_for_lunch',
 1: 'good_for_dinner',
 2: 'takes_reservations',
 3: 'outdoor_seating',
 4: 'restaurant_is_expensive',
 5: 'has_alcohol',
 6: 'has_table_service',
 7: 'ambience_is_classy',
 8: 'good_for_kids'}

In [145]:
# re-threshold column 0 ('good_for_lunch' in label_desc_dict) with a 0.20 cut-off
classes_preds[:, 0] = (proba_preds[:, 0] > 0.20).astype(int)

In [151]:
# re-threshold column 7 ('ambience_is_classy' in label_desc_dict) with a 0.4 cut-off
classes_preds[:, 7] = (proba_preds[:, 7] > 0.4).astype(int)

In [152]:
np.bincount(classes_preds[:, 7])

array([7613, 2387])

In [146]:
np.bincount(classes_preds[:, 0])

array([6992, 3008])

In [147]:
classes_preds[:100, :]

array([[0, 1, 1, 0, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 0, 0, 1, 1, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 0],
       [0, 1, 1, 1, 0, 1, 1, 1, 0],
       [0, 1, 1, 1, 0, 1, 1, 0, 0],
       [1, 1, 1, 0, 1, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 0],
       [1, 1, 1, 0, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [1, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 0],
       [0, 1, 1, 1, 0, 1, 1, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [1, 1, 0, 0, 0, 1, 1, 0, 1],
       [0, 1, 1, 0, 0, 1, 1, 0, 0],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [1, 0, 1, 0, 0, 1, 1, 0, 1],
       [0, 1, 1, 0, 0, 1, 1, 0, 0],
       [0, 0, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1,

In [103]:
np.bincount(Y[:, 0].astype(np.int32))

array([1325,  671])

In [126]:
# Good for lunch
671/1996

0.33617234468937873

In [105]:
np.bincount(Y[:, 7].astype(np.int32))

array([1424,  572])

In [106]:
np.bincount(classes_preds[:, 7])

array([8914, 1086])

In [107]:
# Ambience is classy
572/1996

0.2865731462925852

In [104]:
businesses[:10]

array(['l3hce', 'nim76', '57z69', 'bvw6i', '0rzi7', '4dnpo', '6v6r4',
       'yqld5', 'ub57s', 'r26ek'], dtype=object)

In [110]:
label_desc_dict

{0: 'good_for_lunch',
 1: 'good_for_dinner',
 2: 'takes_reservations',
 3: 'outdoor_seating',
 4: 'restaurant_is_expensive',
 5: 'has_alcohol',
 6: 'has_table_service',
 7: 'ambience_is_classy',
 8: 'good_for_kids'}

In [111]:
proba_preds[:, 0]

array([ 0.18322376,  0.15505676,  0.13372132, ...,  0.30394054,
        0.13328975,  0.20990028])