# Table of Contents

- [Imports](#Imports)
- [Classification](#Classification)
- [Load Model](#Load-the-Model)
- [Prediction](#Make-Predictions)
- [Save](#Save-Results)

# Imports

In [22]:
import numpy as np
import pandas as pd
from PIL import Image
from keras import utils
import matplotlib.pyplot as plt
from keras.models import load_model, Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from keras import backend as K
from keras.applications import VGG16
from keras.preprocessing.image import ImageDataGenerator

In [63]:
# Read the photo metadata into a dataframe; the image data is generated from it later
all_pics_csv = '../datasets/all_pics.csv'
photo_df = pd.read_csv(all_pics_csv)

In [64]:
# Preview the first five rows of the photo metadata
photo_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,filename,city,url,latitude,longitude
0,0,./photos_to_classify/1.jpg,New York,https://live.staticflickr.com/65535/4089052561...,40.75922,-73.976891
1,1,./photos_to_classify/2.jpg,New York,https://live.staticflickr.com/65535/4089027516...,40.719006,-73.997447
2,2,./photos_to_classify/3.jpg,New York,https://live.staticflickr.com/65535/4694014229...,40.720457,-73.997071
3,3,./photos_to_classify/4.jpg,New York,https://live.staticflickr.com/65535/4785620686...,40.73018,-73.997042
4,4,./photos_to_classify/5.jpg,New York,https://live.staticflickr.com/65535/4780397280...,40.718221,-73.997833


# Classification

## Load the Model

The `load_model` function from Keras was deprecated, so we need to create a model with layers identical to those of our successful model, then compile it and fit it for 0 epochs. The weights are then loaded from the h5 file saved during previous training.

In [16]:
# Directory holding the training images
train_dir = '../picture_data/train'

# Output classes the model predicts
classes = ['food', 'culture', 'view', 'park']

# Build the training batches. The input to the VGG16 model has to be of size
# (224, 224), which is why the images are resized to that target size.
train_datagen = ImageDataGenerator()
train_batches = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    classes=classes,
    batch_size=400,
)

Found 4000 images belonging to 4 classes.
Found 1200 images belonging to 4 classes.


In [17]:
# Rebuild the architecture of the most successful model: every VGG16 layer
# except its last one, followed by dropout and a 4-unit softmax output layer.
conv_base = VGG16()
classifier = Sequential()
for vgg_layer in conv_base.layers[:-1]:
    classifier.add(vgg_layer)

classifier.add(Dropout(rate=0.5))
classifier.add(Dense(units=4, activation='softmax'))

In [23]:
# Compile the model with the Adam optimizer, categorical cross-entropy loss
# and accuracy as the reported metric
adam = Adam(lr=1e-4)
classifier.compile(optimizer=adam,
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

In [24]:
# Fit for 0 epochs so that no training happens; the weights are loaded from
# the saved model afterwards. No validation data is passed: with zero epochs
# nothing is evaluated, and `valid_batches` is never defined in this notebook,
# so referencing it fails when the notebook is run on a fresh kernel.
classifier.fit_generator(train_batches,
                         epochs=0)

<keras.callbacks.History at 0x7fb8d283eb38>

In [25]:
# Restore the trained weights from the saved model file
weights_path = '../data/keras_models/vgg16_dataaug_model.h5'
classifier.load_weights(weights_path)

## Make Predictions

In [34]:
# Build the test batches from the dataframe that was updated when the images
# to classify were downloaded. No labels are produced (class_mode=None) and
# shuffling is turned off; the predicted labels are later added back to the
# dataframe by position.
# NOTE(review): no x_col is given, so the generator's default path column is
# used — presumably 'filename'; confirm against the installed Keras version.
test_datagen = ImageDataGenerator()
test_batches = test_datagen.flow_from_dataframe(
    dataframe=photo_df,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False,
)

Found 16821 images.


In [35]:
# Run the classifier over the test batches (verbose=1 reports progress)
predictions = classifier.predict_generator(generator=test_batches, verbose=1)



In [67]:
# Map each prediction to its class label and add the labels to the dataframe.

# Round the probabilities so that a class only counts as predicted when its
# rounded probability is 1; rows where no class reaches that get an empty label.
rounded = np.rint(np.asarray(predictions))
is_hit = rounded == 1

# Position of the first predicted class in each row (only used where a hit exists)
first_hit = is_hit.argmax(axis=1)

# The label for column i is classes[i] ('food', 'culture', 'view', 'park'), so
# the names are looked up in `classes` instead of being repeated by hand.
class_names = np.array(classes, dtype=object)
labels = np.where(is_hit.any(axis=1), class_names[first_hit], '').tolist()

# Fail with a clear message if the predictions do not line up with the dataframe rows
if len(labels) != len(photo_df):
    raise ValueError(
        'got {} predictions for {} dataframe rows'.format(len(labels), len(photo_df))
    )

# add new column
photo_df['labels'] = labels

# Save Results

The saved results will be used for mapping.

In [81]:
# Write the dataframe, now including the predicted labels, to disk.
# NOTE(review): the row index is written as well, which shows up as an extra
# unnamed column when the file is read back — consider index=False if
# downstream code does not need it.
predictions_csv = '../datasets/photos_with_pred.csv'
photo_df.to_csv(predictions_csv)