## Part 2 

Train an image classifier capable of detecting if a scene is indoors or outdoors.

### Setup

In [61]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import math
import sklearn
from glob import glob
import tensorflow as tf
from IPython.display import YouTubeVideo

In [62]:
# project directory
project_dir = Path('/Users/administrator/Documents/pex_challenge/')
data_dir = project_dir.joinpath('data/yt8m/frame')

In [63]:
# save the train/ test files
X_train = pd.read_csv(project_dir.joinpath('data/Xtrain.csv'))
X_test = pd.read_csv(project_dir.joinpath('data/Xtest.csv'))
y_train = pd.read_csv(project_dir.joinpath('data/ytrain.csv'))
y_test = pd.read_csv(project_dir.joinpath('data/ytest.csv'))

### Step 1

Build an artificial neural network.

In [94]:
# we will use sklearn.neural_network, specifically the MLPClassifier
# this classifier is based on a perceptron model
# it can also produce probabilitic estimates of the classification
from sklearn.neural_network import MLPClassifier
clf = sklearn.neural_network.MLPClassifier(activation = 'logistic', solver='sgd', alpha=1e-3,
                                           hidden_layer_sizes=(5, 2))

In [95]:
clf.fit(X_train, y_train['indoor'])



MLPClassifier(activation='logistic', alpha=0.001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

### Step 2

Evaluate the accuracy using the ratio of all true results / the total number of examples tested.

In [96]:
# predict whether images are indoor outdoor
predictions = clf.predict(X_test)

In [97]:
def evaluate_method(pred, y_test):
    """
    This method evaluate the accuracy using the ratio of all true results / the total number of examples tested.
    It returns the fraction of true results over the total number of examples tested.
    """
    
    num_true_results = 0
    
    # iterate through the predictions
    for i in range(len(pred)):
        # compare the prediction to the true result
        if pred[i] == y_test.iloc[i]:
            num_true_results += 1
    
    return num_true_results/len(y_test)

In [98]:
# our model predicted the right class of the images 95% of the time
evaluate_method(predictions, y_test['indoor'])

0.9583333333333334

In [100]:
%pwd

'/Users/administrator/Documents/pex_challenge/analysis_notebooks'

In [101]:
# save the model to disk
import pickle
filename = project_dir.joinpath('model.sav')
pickle.dump(clf, open(filename.as_posix(), 'wb'))