# API for Best Classifier So Far (currently: Poly Kernel SVC)

Note: I wanted to be able to test my API on the labeled data, so I took advantage of the fact that I have already processed and stored this data. At some point I plan to circle back and add the preprocessing steps so that this API can be used with unlabeled, unprocessed data.

In [1]:
# import
from pathlib import Path

# location the prediction API script will be written to: ../src/models/prediction_api.py
prediction_api_script_file = Path.cwd().parent.joinpath('src', 'models', 'prediction_api.py')

In [2]:
%%writefile $prediction_api_script_file

# import
import json
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
from flask import Flask, request
# NOTE: sklearn.externals.joblib was removed in scikit-learn 0.23;
# newer installs need `import joblib` instead
from sklearn.externals import joblib

# create the flask app
app = Flask(__name__)

# locate the project root relative to this script, which lives at
# <project>/src/models/prediction_api.py, instead of hard-coding one user's
# home directory; this way the API runs wherever the project is checked out
project_dir = Path(__file__).resolve().parents[2]
model_file_path = project_dir / 'models' / 'poly_svc_cp_model.pkl'

# load the pickled model once at start-up so that every request reuses it
# NOTE: joblib.load unpickles the file, so only point this at model files you trust
model = joblib.load(model_file_path)

# register make_prediction as the handler for POST requests to the single '/api' endpoint
@app.route('/api', methods=['POST'])
def make_prediction():
    """Predict survival for the passengers described in the posted JSON.

    The request body must be the JSON form of a pandas data frame (as produced
    by ``DataFrame.to_json()``) containing a 'PassengerId' column plus the
    feature columns the model expects. A 'Survived' column is optional: when
    it is present (e.g. when testing against labeled training data) it is
    excluded from the features and echoed back as 'Actual'.

    Returns:
        A JSON response with 'PassengerId' and 'Predicted' columns, plus
        'Actual' when labels were supplied. If 'PassengerId' is missing, a
        JSON error message is returned with status 400.
    """
    # parse the body as JSON regardless of the Content-Type header, then
    # re-serialize it so pandas can rebuild the data frame from it
    payload = json.dumps(request.get_json(force=True))

    # wrap the string in a file-like object: passing a literal JSON string
    # to read_json is deprecated in recent pandas releases
    df = pd.read_json(StringIO(payload))

    # fail with a clear client error instead of an unhandled KeyError
    if 'PassengerId' not in df.columns:
        error = json.dumps({'error': "request data must include a 'PassengerId' column"})
        return app.response_class(error, status=400, mimetype='application/json')

    passenger_ids = df['PassengerId']

    # labels are only available for (training) data that is already labeled
    has_labels = 'Survived' in df.columns
    non_feature_columns = ['PassengerId', 'Survived'] if has_labels else ['PassengerId']

    # everything that is not an id or a label is a model feature
    X = df.drop(non_feature_columns, axis=1)

    # make predictions
    predictions = model.predict(X)

    # assemble the response data frame, keeping the original column order
    response_columns = {'PassengerId': passenger_ids,
                        'Predicted': predictions}
    if has_labels:
        response_columns['Actual'] = df['Survived']
    df_response = pd.DataFrame(response_columns)

    # send the frame back as JSON, labeled with the JSON mimetype
    return app.response_class(df_response.to_json(), mimetype='application/json')
    
if __name__ == '__main__':
    # start the Flask server on port 10001 (any unused port would do)
    # debug mode gives a detailed stack trace whenever an API call fails, which is
    # handy during development but should probably be turned off in production
    app.run(debug=True, port=10001)

Writing /Users/jgower/Python/Kaggle/titanic_survival/src/models/prediction_api.py


## Test the API

Open a terminal, navigate to /your_path/titanic_survival/src/models (which is where the API script is located), then execute the script (e.g. `python prediction_api.py`). Once the server is running, proceed to the next cell...

In [3]:
# import
import pandas as pd

# read the processed training set from ../data/processed into a data frame
train_file_path = Path.cwd().parent.joinpath('data', 'processed', 'train_processed_cp.csv')
train_df = pd.read_csv(train_file_path)

In [13]:
# take the first five surviving passengers; they are sent to the API to test it
passengers_survived = train_df.loc[train_df['Survived'].eq(1)].head(5)

In [14]:
# display the five selected rows to confirm that each of them has Survived == 1
passengers_survived

Unnamed: 0,PassengerId,Survived,Deck_Advantage,Embarked_C,Fare_Bin_0,Fare_Bin_2,Helpless,Pclass_1,Pclass_3,Sex_Female,Small_Support
0,693,1,0,0,0,1,0,0,1,0,0
3,856,1,0,0,0,0,0,0,1,1,1
4,802,1,0,0,0,1,0,0,0,1,1
6,510,1,0,0,0,1,0,0,1,0,0
8,829,1,0,0,1,0,0,0,1,0,0


In [15]:
# import
import requests

# function to make API requests
def make_api_request(data, url='http://127.0.0.1:10001/api', timeout=10):
    """POST data to the prediction API and return the decoded JSON response.

    Args:
        data: request body to send, e.g. the string from ``DataFrame.to_json()``.
        url: endpoint of the prediction API; defaults to the local server.
        timeout: seconds to wait for the server before giving up, so that a
            server that is not running cannot hang the notebook indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeouts.
    """
    # make post request
    response = requests.post(url, data=data, timeout=timeout)

    # surface server-side failures as an HTTP error here instead of letting
    # them show up later as an opaque JSON decoding error
    response.raise_for_status()

    # return the json object
    return response.json()

In [16]:
# send the five surviving passengers to the API
# every 'Actual' value in the response should be 1, since only survivors were selected
# the model is about 80% accurate, but with a sample this small we should not
# expect exactly 4 of the 5 passengers to be predicted as Survived
make_api_request(passengers_survived.to_json())

{'PassengerId': {'0': 693, '3': 856, '4': 802, '6': 510, '8': 829},
 'Predicted': {'0': 0, '3': 1, '4': 1, '6': 0, '8': 0},
 'Actual': {'0': 1, '3': 1, '4': 1, '6': 1, '8': 1}}