 ### 1. Model Generation

In [160]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
def _format_float(value):
    """Render a float with exactly five digits after the decimal point."""
    return '%.5f' % value


# Use the fixed-precision formatter for every float pandas displays.
pd.set_option('display.float_format', _format_float)

In [161]:
# Load the wine data set and put features and target side by side; the target ends up
# as the last column because it is concatenated last.
data = load_wine()
df = pd.concat([pd.DataFrame(data.data), pd.DataFrame(data.target)], ignore_index=True, axis=1)

# Shuffle the rows with a fixed seed. Without it the row order (and therefore the
# train/test split and the fitted model) changes on every run, even though
# train_test_split itself is seeded.
df = df.sample(frac=1, random_state=42)

# All columns except the last are features; the last column is the class label.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1], df.iloc[:, -1], test_size=0.33, random_state=42
)

# Multinomial logistic regression; max_iter is raised well above the default so
# lbfgs has room to converge on the unscaled features.
logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', max_iter=10000)
model = logreg.fit(X_train, y_train)

### 2. Dump the model into the `model/` folder so the AWS Lambda function can load it

In [162]:
# Serialize the fitted model for the Lambda function. The context manager closes the
# file (and flushes it to disk) even if pickling raises.
# NOTE: the 'model' directory must already exist.
with open('model/wine_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

### 3. Because AWS Lambda does not include scikit-learn, you need to build and attach a layer

<b>create_layer.sh</b>

In [None]:
# Target directory for the layer's Python packages; serverless.yml packages the
# surrounding `build` directory as the layer.
PY_DIR="build/python/lib/python3.6/site-packages"

# Create the directory tree (no error if it already exists).
mkdir -p "${PY_DIR}"

# Install the pinned requirements straight into the layer directory.
pip install --requirement requirements_aws.txt --target "${PY_DIR}"

### 4. Create an AWS Lambda function which calls the model and responds with the prediction for new data

<b>get_prediction.py</b>

In [164]:
import json
import pickle
import logging
import numpy as np
from sklearn.linear_model import LogisticRegression

# Configure the root logger so INFO messages from the handler are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Load the model at module import, outside the handler, so it is not re-read on
# every call. The context manager closes the file handle once loading is done.
# SECURITY: pickle.load can execute arbitrary code; only ship a model file that
# was produced by a trusted build.
with open("model/wine_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)


def handler(event, context):
    """Score one observation and return the class probabilities.

    Args:
        event: Invocation event; ``event["body"]`` must be a JSON string of the
            form ``{"data": [f1, f2, ...]}`` holding the feature values.
        context: Lambda context object (unused).

    Returns:
        dict: ``{"statusCode": 200, "body": ...}`` where the body is a JSON string
        that itself contains the JSON-encoded list of probabilities, or
        ``{"statusCode": 400, "body": ...}`` with an ``error`` message when the
        request body is missing or malformed.
    """
    logger.info("EVENT:{}".format(event))

    try:
        payload = json.loads(event.get("body"))
        data = payload.get("data")
        # np.asarray(None, dtype=float) silently yields NaN, so reject anything
        # that is not a non-empty list before converting.
        if not isinstance(data, list) or not data:
            raise ValueError("'data' must be a non-empty list of numbers")
        features = np.asarray(data, dtype=float).reshape(1, -1)
    except (AttributeError, TypeError, ValueError) as exc:
        # Missing body, invalid JSON, wrong payload shape or non-numeric values.
        logger.warning("Bad request: {}".format(exc))
        return {
            "statusCode": 400,
            "body": json.dumps({"error": "Invalid request body: {}".format(exc)}),
        }

    # .tolist() converts NumPy scalars to plain Python floats, so the encoding does
    # not depend on how the installed NumPy version prints its scalar types.
    pred = model.predict_proba(features)[0].tolist()

    # The body is deliberately encoded twice: existing clients call r.json() and
    # then json.loads() on the resulting string.
    return {"statusCode": 200, "body": json.dumps(json.dumps(pred))}

### 5. Create a <b>serverless.yml</b> file to deploy API Gateway and the Lambda function including the layer to AWS

If you want to know more about serverless, see <b>https://serverless.com/</b>

In [None]:
# Serverless Framework service definition: one Lambda function exposed through an
# HTTP POST endpoint, plus a layer built from the local `build` directory.
service: aws-simple-predictive-engine

package:
  # Package each function separately instead of one shared artifact.
  individually: true

provider:
  name: aws
  runtime: python3.6
  # Provider-level timeout; the function below sets the same value explicitly.
  timeout: 3
  # API key whose name is prefixed with the deployment stage.
  apiKeys:
  - ${self:provider.stage}-predictive-engine
  # Quota and throttling settings of the usage plan.
  usagePlan:
    quota:
      limit: 1000000
      offset: 0
      period: DAY
    throttle:
      burstLimit: 10000
  region: eu-central-1
  endpointType: REGIONAL

layers:
  scikitLayer:
    # Everything under `build` is shipped as the layer content.
    path: build
    description: scikit-learn
    compatibleRuntimes:
     - python3.6

functions:
  get_prediction:
    # <module>.<function>: the `handler` function in get_prediction.py.
    handler: get_prediction.handler
    description: This function predict scores for new data
    memorySize: 256
    timeout: 3
    reservedConcurrency: 10
    package:
      # Keep the layer build output and the local virtualenv out of the function artifact.
      exclude:      
      - build/**
      - venv/**
    events:
    - http:
        path: v1/predict
        method: post
        # NOTE(review): presumably makes the endpoint require an API key
        # (sent as the x-api-key header) — confirm in the Serverless docs.
        private: true
    layers:
    - {Ref: ScikitLayerLambdaLayer}  # See for details https://serverless.com/framework/docs/providers/aws/guide/layers/

### 6. Deploy your serverless.yml

In [None]:
# Deploy the service defined in serverless.yml; run from the directory containing it.
!sls deploy

### 7. Generate some random data and post requests against your endpoint

Use your API-Key and Endpoint

In [158]:
import json
import requests

# Headers sent with every request. Replace YOUR_KEY with your own API key; do not
# commit a real key to the notebook.
headers = {
    "Content-type": "application/json",
    "x-api-key": "YOUR_KEY",
}

# Replace with the URL of your deployed endpoint. The placeholder is quoted so the
# cell defines a string instead of raising NameError on an undefined name.
endpoint = "YOUR_ENDPOINT"
    
def call_api_gateway(input_data, headers=headers, endpoint=endpoint, timeout=10):
    """POST one observation to the prediction endpoint and return its class scores.

    Args:
        input_data: Iterable of feature values for a single observation.
        headers: HTTP headers to send (content type and API key).
        endpoint: URL of the prediction endpoint.
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        list: The scores returned by the service, or ``[None, None, None]`` when
        the request fails or the response cannot be decoded (best effort: a
        single failed call must not abort a batch of requests).
    """
    fallback = [None] * 3
    try:
        body = json.dumps({'data': list(input_data)})
        r = requests.post(endpoint, data=body, headers=headers, timeout=timeout)
        # Turn 4xx/5xx answers into an exception instead of trying to parse them.
        r.raise_for_status()
        response = r.json()
        # The Lambda function wraps the JSON list in a JSON string; also accept a
        # plain list in case the service returns the list directly.
        scores = json.loads(response) if isinstance(response, str) else response
    except (requests.RequestException, TypeError, ValueError):
        # Network/HTTP errors, non-JSON bodies or non-serializable input.
        return fallback
    return scores if isinstance(scores, list) else fallback

#### Be aware these are just some dumb random numbers

In [159]:
response = []  # NOTE(review): not read anywhere in the visible notebook — confirm it can be dropped

# Seeded generator so a re-run sends the same inputs; 100 requests with 13 values
# each (presumably one value per wine feature — confirm against the training data).
random_inputs = np.random.RandomState(42).randn(100, 13)

# One API call per row; each call yields the three class scores (or three Nones on failure).
df_scores = pd.DataFrame(
    [call_api_gateway(row) for row in random_inputs],
    columns=['class_0', 'class_1', 'class_2'],
)
df_scores.tail(10)

Unnamed: 0,class_0,class_1,class_2
90,0.0,0.0,1.0
91,0.03671,0.96329,0.0
92,0.0,1.0,0.0
93,0.0,0.0,1.0
94,0.99396,0.0,0.00604
95,0.99997,0.0,3e-05
96,0.0143,0.0,0.9857
97,0.0,0.0,1.0
98,0.0,1.0,0.0
99,0.0,0.0,1.0
