In [2]:
import sys

# Add the Python 3.7 installation under /var/lang to the import path.
# Entries are only appended when missing, so re-running this cell does not
# pile up duplicate sys.path entries.
for runtime_path in (
    '/var/lang/lib/python37.zip',
    '/var/lang/lib/python3.7',
    '/var/lang/lib/python3.7/lib-dynload',
    '/var/lang/lib/python3.7/site-packages',
):
    if runtime_path not in sys.path:
        sys.path.append(runtime_path)

# Drop the SageMaker-internal conda site-packages from the import path.
# list.remove raises ValueError when the entry is absent (e.g. outside
# SageMaker or on a re-run); that is the only error deliberately ignored.
try:
    sys.path.remove('/opt/.sagemakerinternal/conda/lib/python3.7/site-packages')
except ValueError:
    pass

In [3]:
import boto3
import awswrangler as wr
from decimal import Decimal

In [4]:
# Shared boto3 session pinned to the us-east-1 region; the cells below use it
# for the DynamoDB resource and for the Athena query.
my_boto3_session = boto3.Session(region_name='us-east-1')

### 1. Criar uma tabela no DynamoDB

In [5]:
# Create the titanic-propensity-survive table in DynamoDB
dynamodb = my_boto3_session.resource('dynamodb')

# Single numeric hash key (passengerid), billed per request (on-demand).
params = {
    'TableName': 'titanic-propensity-survive',
    'KeySchema': [
        {'AttributeName': 'passengerid', 'KeyType': 'HASH'}
    ],
    'AttributeDefinitions': [
        {'AttributeName': 'passengerid', 'AttributeType': 'N'}
    ],
    'BillingMode': 'PAY_PER_REQUEST'
}

try:
    dynamodb.create_table(**params)
except dynamodb.meta.client.exceptions.ResourceInUseException:
    # The table already exists (for example this cell was re-run); reuse it
    # instead of failing the notebook.
    pass

# create_table returns while the table is still being created. Block until it
# is ready so the load step further down does not write to a table that is
# not active yet.
dynamodb.Table(name='titanic-propensity-survive').wait_until_exists()

dynamodb.Table(name='titanic-propensity-survive')

In [6]:
# Read the titanic_propensity_survive table from Athena and drop the embarked
# column before the data is saved to DynamoDB
query = "SELECT * FROM auladeploymodelos.titanic_propensity_survive;"
dataprep_df = wr.athena.read_sql_query(
    query,
    database="auladeploymodelos",
    boto3_session=my_boto3_session,
).drop(columns=['embarked'])
dataprep_df.head()

Unnamed: 0,pclass,cabine_prefix,ticket_str,nametitle,fare,sibsp,parch,age_mean,ticket_int,survived,passengerid,referencedate,partition_0
0,3,missing,missing,Mr,8.05,0.0,0.0,35.0,373450.0,0,5,1912-07,test_data
1,3,missing,missing,Mr,8.4583,0.0,0.0,29.699118,330877.0,0,6,1912-07,test_data
2,1,E,missing,Mr,51.8625,0.0,0.0,54.0,17463.0,0,7,1912-07,test_data
3,3,missing,missing,Master,21.075,3.0,1.0,2.0,349909.0,0,8,1912-07,test_data
4,3,G,PPP,Miss,16.7,1.0,1.0,4.0,9549.0,1,11,1912-07,test_data


In [7]:
# Insert the Titanic data into the newly created DynamoDB table
def float_to_decimal(num):
    """Return ``num`` as a ``Decimal`` built from its ``str()`` text.

    Going through the string form (rather than ``Decimal(num)``) keeps the
    short printed value, e.g. ``8.05``, instead of the float's full binary
    expansion.
    """
    as_text = str(num)
    return Decimal(as_text)

def pandas_to_dynamodb(df, table_name='titanic-propensity-survive', boto3_session=None):
    """Write every row of ``df`` to a DynamoDB table.

    Missing values are replaced with 0 and ``float64`` columns are converted
    to ``Decimal`` first, because boto3's DynamoDB serializer does not accept
    Python floats. The caller's frame is not modified: ``fillna`` returns a
    new frame and all conversions happen on that copy.

    Args:
        df: pandas DataFrame to upload.
        table_name: target DynamoDB table; defaults to the table created
            earlier in this notebook.
        boto3_session: boto3 session used for the write. ``None`` lets
            awswrangler fall back to its default session.
    """
    df = df.fillna(0)
    # convert any floats to decimals
    for column in df.columns:
        if df[column].dtype == 'float64':
            df[column] = df[column].apply(float_to_decimal)
    # write to dynamodb
    wr.dynamodb.put_df(df=df, table_name=table_name, boto3_session=boto3_session)

# Write with the same us-east-1 session that created the table, so the rows do
# not depend on whatever region the default session resolves to.
pandas_to_dynamodb(dataprep_df, boto3_session=my_boto3_session)

### 2. Substituir o StackedEnsemble_BestOfFamily_4_AutoML_1_20221011_230015.zip pelo nome do melhor modelo definido no step 2_Fast_Machine_Learning

### 2.1 Editar e testar a função handler que será salva no arquivo ./deploy/handler.py

In [36]:
def lambda_handler(event, context):
    """Score one Titanic passenger and return an API Gateway style response.

    ``event['queryStringParameters']`` must carry ``passenger_id`` (integer
    key of the ``titanic-propensity-survive`` DynamoDB table) and ``embarked``
    (a port name such as ``"Cherbourg"``, or any other value, which is passed
    through unchanged). The features stored for the passenger are combined
    with ``embarked`` and scored with the H2O MOJO model.

    Returns:
        dict with ``statusCode``, a JSON-encoded ``body`` and CORS ``headers``.
        * 200 - ``message``, ``probability``, ``rating`` and ``predict``.
        * 400 - a query parameter is missing or ``passenger_id`` is not an
          integer.
        * 404 - no item exists for ``passenger_id``.

    ``context`` is not used.
    """
    import json

    def build_response(status_code, body):
        # Every response, including errors, carries the same CORS header.
        return {
            "statusCode": status_code,
            "body": json.dumps(body),
            "headers": {
                "Access-Control-Allow-Origin": "*"
            }
        }

    # Validate the request before loading the heavy dependencies so malformed
    # calls fail fast. queryStringParameters may be absent or None when the
    # request has no query string.
    query_params = (event or {}).get('queryStringParameters') or {}
    passenger_id = query_params.get('passenger_id')
    embarked = query_params.get('embarked')
    if passenger_id is None or embarked is None:
        return build_response(400, {
            "message": "Missing required query parameters: passenger_id and embarked"
        })
    try:
        passenger_key = int(passenger_id)
    except (TypeError, ValueError):
        return build_response(400, {
            "message": "passenger_id must be an integer"
        })

    # Imported inside the function so the handler stays self-contained when it
    # is saved to ./deploy/handler.py.
    import h2o
    import pandas as pd
    import boto3

    #Best Model ID:
    BestModelId='./output_model/models/best/StackedEnsemble_BestOfFamily_4_AutoML_2_20221010_193539.zip'

    #Keep the ratings ranges updated
    def ratings(p1):
        # Bucket the positive-class probability into rating 1, 2 or 3.
        if p1 <= 0.2508362656036639:
            return 1
        elif p1 <= 0.6540492277407066:
            return 2
        else:
            return 3

    def predict_func(predict):
        # Translate the model's class label into the text returned to the caller.
        if predict == 0:
            return 'Not survive'
        elif predict == 1:
            return 'Survive'
        else:
            return 'predict_ERROR'

    # Create the DynamoDB connection and fetch the stored passenger features
    my_boto3_session = boto3.Session(region_name='us-east-1')
    table = my_boto3_session.resource('dynamodb').Table('titanic-propensity-survive')
    lookup = table.get_item(Key={'passengerid': passenger_key})
    # get_item omits the 'Item' key when nothing matches the primary key.
    if 'Item' not in lookup:
        return build_response(404, {
            "message": "Passenger not found"
        })
    titanicTable = lookup['Item']

    # Map the port name to the codes known by the model; any other value is
    # passed through unchanged.
    embarked_codes = {"Cherbourg": "C", "Queenstown": "Q", "Southampton": "S"}
    titanicTable['embarked'] = embarked_codes.get(embarked, embarked)

    # Drop the stored attributes that are not sent to the model.
    for unused_key in ('referencedate', 'passengerid', 'partition_0'):
        titanicTable.pop(unused_key, None)

    # NOTE(review): embarked is made the index, presumably so the frame is
    # handed to the scorer without an extra unnamed index column - confirm.
    model_input = pd.DataFrame(titanicTable, index=[0]).set_index('embarked', inplace=False)
    prediction = h2o.mojo_predict_pandas(model_input, mojo_zip_path=BestModelId, genmodel_jar_path='./output_model/models/best/h2o-genmodel.jar', verbose=False)

    # Exactly one row was scored; read its class and positive-class probability.
    p1 = float(prediction['p1'].iloc[0])
    predicted_class = prediction['predict'].iloc[0]

    body = {
        "message": "Prediction executed successfully!"
    }

    body['probability'] = round(p1, 4)
    body['rating'] = str(ratings(p1))
    body['predict'] = predict_func(predicted_class)

    return build_response(200, body)

In [37]:
# DEV: local smoke test - call the handler with an event that only carries
# query-string parameters and print the response dict it returns.
event={
    "queryStringParameters":{"passenger_id":"2", "embarked": "Cherbourg"}
}
# A placeholder string stands in for the context argument in this local call.
context='context'
print(lambda_handler(event, context))

{'statusCode': 200, 'body': '{"message": "Prediction executed successfully!", "probability": 0.9596, "rating": "3", "predict": "Survive"}', 'headers': {'Access-Control-Allow-Origin': '*'}}


### 3. Voltar para o arquivo README.md e seguir as instruções em [AWS Cloudformation: Deploy usando Serverless](https://github.com/maxreis86/FIEP-Modelos-de-Aprendizado-e-Arquiteturas-Cloud#aws-cloudformation-deploy-usando-serverless).