In [None]:
# test
!pip install xgboost==1.4.1

In [None]:
!pip install pickle5

In [None]:
!pip install scikit-learn==0.23.2

In [4]:
# AWS Sagemaker
import sagemaker
import boto3
from sagemaker.session import s3_input, Session

In [None]:
# AWS SageMaker: name of the S3 bucket that holds this project's data.
# Kept as a module-level name because load_dev_csv() reads it as a global.
bucket = 'ls-aishub-inflated'
# Region configured for the default boto3 session (printed for reference only).
my_region = boto3.session.Session().region_name
print(f"Region: {my_region}")

# Look up the execution role of this SageMaker environment.
# NOTE(review): `role` is only printed in the cells visible here — presumably it
# is what grants read access to the S3 buckets; confirm.
from sagemaker import get_execution_role
role = get_execution_role()
print(f"Role: {role}")

In [None]:
import warnings
import sklearn
import numpy as np
import pandas as pd
import xgboost as xgb
import pickle5 as pickle

# Report the versions of the main libraries so a run can be reproduced later.
print("Packages' versions:")
for _label, _module in (
    ("Scikit-learn", sklearn),
    ("Numpy", np),
    ("Pandas", pd),
    ("XGBoost", xgb),
):
    print(f"\t{_label}: {_module.__version__}")

In [27]:
def load_dev_csv(target, data='containershipdb_vu_devset.csv', **kwargs):
    """Load the static feature columns plus one target column from a CSV on S3.

    The file is read from ``s3://<bucket>/experimental_data/<data>``, where
    ``bucket`` is the module-level bucket name.

    Args:
        target (str): Name of the target column to load next to the features.
        data (str): File name of the CSV inside ``experimental_data/``.
        **kwargs: Forwarded unchanged to ``pandas.read_csv`` (e.g. ``nrows``).

    Returns:
        pandas.DataFrame: The feature columns in the order listed below,
        followed by ``target`` as the last column.
    """
    print("Load data...")
    location = f's3://{bucket}/experimental_data/{data}'
    columns = [
        "ais_dim_a",
        "ais_dim_b",
        "ais_dim_c",
        "ais_dim_d",
        "draught_fact",
    ] + [target]
    df = pd.read_csv(
        location,
        header=0,
        usecols=columns,
        **kwargs,
    )
    # read_csv ignores the element order of `usecols` and returns the columns
    # in file order. Callers rely on "features first, target last", so impose
    # that order explicitly. Columns that are no longer regular columns (e.g.
    # consumed by an `index_col` passed through kwargs) are skipped, and a
    # target that repeats a feature name is not duplicated.
    ordered = [col for col in dict.fromkeys(columns) if col in df.columns]
    return df[ordered]

In [28]:
def make_example_df(target):
    """Build two small prediction examples from the DEV set.

    The examples are kept as pandas DataFrames (not arrays) so they can be
    passed to the model in the same form as the loaded data.

    Args:
        target (str): Name of the target column; every other loaded column is
            treated as a feature.

    Returns:
        list[tuple[pandas.DataFrame, list]]: Two ``(features, y_true)`` pairs —
        the first with one instance, the second with two instances.
    """
    # Pick three fixed instances out of the first 1000 rows of the DEV set.
    row_positions = [0, 600, 800]
    df = load_dev_csv(target=target, nrows=1000).iloc[row_positions, :]

    # Split by column NAME rather than by position: the target is not
    # guaranteed to be the last column of the loaded frame, and a positional
    # split would then silently swap a feature with the target.
    features = df.drop(columns=[target])
    y_true = df[target]

    examples = [
        # With 1 data example
        (features.iloc[[0], :], y_true.iloc[[0]].to_list()),
        # With 2 data examples
        (features.iloc[[1, 2], :], y_true.iloc[[1, 2]].to_list()),
    ]

    return examples

In [29]:
def load_model(bucket, model_name):
    """Download a pickled model from S3 and return the unpickled object.

    Args:
        bucket (str): Name of the S3 bucket that stores the models.
        model_name (str): Folder name of the model under
            ``models/vessel-utilization/``.

    Returns:
        The object stored in ``model.pkl``.

    Note:
        The file is downloaded to ``model.pkl`` in the current working
        directory, overwriting any existing file with that name.
    """
    print("Load model...")
    local_file = "model.pkl"
    s3_key = f"models/vessel-utilization/{model_name}/model.pkl"

    boto3.client('s3').download_file(bucket, s3_key, local_file)

    # NOTE(review): unpickling can execute arbitrary code — only load models
    # from buckets you trust.
    with open(local_file, "rb") as handle:
        return pickle.load(handle)

In [30]:
def mape(y_true, y_pred):
    """Mean absolute percentage error regression loss.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values (list, tuple or numpy array).
    y_pred : array-like
        Predicted target values, same shape as ``y_true``.

    Returns
    -------
    float
        Average of ``|y_pred - y_true| / max(|y_true|, eps)``. The result is a
        ratio (``0.3`` means 30 %), not a value scaled to 0-100.

    Examples
    --------
    >>> y_true = np.array([3, -0.5, 2, 7])
    >>> y_pred = np.array([2.5, 0.0, 2, 8])
    >>> mape(y_true, y_pred)
    0.3273809523809524...

    >>> y_true = np.array([1.0, -1.0, 2, -2])
    >>> y_pred = np.array([0.7, -0.7, 1.4, -1.4])
    >>> mape(y_true, y_pred)
    0.30000000000000004...
    """
    # Accept plain Python sequences as well as arrays: without the conversion
    # `y_pred - y_true` raises TypeError when both arguments are lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Epsilon: an arbitrarily small yet strictly positive number
    # to avoid undefined results when y_true is zero
    epsilon = np.finfo(np.float64).eps
    ape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon)
    return np.average(ape)

In [31]:
def predict_example_df(model, target):
    """Predict the example instances with ``model`` and print the comparison.

    For each example built by ``make_example_df`` this prints the feature
    rows, the true target values, the model's predictions and the error
    computed by ``mape``.

    Args:
        model: Fitted estimator exposing ``predict``.
        target (str): Name of the target column the model predicts.
    """
    for features, y_true in make_example_df(target=target):
        print(f"\nTest example w/ {len(features)} instances:")
        print(features)
        # Make prediction
        y_pred = model.predict(features)
        error = mape(y_true, y_pred)
        # Print out
        print(f"\nReal Value: {y_true}")
        print(f"Predicted Value: {list(y_pred)}")
        print(f"Mean Absolute Ratio Error: {error}\n")

In [32]:
def main(target=None):
    """Check a one-off prediction using the trained model for ``target``.

    Args:
        target (str, optional): Name of the target to check, either
            ``"vu_estimated"`` or ``"teu_estimated"``. When ``None`` a message
            is printed and nothing else is done. Defaults to None.

    Raises:
        ValueError: If ``target`` is given but no model is registered for it.
    """
    print("--------------- START: Check an one-off prediction ----------------")
    warnings.filterwarnings(action='ignore', category=UserWarning)

    # Folder name of the separately saved model for each supported target.
    model_names = {
        "vu_estimated": "XGBRegressor-vu_estimated_0c1914e51e754b3893110f91279301f5",
        "teu_estimated": "XGBRegressor-teu_estimated_9ffee2d3fa3d469ea7bfc9df7e5d317f",
    }

    if target is None:
        # Nothing to check: stop here instead of falling through to the model
        # loading step with no model name defined.
        print("No target's name...")
        return

    if target not in model_names:
        raise ValueError(
            f"Unknown target {target!r}; expected one of {sorted(model_names)}"
        )

    print(f"Target is: <{target}>")

    # Models are stored in the "ft-mlops" bucket, which is passed explicitly
    # here rather than taken from the module-level `bucket`.
    model = load_model(bucket="ft-mlops", model_name=model_names[target])
    predict_example_df(model, target)
    print("!!! DONE: an one-off prediction passed !!!")

In [33]:
# Run the one-off prediction check for <vu_estimated>
main("vu_estimated")

--------------- START: Check an one-off prediction ----------------
Target is: <vu_estimated>
Load model...
Load data...

Test example w/ 1 instances:
   ais_dim_a  ais_dim_b  ais_dim_c  ais_dim_d  draught_fact
0      150.0       53.0        7.0       20.0           7.8

Real Value: [0.2432875019516509]
Predicted Value: [0.24320531]
Mean Absolute Ratio Error: 0.0003378432381727954


Test example w/ 2 instances:
     ais_dim_a  ais_dim_b  ais_dim_c  ais_dim_d  draught_fact
600      154.0       49.0        3.0       23.0           9.7
800      170.0       20.0       21.0        7.0          10.2

Real Value: [0.4631891770538467, 0.8733295127026977]
Predicted Value: [0.46559918, 0.8723557]
Mean Absolute Ratio Error: 0.0031590602840265527

!!! DONE: an one-off prediction passed !!!


In [34]:
# Run the one-off prediction check for <teu_estimated>
main("teu_estimated")

--------------- START: Check an one-off prediction ----------------
Target is: <teu_estimated>
Load model...
Load data...

Test example w/ 1 instances:
   ais_dim_a  ais_dim_b  ais_dim_c  ais_dim_d  draught_fact
0      150.0       53.0        7.0       20.0           7.8

Real Value: [349.84742780647395]
Predicted Value: [351.7416]
Mean Absolute Ratio Error: 0.005414302661643374


Test example w/ 2 instances:
     ais_dim_a  ais_dim_b  ais_dim_c  ais_dim_d  draught_fact
600      154.0       49.0        3.0       23.0           9.7
800      170.0       20.0       21.0        7.0          10.2

Real Value: [580.8392280255238, 1640.9861543683692]
Predicted Value: [560.57855, 1582.0873]
Mean Absolute Ratio Error: 0.03538704636646764

!!! DONE: an one-off prediction passed !!!
