# AutoML Classifier

This component performs predictions using the AutoML classifier implementation from [auto-sklearn](https://github.com/automl/auto-sklearn).
<br>
auto-sklearn is an automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator.

This notebook shows:
- how to use the [SDK](https://platiagro.github.io/sdk/) to load a model and other artifacts.
- how to use a model to provide real-time predictions.

In [None]:
%%writefile Model.py
import logging
from typing import List, Iterable, Dict, Union

import numpy as np
import pandas as pd
from platiagro import load_model
from platiagro.featuretypes import CATEGORICAL, NUMERICAL, infer_featuretypes
from sklearn.exceptions import NotFittedError 
from sklearn.utils.validation import check_is_fitted

logger = logging.getLogger(__name__)


class Model(object):
    """Prediction wrapper around the artifacts returned by ``load_model``.

    The constructor loads the estimator, the encoders and the preprocessing
    metadata; ``predict`` applies that preprocessing to incoming data and
    delegates to the estimator.
    """

    def __init__(self, dataset: str = None, target: str = None, experiment_id: str = None, method: str = "predict_proba"):
        """Load the trained artifacts for ``experiment_id``.

        Args:
            dataset (str, optional): Dataset name. Only logged here.
            target (str, optional): Target name. Only logged here.
            experiment_id (str, optional): Forwarded to ``load_model``.
            method (str, optional): ``"predict_proba"`` (default) makes
                ``predict`` return the estimator's ``predict_proba`` output;
                any other value makes it return decoded labels obtained from
                ``estimator.predict``.
        """
        logger.info(f"dataset: {dataset}")
        logger.info(f"target: {target}")
        logger.info(f"experiment_id: {experiment_id}")
        logger.info(f"method: {method}")

        self.method = method

        # Load Artifacts: Estimator, Encoders, etc
        model = load_model(experiment_id=experiment_id)
        self.estimator = model["estimator"]
        self.feature_encoder = model["feature_encoder"]
        self.label_encoder = model["label_encoder"]
        self.columns = model["columns"]
        self.datetime_indexes = model["datetime_indexes"]
        self.categorical_indexes = model["categorical_indexes"]
        self.numerical_nan_replacement = model["numerical_nan_replacement"]
        self.categorical_nan_replacement = model["categorical_nan_replacement"]

    def class_names(self):
        """Return the names of the columns produced by ``predict``.

        Returns:
            list: The label encoder's classes when ``method`` is
            ``"predict_proba"``, otherwise the single name ``"class"``.
        """
        if self.method == "predict_proba":
            return self.label_encoder.classes_.tolist()
        return ["class"]

    def predict(self, X: np.ndarray, feature_names: Iterable[str] = None, meta: Dict = None) -> Union[np.ndarray, List, str, bytes]:
        """Takes an array (numpy) X and feature_names and returns an array of predictions.

        Args:
            X (numpy.array): Array-like with data.
            feature_names (iterable, optional): Names of the columns of X.
                When missing or empty, X is assumed to already follow the
                column order stored in ``self.columns``.
            meta (dict, optional): Dict of metadata. Not used.

        Returns:
            The output of ``estimator.predict_proba`` when ``method`` is
            ``"predict_proba"``; otherwise the labels from
            ``estimator.predict`` decoded with the label encoder.
        """
        # Requests may omit the feature names; fall back to the stored column
        # order instead of failing while building the DataFrame.
        names = list(feature_names) if feature_names is not None else []
        if not names:
            names = list(self.columns)

        # Put data in a pandas.DataFrame
        df = pd.DataFrame(X, columns=names)

        # Replace NaNs. The non-inplace form is used so the caller's array,
        # which the DataFrame may share memory with, is never modified.
        df = df.fillna(self.categorical_nan_replacement)
        df = df.fillna(self.numerical_nan_replacement)

        # Put data back in a numpy.ndarray, using the stored column order
        X = df[self.columns].to_numpy()

        # Remove datetime features (datetime_indexes is used as a boolean mask)
        X = X[:, np.where(~self.datetime_indexes)[0]]

        # Encode categorical features
        if np.any(self.categorical_indexes):
            X[:, self.categorical_indexes] = \
                self.feature_encoder.transform(X[:, self.categorical_indexes])

        # Perform Prediction
        if self.method == "predict_proba":
            return self.estimator.predict_proba(X)

        y_pred = self.estimator.predict(X)
        return self.label_encoder.inverse_transform(y_pred)

## Deployment Test

This section simulates a model deployed by PlatIAgro.

In [None]:
%%writefile env.sh
# Settings read by the deployment test cell, which sources this file and
# passes each value to seldon-core-microservice.
export MODEL_NAME="Model"
export API_TYPE="REST"
export SERVICE_TYPE="MODEL"
export PERSISTENCE=0
export LOG_LEVEL="DEBUG"
# JSON list handed to --parameters; the names match arguments of Model.__init__.
# "method" is not listed, so the constructor default applies.
export PARAMETERS='[
{"type":"STRING","name":"dataset","value":"iris"},
{"type":"STRING","name":"target","value":"Species"},
{"type":"STRING","name":"experiment_id","value":"48f2668a-e31a-4b5a-a91a-28fd649f7adc"}]'

In [None]:
%%bash
# Load the deployment settings written to env.sh.
source env.sh

# Start the microservice in the background; stdout and stderr go to log.txt.
seldon-core-microservice "$MODEL_NAME" "$API_TYPE" \
    --service-type "$SERVICE_TYPE" \
    --persistence "$PERSISTENCE" \
    --parameters "$PARAMETERS" \
    --log-level "$LOG_LEVEL" > log.txt 2>&1 &

# Poll the health endpoint once per second until it answers.
# curl's exit status is tested directly: wrapping it in $(...) would try to
# execute whatever the command printed.
ATTEMPT=0
until curl --output /dev/null --silent --head --fail http://localhost:5000/health/ping; do
    # Give up once more than 10 attempts have failed; show the service log
    # so the cause of the failure is visible.
    if [ "$ATTEMPT" -gt 10 ]; then
        cat log.txt
        exit 1
    fi
    ATTEMPT=$((ATTEMPT + 1))
    sleep 1
done
echo "Deployment successful. Waiting for requests."

## Make predictions

In [None]:
%%bash
# POST one sample with four named features to the /predict endpoint of the
# service on localhost:5000; the request body is read from the here-document.
curl -sSL localhost:5000/predict --data-binary @- << EOF
json={
    "data": {
        "names": ["SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm"],
        "ndarray": [
            [5.1,3.5,1.4,0.2]
        ]
    }
}
EOF

## View logs

In [None]:
# Print log.txt, the file the deployment cell redirects the microservice output to.
!cat log.txt

## Cleans up the test

In [None]:
# Kill the background microservice: list processes, keep the lines matching
# "seldon-core-mic", take the PID column and pass it to kill.
# The [s] bracket keeps grep from matching its own command line;
# xargs -r skips kill when no PID was found.
!ps -ef | grep [s]eldon-core-mic | awk '{print $2}' | xargs -r kill