# Transformation Graph - Auto Featuring - Deployment
This is a component that uses a transformation graph for AutoFeaturing.

This notebook shows:
- how to use the [SDK](https://platiagro.github.io/sdk/) to load datasets, save models and other artifacts.
- how to declare parameters and use them to build reusable components.

## Wrapping Model

Wrapping the model in a class allows your component to be exposed as a service over REST.

To wrap your model [follow the instructions](https://docs.seldon.io/projects/seldon-core/en/v0.3.0/python/python_component.html) for your chosen language or toolkit.

In [None]:
%%writefile Model.py
import logging
from typing import List, Iterable, Dict, Union

import numpy as np
import pandas as pd
from platiagro import load_model

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class Transformer:
    """Re-applies recorded feature transformations to a DataFrame.

    The name of a transformed column encodes how it is computed, with the
    parts separated by ``---``:

    - ``"<transformation>---<column>"`` for numeric and time transformations;
    - ``"<transformation>---<value column>---<group column>"`` for grouped
      transformations.
    """

    def __init__(self):
        # Available transformations
        self.av_transformations = {"numeric" : ["sin", "cos", "tan", "square"],
                                "grouped": ["mean", "median", "std", "sum"],
                                "time": ["second", "minute", "hour", "day", "month", "year", 'dayofweek']}

    def apply_numeric(self, data, column):
        """Applies the NumPy function named in ``column`` to its source column.

        Args:
            data (pandas.DataFrame): Frame holding the source column.
            column (str): Name in the form ``"<numpy function>---<column>"``.

        Returns:
            pandas.Series: The function applied to every value of the source column.
        """
        trans, col = column.split('---')

        np_func = getattr(np, trans)

        # Applied value by value so that object-typed columns are handled too.
        return data[col].apply(lambda x: np_func(x))

    def apply_timely(self, data, column):
        """Extracts a date/time component from the source column.

        Note that the source column of ``data`` is converted to datetime
        in place, so the frame passed in is modified.

        Args:
            data (pandas.DataFrame): Frame holding the source column.
            column (str): Name in the form ``"<component>---<column>"``.

        Returns:
            pandas.Series: The requested component for every row.
        """
        trans, col = column.split('---')

        data[col] = pd.to_datetime(data[col])

        if trans == 'dayofweek' or trans == 'weekday':
            return data[col].dt.dayofweek
        else:
            return data[col].apply(lambda x : getattr(x, trans))

    def is_number_repl_isdigit(self, s):
        """
        Returns True if string is a number.

        Only unsigned integers and decimals with a single dot are recognised.
        """
        return s.replace('.','',1).isdigit()

    def _restore_label(self, label):
        """Converts a stored group label back to an int or float when it looks numeric."""
        if not isinstance(label, str):
            # Already typed (not a string key), nothing to restore.
            return label

        # Allow a single leading minus sign so negative group values match too.
        unsigned = label[1:] if label.startswith('-') else label
        if not self.is_number_repl_isdigit(unsigned):
            return label

        return int(label) if unsigned.isdigit() else float(label)

    def apply_grouped(self, data, trans_applied, column):
        """Builds a grouped feature from the aggregates stored per group label.

        Every row whose group column equals a stored label receives that
        label's aggregate. Rows whose group value has no stored label keep
        the raw value of the value column.

        Args:
            data (pandas.DataFrame): Frame holding the value and group columns.
                It is not modified.
            trans_applied (dict): Maps a transformed column name to a mapping
                of ``label -> aggregate``.
            column (str): Name in the form
                ``"<transformation>---<value column>---<group column>"``.

        Returns:
            pandas.Series: The grouped feature, named ``temp_grouping_column``.
        """
        parts = column.split('---')
        value_col, group_col = parts[1], parts[2]
        distinct_labels = trans_applied[column]
        groups = data[group_col]

        # Start from the raw values (the fallback for unknown groups) and
        # overwrite only the rows that belong to a known group. This avoids
        # a sentinel value that could collide with a real aggregate.
        result = data[value_col].copy()
        for label in distinct_labels:
            in_group = groups == self._restore_label(label)
            result = result.where(~in_group, distinct_labels[label])

        return result.rename('temp_grouping_column')

    def transform(self, data, trans_applied, column):
        """Adds the transformed column ``column`` to ``data``.

        An empty string stored in ``trans_applied[column]`` means a numeric or
        time transformation; anything else is treated as a grouped one.

        Args:
            data (pandas.DataFrame): Frame to transform; it is modified.
            trans_applied (dict): Transformations keyed by transformed column name.
            column (str): Transformed column to compute.

        Returns:
            pandas.DataFrame: ``data`` with the new column set.
        """
        if trans_applied[column] == "":

            if column.split('---')[0] in self.av_transformations['numeric']:
                data[column] = self.apply_numeric(data, column)
            else:
                data[column] = self.apply_timely(data, column)

        else:
            data[column] = self.apply_grouped(data, trans_applied, column)

        return data


class Model(object):
    """Service wrapper that replays the stored feature transformations on request data."""

    def __init__(self, dataset: str = None, target: str = None):
        """Loads the artifacts saved for this component.

        Args:
            dataset (str, optional): Dataset name; only logged here.
            target (str, optional): Target column name; only logged here.
        """
        logger.info(f"dataset: {dataset}")
        logger.info(f"target: {target}")

        # Load Artifacts: Estimator, Encoders, etc
        model = load_model()
        self.features = model["features"]
        self.transformations = model["transformations"]
        self.features_names_training = model["columns"]

    def class_names(self):
        """Returns the column names stored with the model artifacts."""
        return self.features_names_training

    def predict(self, X: np.ndarray, feature_names: Iterable[str], meta: Dict = None) -> Union[np.ndarray, List, str, bytes]:
        """Takes an array (numpy) X and feature_names and returns the transformed data.

        Args:
            X (numpy.array): Array-like with data.
            feature_names (iterable, optional): Array of feature names.
            meta (dict, optional): Dict of metadata.

        Returns:
            numpy.ndarray: The data after every stored transformation was applied.
        """
        # Materialise the names once: a NumPy array has no unambiguous truth
        # value and a generator could only be consumed a single time.
        names = list(feature_names) if feature_names is not None else []

        if names:
            # Before transforming 'X', resort its features like the training data.
            # The copy keeps the later column assignments off a slice of 'df'.
            df = pd.DataFrame(X, columns=names)
            data = df[self.features_names_training].copy()
        else:
            # Without names, 'X' is assumed to already follow the training column order.
            data = pd.DataFrame(data=X, columns=self.features_names_training)

        t = Transformer()
        for column in self.transformations:
            data = t.transform(data, self.transformations, column)

        return data.to_numpy()

## API Contract

The contract file has two sections:

- `features` : The feature array you intend to send in a request
- `targets` : The response you expect back

Each section has a list of definitions. Each definition consists of:

- `name` : String : The name of the feature
- `ftype` : one of CONTINUOUS, CATEGORICAL : the type of the feature
- `dtype` : One of FLOAT, INT : Required for ftype CONTINUOUS : What type of feature to create
- `values` : list of Strings : Required for ftype CATEGORICAL : The possible categorical values
- `range` : list of two numbers : Optional for ftype CONTINUOUS : The range of values (inclusive) that a continuous value can take
- `repeat` : integer : Optional value for how many times to repeat this value
- `shape` : array of integers : Optional value for the shape of array to coerce the values

In [None]:
%%writefile contract.json
{
    "features": [
        {
            "name": "Abbrev",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [1, 15]
        },
        {
            "name": "Rep",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1.0, 3.0]
        },
        {
            "name": "Locality",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [0, 7]
        },
        {
            "name": "Map_Ref",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [1, 13]
        },
        {
            "name": "Latitude",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [1, 11]
        },
        {
            "name": "Altitude",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [100, 300]
        },
        {
            "name": "Rainfall",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [850, 1750]
        },
        {
            "name": "Frosts",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [-3.0, -2.0]
        },
        {
            "name": "Year",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [1800, 1900]
        },
        {
            "name": "Sp",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [4, 26]
        },
        {
            "name": "PMCno",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1500, 3200]
        },
        {
            "name": "DBH",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [5.0, 50.0]
        },
        {
            "name": "Ht",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [4.0, 13.0]
        },
        {
            "name": "Surv",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [20.0, 100.0]
        },
        {
            "name": "Vig",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [2.0, 5.0]
        },
        {
            "name": "Ins_res",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1.0, 5.0]
        },
        {
            "name": "Stem_Fm",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1.0, 5.0]
        },
        {
            "name": "Crown_Fm",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1.0, 5.0]
        },
        {
            "name": "Brnch_Fm",
            "dtype": "FLOAT",
            "ftype": "continuous",
            "range": [1.0, 5.0]
        },
        {
            "name": "Utility",
            "dtype": "INT",
            "ftype": "continuous",
            "range": [0, 4]
        }
    ],
    "targets": []
}

## Test Deployment

Starts a service wrapping a Model, sends a request to the service, and shows the response.

In [None]:
from platiagro.deployment import test_deployment

# Exercise the deployment using the request contract saved to contract.json
# by the cell above.
test_deployment("contract.json")