# AutoML

This component trains an AutoML model using [auto-sklearn](https://github.com/automl/auto-sklearn).
<br>
auto-sklearn is an automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator.

This notebook shows:
- how to use the SDK to load a dataset and save a model.
- how to receive parameters from the platform.

In [None]:
# Notebook parameters. The values below are defaults; the trailing "#@param"
# annotations declare each parameter's type so the platform can supply values.

# Name of the dataset passed to load_dataset / load_metadata.
dataset = "iris" #@param {type:"string"}
# Name of the dataframe column to predict.
target = "col4" #@param {type:"string"}
# Name under which the trained model is saved by save_model.
experiment_id = "99284308-cd3f-47d4-ab71-9c57acbb4d7b" #@param {type:"string"}
# Time budget handed to auto-sklearn (time_left_for_this_task); auto-sklearn
# documents this value as seconds.
duration = 60 #@param {type:"integer"}

## Load dataset (using the SDK)

In [None]:
from platiagro import load_dataset

# Fetch the dataset named by the `dataset` parameter through the SDK.
# Later cells treat the result as a pandas-style DataFrame (iloc / columns / drop).
df = load_dataset(name=dataset)
# Bare expression: shown as the cell output when run in a notebook.
df

## Load metadata (using the SDK)

In [None]:
from platiagro import load_metadata

# Fetch the metadata stored for the same dataset; a later cell looks up its
# "featuretypes" entry.
metadata = load_metadata(name=dataset)
# Bare expression: shown as the cell output when run in a notebook.
metadata

## Get feature types

In [None]:
from platiagro.featuretypes import infer_featuretypes

# Prefer the feature types recorded in the dataset metadata; when the metadata
# has no "featuretypes" key, infer them from the data instead.
# Later cells index this sequence positionally against df's columns, so it is
# expected to hold one entry per column, in column order.
try:
    featuretypes = metadata["featuretypes"]
except KeyError:
    featuretypes = infer_featuretypes(df)

## Encode categorical features

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace every categorical column with integer codes. The fitted encoders are
# collected in `les`, in column order, so they can be saved alongside the model.
les = []
categorical_positions = [
    position for position, kind in enumerate(featuretypes) if kind == "Categorical"
]
for position in categorical_positions:
    encoder = LabelEncoder()
    les.append(encoder)
    # fit_transform learns the classes and encodes the column in one step.
    encoded = encoder.fit_transform(df.iloc[:, position])
    df[df.columns[position]] = encoded
# Bare expression: shown as the cell output when run in a notebook.
df

## Split dataset into train/test splits

In [None]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the target column, then keep 70% of the
# rows for training and leave the remainder for testing.
X = df.drop(columns=target)
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

## Fit a model using autosklearn

In [None]:
from autosklearn.regression import AutoSklearnRegressor
from autosklearn.classification import AutoSklearnClassifier
from autosklearn.metrics import accuracy

# The feature type of the target column decides whether this is a
# classification or a regression problem.
index = df.columns.tolist().index(target)

# Work on a copy: popping from `featuretypes` itself would also alter
# metadata["featuretypes"] (same list object) and would remove a *feature's*
# type if this cell were run a second time. After the pop, `feat_type` has one
# entry per column of X (the dataframe without the target).
feat_type = list(featuretypes)
problem_type = feat_type.pop(index)

if problem_type == "Categorical":
    estimator = AutoSklearnClassifier(
        time_left_for_this_task=duration,
        per_run_time_limit=duration,
        resampling_strategy="cv",
        resampling_strategy_arguments={"folds": 5},
        ensemble_size=5,
    )
    estimator.fit(X_train, y_train, feat_type=feat_type, metric=accuracy)
    # With the "cv" resampling strategy auto-sklearn needs an explicit refit
    # on the training data before the ensemble can be used for prediction.
    estimator.refit(X_train, y_train)

elif problem_type == "Numerical":
    estimator = AutoSklearnRegressor(
        time_left_for_this_task=duration,
        ensemble_size=5,
    )
    # Fit the regressor that was just created (previously this line called
    # `.fit` on an undefined name, `model`, and raised NameError).
    estimator.fit(X_train, y_train, feat_type=feat_type)

else:
    # Fail here rather than leaving `estimator` undefined (or stale from an
    # earlier run) for the save step.
    raise ValueError(
        f"Unsupported feature type for target {target!r}: {problem_type!r}"
    )

## Save model (using the SDK)

In [None]:
from platiagro import save_model

# Store the fitted estimator together with the label encoders fitted on the
# categorical columns, under the name given by `experiment_id`.
save_model(name=experiment_id, model={"estimator": estimator, "label_encoder": les})