# Autopilot Example
>__NOTE:__ Make sure to use the Python 3 (Data Science) Jupyter Kernel.

In [None]:
import sagemaker
import pandas as pd

# Open the SageMaker session and resolve the execution role; both objects are
# reused by the cells that follow.
session = sagemaker.Session()
role = sagemaker.get_execution_role()

## Download Data

In [None]:
# Column headers applied to the downloaded data via `names=` when parsing.
column_names = [
    "sex",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
    "rings",
]
abalone_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"

# Read the remote file into a DataFrame, then save a local CSV that carries
# the header row and omits the DataFrame index.
abalone_data = pd.read_csv(abalone_url, names=column_names)
abalone_data.to_csv("abalone_with_headers.csv", index=False)

## Create the Autopilot Experiment

In [None]:
from sagemaker.automl.automl import AutoML

# Job output is written under the session's default bucket.
output_s3_uri = f"s3://{session.default_bucket()}/abalone-v1/output"

# Configure the Autopilot job: "rings" is the target column and at most
# 250 candidates are requested.
automl_job = AutoML(
    role=role,
    sagemaker_session=session,
    base_job_name="abalone",
    target_attribute_name="rings",
    max_candidates=250,
    output_path=output_s3_uri,
)


## Start the Autopilot Experiment

In [None]:
# Upload the headered CSV to the session's default bucket under the input
# prefix; the returned value is passed to the job as its input.
training_data_uri = session.upload_data(
    "abalone_with_headers.csv",
    bucket=session.default_bucket(),
    key_prefix="abalone-v1/input",
)

# Start the job with wait=False so the call is not asked to wait for the job.
automl_job.fit(inputs=training_data_uri, wait=False)

## Analyze the Autopilot Experiment

>__NOTE:__ Wait until the Autopilot Experiment has completed before proceeding.

In [None]:
from sagemaker.analytics import ExperimentAnalytics

# The experiment name is the AutoML job name plus the "-aws-auto-ml-job" suffix.
automl_job_name = automl_job.describe_auto_ml_job()["AutoMLJobName"]
automl_experiment = ExperimentAnalytics(
    experiment_name=f"{automl_job_name}-aws-auto-ml-job",
    sagemaker_session=session,
)

In [None]:
# Keep only the trial name and the two accuracy columns, then take the five
# rows with the highest validation accuracy.
accuracy_columns = ["TrialComponentName", "validation:accuracy - Last", "train:accuracy - Last"]
df = (
    automl_experiment.dataframe()
    .filter(accuracy_columns)
    .sort_values(by="validation:accuracy - Last", ascending=False)
    .head(5)
)
df

## Plot Trial Comparison

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

legend_colors = ["r", "b", "g", "c", "m"]
ig, ax = plt.subplots(figsize=(15, 10))
legend = []
i = 0
for column, value in df.iterrows():
    ax.plot(value["train:accuracy - Last"], value["validation:accuracy - Last"], "o", c=legend_colors[i], label=value.TrialComponentName)
    i +=1
plt.title("Training vs.Testing Accuracy", fontweight="bold", fontsize=14)
plt.ylabel("validation:accuracy - Last", fontweight="bold", fontsize=14)
plt.xlabel("train:accuracy - Last", fontweight="bold", fontsize=14)
plt.grid()
plt.legend()
plt.show()

## Best Candidate Overview

### Best Candidate Job

In [None]:
# Show the "CandidateName" field of the job's best candidate.
best_candidate = automl_job.best_candidate()
best_candidate["CandidateName"]

### Best Candidate Evaluation Metrics

In [None]:
# Show the "FinalAutoMLJobObjectiveMetric" field of the job's best candidate.
best_candidate = automl_job.best_candidate()
best_candidate["FinalAutoMLJobObjectiveMetric"]

## Candidate Artifacts

### Data Exploration Notebook 

In [None]:
# Show the data exploration notebook location from the job's artifact listing.
job_artifacts = automl_job.describe_auto_ml_job()["AutoMLJobArtifacts"]
job_artifacts["DataExplorationNotebookLocation"]

### Candidate Definition Notebook

In [None]:
# Show the candidate definition notebook location from the job's artifact listing.
job_artifacts = automl_job.describe_auto_ml_job()["AutoMLJobArtifacts"]
job_artifacts["CandidateDefinitionNotebookLocation"]

### Explainability Report

In [None]:
# Walk the job description down to the best candidate's artifact locations
# and show the "Explainability" entry.
best_candidate_description = automl_job.describe_auto_ml_job()["BestCandidate"]
artifact_locations = best_candidate_description["CandidateProperties"]["CandidateArtifactLocations"]
artifact_locations["Explainability"]

## Deploy the Best Candidate

>__NOTE:__ Deploying the Best Model will incur AWS usage costs.

In [None]:
# Look the best candidate up once so that the candidate being deployed and
# the endpoint name are both derived from the same candidate description.
best_candidate = automl_job.best_candidate()

# Endpoint name: the first seven dash-separated tokens of the candidate name.
endpoint_name = "-".join(best_candidate["CandidateName"].split("-")[0:7])

# Deploy the candidate to a single ml.m5.xlarge instance under that name.
automl_job.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    candidate=best_candidate,
    sagemaker_session=session,
    endpoint_name=endpoint_name,
)

## Cleanup

### Delete Hosted Endpoint

In [None]:
!aws sagemaker delete-endpoint --endpoint-name {"-".join(automl_job.best_candidate()["CandidateName"].split("-")[0:7])}

### Delete the Endpoint Configuration

In [None]:
!aws sagemaker delete-endpoint-config --endpoint-config-name {"-".join(automl_job.best_candidate()["CandidateName"].split("-")[0:7])}