<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/top.png?raw=true'>

# Configuration (à lancer avant tous les notebooks)

In [None]:
# Python version of the running kernel (displayed as the cell output)
import platform
platform.python_version()

In [None]:
# List the available Jupyter kernels
!jupyter kernelspec list

In [None]:
# List the available conda environments
!conda env list

In [None]:
# List the packages installed in the current conda environment
!conda list

In [None]:
# Version of the installed Azure ML SDK
import azureml.core
print("Ready to use Azure ML", azureml.core.VERSION)

Si le notebook est exécuté en dehors d'Azure, il faut télécharger le fichier config.json depuis le portail https://portal.azure.com/, et le mettre dans le workspace qui contient le notebook.

Si le notebook est exécuté directement depuis le workspace Azure, le fichier de config devrait déjà être là.

In [None]:
# Connect to the workspace using the saved configuration (config.json)
from azureml.core import Workspace

# `ws` is the workspace handle used by every following cell.
ws = Workspace.from_config()
print(ws.name, "loaded")

# Configure the dataset

In [None]:
from azureml.core import Dataset

# Name under which the Titanic training data is expected to be registered
# in the workspace.
dataset_name = 'titanic train dataset'

# Test membership with `dataset_name` itself rather than a repeated literal,
# so the check and the name printed below can never drift apart.
if dataset_name in ws.datasets:
    print('ok to use the registered dataset : '+dataset_name)
else:
    print('please register dataset first')

In [None]:
# Split the registered dataset into training and validation subsets.
# `ws.datasets.get` returns None for an unregistered name; fail here with an
# explicit message instead of an AttributeError on `random_split` below.
titanic_ds = ws.datasets.get(dataset_name)
if titanic_ds is None:
    raise LookupError(
        "dataset '" + dataset_name + "' is not registered in the workspace; "
        "please register dataset first"
    )

# Fixed seed so the split is the same on every run.
train_ds, test_ds = titanic_ds.random_split(percentage=0.7, seed=0)

In [None]:
# Preview the first rows of the dataset as a pandas DataFrame (cell output).
titanic_ds.to_pandas_dataframe().head()

# Configure the compute target

In [None]:
# Name of the compute cluster to look for in the workspace.
compute_name = "aml-cluster"

# Fall back to local execution when the cluster is not part of the workspace;
# otherwise reuse the existing compute target.
if compute_name not in ws.compute_targets:
    compute_target = 'local'
    print('compute target not found. work locally.')
else:
    compute_target = ws.compute_targets[compute_name]
    print('found compute target. just use it. ' + compute_name)

In [None]:
# Force local execution.
# NOTE(review): this unconditionally overrides the compute target selected by
# the cluster lookup in the previous cell — confirm this is intentional.
compute_target = 'local'

# Configure Automated Machine Learning

[configurer une expérience autoML](https://docs.microsoft.com/fr-fr/azure/machine-learning/how-to-configure-auto-train#configure-your-experiment-settings)

[liste des modèles utilisés](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels?view=azure-ml-py)

In [None]:
from azureml.train.automl import AutoMLConfig

# All AutoML settings gathered in one place, then unpacked into AutoMLConfig.
automl_settings = {
    'name': 'Automated ML Experiment on titanic dataset',
    'task': 'classification',
    'compute_target': compute_target,
    # 'enable_local_managed': True,  (left disabled, as in the original setup)
    # NOTE(review): with `allowed_models` restricted to KNN, the
    # `blocked_models` entry looks redundant — confirm before removing.
    'allowed_models': ['KNN'],
    'blocked_models': ['XGBoostClassifier'],
    # The two subsets produced by the random split of the registered dataset.
    'training_data': train_ds,
    'validation_data': test_ds,
    'label_column_name': 'Survived',
    'iterations': 6,
    'primary_metric': 'AUC_weighted',
    'max_concurrent_iterations': 4,
    'featurization': 'auto',
}

automl_config = AutoMLConfig(**automl_settings)

print("Ready for Auto ML run.")

# Run an Automated Machine Learning Experiment

In [None]:
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

print('Submitting Auto ML experiment...')
# Experiment named 'titanic_automl' in the workspace `ws`.
automl_experiment = Experiment(ws, 'titanic_automl')
# Submit the AutoML configuration; `automl_run` is read by the following cells.
automl_run = automl_experiment.submit(automl_config)
# Show the run-details widget for the submitted run.
RunDetails(automl_run).show()
# Wait for the run to complete, with its output shown in the cell.
automl_run.wait_for_completion(show_output=True)

In [None]:
# Retrieve the best run and its fitted model from the AutoML run, and show both.
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric recorded for the best run, one "name value" pair per line.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

In [None]:
# Print each entry of the fitted model's `named_steps`, one per line.
# The guard skips the listing when `fitted_model` is falsy (e.g. None).
if fitted_model :
    for step in fitted_model.named_steps:
        print(step)

In [None]:
from azureml.core import Model

# Register the best run's model file, tagging it with its training context and
# storing two of the best run's metrics as properties.
best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='titanic_model_automl',
    tags={'Training context': 'Auto ML'},
    properties={
        'AUC': best_run_metrics['AUC_weighted'],
        'Accuracy': best_run_metrics['accuracy'],
    },
)

# Print every model registered in the workspace, followed by its tags and
# properties (tab-indented), with a blank separator after each model.
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

# Utilisation du meilleur modèle pour faire des prédictions

In [None]:
# Load the registered Titanic test data as a pandas DataFrame.
# `ws.datasets.get` returns None for an unregistered name; fail with an
# explicit message instead of an AttributeError on `to_pandas_dataframe`.
test_dataset_name = "titanic test dataset"
titanic_test_ds = ws.datasets.get(test_dataset_name)
if titanic_test_ds is None:
    raise LookupError(
        "dataset '" + test_dataset_name + "' is not registered in the workspace; "
        "please register dataset first"
    )

X_test = titanic_test_ds.to_pandas_dataframe()

In [None]:
from azureml.core.model import Model
# List the models registered in the workspace (displayed as the cell output).
Model.list(ws)

In [None]:
# Resolve a local path to the registered AutoML model.
# `version` is left unset so the latest registered version is used: a
# hard-coded version number only exists on a workspace where the model has
# been registered at least that many times, which breaks a fresh run.
model_path = Model.get_model_path("titanic_model_automl", version=None, _workspace=ws)

In [None]:
import joblib
# Deserialize the model file found at `model_path`.
# NOTE(review): joblib.load unpickles its input — only load model files that
# come from a trusted workspace.
model = joblib.load(model_path)