<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/top.png?raw=true'>

# Configuration (à lancer avant tous les notebooks)

In [None]:
# Show the version of the Python interpreter running this notebook
import platform

python_release = platform.python_version()
python_release

In [None]:
# List the installed packages (shell escape to the `conda` CLI)
!conda list

In [None]:
# Report the version of the Azure ML SDK that is importable from this kernel
import azureml.core

sdk_version = azureml.core.VERSION
print("Ready to use Azure ML", sdk_version)

Si le notebook est exécuté en dehors d'Azure, il faut télécharger le fichier config.json depuis le portail https://portal.azure.com/ et le placer dans le dossier qui contient le notebook.

Si le notebook est exécuté directement depuis le workspace Azure, le fichier de config devrait déjà être là.

In [None]:
# Connect to the Azure ML workspace described by the config file (config.json)
from azureml.core import Workspace

ws = Workspace.from_config()

# Confirm which workspace was loaded
print("{} loaded".format(ws.name))

# Création du script

In [None]:
import os
import shutil

# Folder that holds every file shipped with the experiment
script_folder_name = 'script/2-titanic-files'
os.makedirs(script_folder_name, exist_ok=True)

# Put a copy of the training data in that folder as "titanic.csv",
# which is the file name the training script reads
source_csv = 'data/titanic/train.csv'
target_csv = os.path.join(script_folder_name, "titanic.csv")
shutil.copy(source_csv, target_csv)

<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/designer.png?raw=true'>

In [None]:
%%writefile $script_folder_name/titanic_training.py
from azureml.core import Run
import joblib

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

# Get the experiment run context
run = Run.get_context()

train_data = pd.read_csv("titanic.csv")

features = ["Age","Pclass","SibSp", "Parch", "Fare","Sex", "Embarked"]

X = pd.get_dummies(train_data[features])
y = train_data["Survived"]

# missing values
imputer = SimpleImputer(strategy='most_frequent')
imputed_X = pd.DataFrame(imputer.fit_transform(X))
imputed_X.columns = X.columns
imputed_X[["Age","Pclass","SibSp", "Parch", "Fare"]] = imputed_X[["Age","Pclass","SibSp", "Parch", "Fare"]].astype('int')


# Break off validation set from training data
X_train, X_valid, y_train, y_valid = train_test_split(imputed_X, y, train_size=0.8, test_size=0.2,
                                                                random_state=0)

# model
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
model.fit(X_train, y_train)


# score
predictions = model.predict(X_valid)

mae = mean_absolute_error(predictions.astype('int'), y_valid)
acc = accuracy_score(y_valid, predictions.astype('int'))
print("mae : {}, accuracy : {}".format(mae, acc))
run.log('mae', mae)
run.log('acc',acc)

# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/titanic_model.pkl')

run.complete()

# Entrainement d'un modèle sur la machine locale

[tutoriel microsoft : partie 1](https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-train-models-with-aml)

[tutoriel microsoft : partie 2](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-ml-models)

- [experiment dans la SDK](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment.experiment?view=azure-ml-py)

- [estimator dans la SDK](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.estimator.estimator?view=azure-ml-py)

NB : en cas d'erreur avec docker, https://askubuntu.com/questions/477551/how-can-i-use-docker-without-sudo

`sudo usermod -aG docker $USER`

In [None]:
from azureml.core import Experiment
from azureml.train.estimator import Estimator

# Experiment (in the connected workspace) under which the run is recorded
experiment = Experiment(workspace=ws, name="titanic-training-experiment")

# Estimator: runs titanic_training.py from the script folder on the local
# machine, with scikit-learn requested as a conda package
estimator = Estimator(
    source_directory=script_folder_name,
    entry_script='titanic_training.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)

# Submit the estimator, then block until the run finishes while streaming its output
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)

In [None]:
from azureml.widgets import RunDetails

# Display the widget for the submitted run
run_widget = RunDetails(run)
run_widget.show()

# Enregistrer le modèle

In [None]:
from azureml.core import Model

# Fetch the logged metrics once and reuse them, instead of querying the run
# separately for each property
run_metrics = run.get_metrics()

# Register the model file saved by the run, attaching the training context as a
# tag and the logged validation metrics as properties
run.register_model(model_path='outputs/titanic_model.pkl', model_name='titanic_model',
                   tags={'Training context':'Estimator'},
                   properties={'Mean Absolute Error': run_metrics['mae'], 'Accuracy': run_metrics['acc']})

In [None]:
# Print every model registered in the workspace with its version,
# followed by its tags and then its properties (one per line)
for registered_model in Model.list(ws):
    print(registered_model.name, 'version:', registered_model.version)
    for metadata in (registered_model.tags, registered_model.properties):
        for key in metadata:
            print ('\t',key, ':', metadata[key])
    print('\n')

# Ne pas oublier à la fin de l'expérience!!
(si votre travail a utilisé une instance de calcul)

<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/down.png?raw=true'>



In [None]:
# Stop every compute instance of the workspace that is not already stopped
from azureml.core.compute import ComputeTarget, AmlCompute, ComputeInstance
from azureml.core.compute_target import ComputeTargetException

for target in ComputeTarget.list(ws):
    # Only compute instances are handled; other compute target types are skipped
    if type(target) is not ComputeInstance:
        continue
    if target.get_status().state == 'Stopped':
        continue
    print('try to stop compute', target.name)
    target.stop(show_output=True)

In [None]:
# List all compute instances with their status, to check that they are stopped
instances = [target for target in ComputeTarget.list(ws) if type(target) is ComputeInstance]
for instance in instances:
    print(instance.name, instance.get_status())

# Ressources

[api azure](https://docs.microsoft.com/en-us/python/api/azureml-core)

[parcours d'apprentissage microsoft](https://docs.microsoft.com/fr-fr/learn/paths/build-ai-solutions-with-azure-ml-service/)

[le repository microsoft](https://github.com/MicrosoftDocs/mslearn-aml-labs.git)