ESPACE DE RECHERCHE / HYPERPARAMETRE

In [None]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.2
- scikit-learn=0.24.1
- pip:
  - numpy==1.19.5
  - pandas==0.25.3
  - azureml-defaults

from azureml.core import Environment
sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')

In [None]:
#-------------------------- ETAPE 1 : MODEL INIT

# connection au WS
from azureml.core import Workspace
ws = Workspace.get(subscription_id = "-----------",
                   resource_group = "------------",
                   name = "-------------")

ws.get_details()

# Execution du script en tant qu'experience
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal,RandomParameterSampling

# Create a Python environment for the experiment
sklearn_env = Environment("sklearn-env")
# Ensure the required packages are installed
packages = CondaDependencies.create(conda_packages=['scikit-learn','pip'],
                                    pip_packages=['azureml-defaults','pandas'])
sklearn_env.python.conda_dependencies = packages

# Create a script config + target compute ressource
script_config = ScriptRunConfig(source_directory=".",
                                script='scripts/training_script_hyperparametres.py',
                                compute_target = 'jucalcul1',
                                environment=sklearn_env) 

# Submit the experiment
experiment = Experiment(workspace=ws, name='test_hyperparametres')
run  = experiment.submit(config=script_config)

from azureml.widgets import RunDetails
RunDetails(run).show()

In [None]:
# save 
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

model = run.register_model(model_name='modele3', 
                           model_path='outputs/model.pkl',
                           model_framework=Model.Framework.SCIKITLEARN,
                           model_framework_version='0.24.1',
                           resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5))

# deploy
hosting_model = Model.deploy(ws, "modele3", [model])

In [None]:
run.wait_for_completion(show_output=True)

In [None]:
#-------------------------- ETAPE 2 : TUNNING PARAMETERS
# Espace de recherche des hyperparamètres
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
    

param_sampling = RandomParameterSampling( {
    "--n_estimators": choice(15, 50, 100, 200, 300),
    "--criterion": choice("gini", "entropy")
    }
)


hyperdrive_config = HyperDriveConfig(run_config=script_config,
                                     hyperparameter_sampling=param_sampling, 
                                     primary_metric_name='accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=4,
                                     max_concurrent_runs=2)

# start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()

In [80]:
# get all metrics per ID
hyperdrive_run.get_metrics()

{'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_3': {'Train Accuracy': 0.9998582364615821,
  'Train Recall': 1.0,
  'Train Precison': 0.9998059759410167,
  'Train F1 Score': 0.9999029785582614,
  'Test Accuracy': 0.8958333333333334,
  'Test Recall': 0.9478930675124604,
  'Test Precison': 0.912739965095986,
  'Test F1 Score': 0.9299844409868859},
 'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_2': {'Train Accuracy': 1.0,
  'Train Recall': 1.0,
  'Train Precison': 1.0,
  'Train F1 Score': 1.0,
  'Test Accuracy': 0.8998015873015873,
  'Test Recall': 0.9565610859728507,
  'Test Precison': 0.910814304179233,
  'Test F1 Score': 0.9331273449569631},
 'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_1': {'Train Accuracy': 1.0,
  'Train Recall': 1.0,
  'Train Precison': 1.0,
  'Train F1 Score': 1.0,
  'Test Accuracy': 0.8958333333333334,
  'Test Recall': 0.947935368043088,
  'Test Precison': 0.9138900908697534,
  'Test F1 Score': 0.930601454064772},
 'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_0': {'Train Accuracy': 

In [81]:
hyperdrive_run.get_children()

<generator object Run._rehydrate_runs at 0x000001E4E4AD8430>

In [82]:
run = hyperdrive_run.get_context()
run

<azureml.core.run._OfflineRun at 0x1e4e4aa7bb0>

In [83]:
# ID du parent
run_ID = hyperdrive_run.get_details()['runId']
run_ID

'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55'

In [84]:
# Child ID
summaryid = [0]
summarymetric = [0]

for child_run in hyperdrive_run.get_children():
    listid = child_run.id
    print(listid)
    summaryid.append(listid)
    list_metric = child_run.get_metrics()['Test Precison']
    print(list_metric)
    summarymetric.append(list_metric)
 

HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_3
0.912739965095986
HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_2
0.910814304179233
HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_1
0.9138900908697534
HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_0
0.9113321799307958


In [85]:
maxi = summarymetric.index(max(summarymetric))
maxi_id = summaryid[maxi]
print(maxi_id)

HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_1


del max
max(summarymetric)

In [86]:
hyperdrive_run.get_children('child_run')

<generator object Run._rehydrate_runs at 0x000001E4E4AD83C0>

In [88]:
for child_run in hyperdrive_run.get_children():
    if child_run.id == maxi_id:
        child_run.register_model(model_name='rf_tuning', model_path='outputs/model.pkl')
        print("ok it's saved")

ok it's saved


In [89]:
for child_run in hyperdrive_run.get_children():
    print(child_run)

Run(Experiment: test_hyperparametres,
Id: HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_3,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: test_hyperparametres,
Id: HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_2,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: test_hyperparametres,
Id: HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_1,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: test_hyperparametres,
Id: HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55_0,
Type: azureml.scriptrun,
Status: Completed)


In [90]:
hyperdrive_run.get_details()

{'runId': 'HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55',
 'target': 'jucalcul1',
 'status': 'Completed',
 'startTimeUtc': '2021-08-30T06:39:50.438538Z',
 'endTimeUtc': '2021-08-30T06:46:50.647981Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '19fa999f-e701-4651-94b4-c430ba59c712',
  'user_agent': 'python/3.8.8 (Windows-10-10.0.19041-SP0) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.33.0'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://deuxiemeespace6230319654.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_c5c5f7d8-e312-4ae9-90c4-fbab81143c55/azureml-logs/hyperdrive.txt?sv=2019-07-07&sr=b&sig=hA%2BflgGKFCxJj5XiOpw9y7m3%2Bx4T0dzKtgB%2FwogAS%2BU%3D&st=2021-08-30T06%3A37%3A16Z&se=2021-08-30T14%3A47%3A16Z&sp=r'},
 'submittedBy': 'justine charley'

In [None]:
#-------------------------- ETAPE 3 : ENREGISTREMENT

#récuperer le plus performant
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run.get_details()['runDefinition']['arguments'])

In [None]:
#host / save the best one 
model = best_run.register_model(model_name='rf_tuning', model_path='outputs/model_rf.joblib')