In [1]:
from azureml.core import Workspace, Experiment
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.train.sklearn import SKLearn

In [2]:
# Authenticate interactively against one specific Azure AD tenant.
interactive_auth = InteractiveLoginAuthentication(
    tenant_id="39288a38-ff19-432c-8011-1cd9d0dff445",
)

# Open the Azure ML workspace with the credential obtained above.
ws = Workspace(
    subscription_id="793146d9-d4dc-4a73-9728-76c4ffd0cc0d",
    resource_group="rg_dynamics_test",
    workspace_name="resdynml1test",
    auth=interactive_auth,
)

In [None]:
%%writefile ./src/train.py

from azureml.core import Run
from xgboost import XGBClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import recall_score, precision_score, hamming_loss, zero_one_loss
import os
import joblib

run = Run.get_context()

# load data
train_data = run.input_datasets['train_data'].to_pandas_dataframe()
test_data = run.input_datasets['test_data'].to_pandas_dataframe()

# split train/test and feat/target
X_train = train_data[[ col for col in train_data.columns if col.startswith('feat')]]
y_train = train_data[[ col for col in train_data.columns if col.startswith('target')]]
X_test = test_data[[col for col in test_data.columns if col.startswith('feat')]]
y_test = test_data[[ col for col in test_data.columns if col.startswith('target')]]

# train classifier
model = MultiOutputClassifier(
            XGBClassifier(n_jobs=-1, max_depth=3, learning_rate=0.1, n_estimators=100, reg_alpha=0, reg_lambda=1)
        )
model.fit(X_train, y_train)

# evaluate test data
y_pred = model.predict(X_test)
run.log('precision_macro', precision_score(y_test, y_pred, average='macro'))
run.log('precision_samples', precision_score(y_test, y_pred, average='samples'))
run.log('recall_macro', recall_score(y_test, y_pred, average='macro'))
run.log('recall_macro', recall_score(y_test, y_pred, average='samples'))
run.log('hamming_loss', hamming_loss(y_test, y_pred))
run.log('zero_one_loss', zero_one_loss(y_test, y_pred))

# evaluate train data
y_pred = model.predict(X_train)
run.log('precision_macro_train', precision_score(y_train, y_pred, average='macro'))
run.log('precision_samples_train', precision_score(y_train, y_pred, average='samples'))
run.log('recall_macro_train', recall_score(y_train, y_pred, average='macro'))
run.log('recall_macro_train', recall_score(y_train, y_pred, average='samples'))
run.log('hamming_loss_train', hamming_loss(y_train, y_pred))
run.log('zero_one_loss_train', zero_one_loss(y_train, y_pred))

# save model
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/model.pkl')

run.complete()

In [None]:
# Configure the training job: run src/train.py locally, exposing the two
# workspace datasets to the script as named inputs 'train_data' / 'test_data'.
est = SKLearn(
    entry_script='train.py',
    source_directory='src',
    inputs=[
        ws.datasets['ProductPredictionTrain'].as_named_input('train_data'),
        ws.datasets['ProductPredictionTest'].as_named_input('test_data'),
    ],
    # train.py imports xgboost; request it explicitly because the scikit-learn
    # estimator environment is not expected to ship it by default.
    pip_packages=['xgboost'],
    compute_target='local',
)

In [None]:
# Submit the estimator under the 'ProductPrediction' experiment of the workspace.
exp = Experiment(workspace=ws, name='ProductPrediction')
run = exp.submit(config=est)

# Block until the run finishes, streaming its log output into the notebook.
run.wait_for_completion(show_output=True)