In [1]:
def getSecret(secretName, linked_service="ifmpmvault", akv_name="ifm-vault"):
    """Fetch a secret value from Azure Key Vault via ``mssparkutils``.

    Args:
        secretName: Name of the secret to look up.
        linked_service: Linked service name passed as ``linkedService``.
            Defaults to the value this notebook has always used.
        akv_name: Key Vault name passed as ``akvName``. Defaults to the
            value this notebook has always used.

    Returns:
        Whatever ``mssparkutils.credentials.getSecret`` returns for the
        requested secret.
    """
    # `mssparkutils` is not imported in this notebook; it is expected to be
    # provided by the runtime the notebook executes in.
    return mssparkutils.credentials.getSecret(
        linkedService=linked_service,
        akvName=akv_name,
        secret=secretName,
    )

In [2]:
# Load the feature table through Spark SQL.
feat_data = spark.sql('''select * from machine_data_features''')

# Date-based split on dt_truncated: rows strictly before split_date go to
# training, rows on or after it go to testing.
split_date = "2022-12-21" #"2015-10-30"
training = feat_data.filter(feat_data.dt_truncated < split_date)
testing = feat_data.filter(feat_data.dt_truncated >= split_date)

# Columns that must not be fed to the model ('failure' used to be listed twice).
drop_cols = [
    'msdyn_customerassetid',
    'dt_truncated',
    'model',
    'failure',
    'msdyn_name',
    'msdyn_productname',
    'modifiedon',
    'component',
]
# Build the lookup set once instead of once per column inside the comprehension.
excluded_cols = set(drop_cols)
# Every remaining column is kept; 'label_e' is not in drop_cols, so it stays
# available in df_train for the steps that read it.
input_features = [col for col in feat_data.columns if col not in excluded_cols]

# Materialise the training rows as a pandas DataFrame.
df_train = training.select(input_features).toPandas()

# print(training.count(),testing.count())

In [3]:
from sklearn.utils import resample
import pandas as pd
from collections import Counter

def _upsample_class(frame, label, factor, seed=42):
    """Return the rows of ``frame`` with ``label_e == label``, resampled with
    replacement to ``factor`` times their original count.

    A label with no rows is returned as the (empty) selection unchanged, so
    ``resample`` is never asked for zero samples.
    """
    class_rows = frame[frame['label_e'] == label]
    if class_rows.empty:
        return class_rows
    return resample(
        class_rows,
        replace=True,
        n_samples=len(class_rows) * factor,
        random_state=seed,
    )


# Class frequencies, most common first.
all_classes = Counter(df_train['label_e'])
majority_class = all_classes.most_common(1)
minority_classes = all_classes.most_common()[1:]

# Total number of rows that do not belong to the most common class.
minority_classes_size = sum(count for _, count in minority_classes)
if minority_classes_size == 0:
    # Previously this surfaced as a bare ZeroDivisionError a few lines below.
    raise ValueError(
        "df_train contains a single 'label_e' class; there is nothing to upsample."
    )

# Target: minority rows should add up to about half of the original row count.
desired_minority_classes_size = df_train['label_e'].count() * 0.5

# Integer replication factor applied to every minority class. Floor it at 1:
# a factor of 0 (minority rows already exceed the target) would request zero
# samples per class and lose the minority rows instead of keeping them.
scale = max(1, int(desired_minority_classes_size) // minority_classes_size)

# NOTE(review): label 0 is assumed to be the majority class and 1-4 the
# minority classes; majority_class above is computed but not used to pick it.
df_0 = df_train[df_train['label_e'] == 0]

df_F1 = _upsample_class(df_train, 1, scale)
df_F2 = _upsample_class(df_train, 2, scale)
df_F3 = _upsample_class(df_train, 3, scale)
df_F4 = _upsample_class(df_train, 4, scale)

df_train_upsampled = pd.concat([df_0, df_F1, df_F2, df_F3, df_F4])

In [5]:
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset

from azureml.train.automl.run import AutoMLRun
from azureml.train.automl import AutoMLConfig
from azureml.automl.runtime.onnx_convert import OnnxConverter
from azureml.core.model import Model
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice

# Resolve the three workspace coordinates through getSecret, in the same
# order as before: subscription, resource group, workspace name.
subscription_id, resource_group, workspace_name = (
    getSecret(secret_key)
    for secret_key in ('subscription-id', 'resource-group', 'workspace-name')
)

# Handle to the Azure ML workspace identified by those three values.
ws = Workspace(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

In [6]:
# Increase "iterations" and "experiment_timeout_hours" as needed.
automl_settings = {
    "iterations": 10,
    "n_cross_validations": 5,
    "primary_metric": 'AUC_weighted',
    "enable_early_stopping": True,
    "max_concurrent_iterations": 5,
    "model_explainability": True,
    "experiment_timeout_hours": 0.25,
}

# Classification task on the upsampled training frame, predicting 'label_e';
# the settings above are forwarded as keyword arguments.
automl_config = AutoMLConfig(
    task='classification',
    training_data=df_train_upsampled,
    label_column_name='label_e',
    **automl_settings,
)

# Experiment in workspace `ws` that the AutoML run is submitted under.
experiment = Experiment(ws, "PredictiveMaintenanceExperiment")

In [7]:
# Submit the AutoML configuration to the experiment and keep the returned run
# object in local_run; show_output=False is passed through to submit().
local_run = experiment.submit(automl_config, show_output=False)