In [None]:
import logging
from azureml.automl.core.constants import FeaturizationConfigMode
from azureml.automl.core.featurization import FeaturizationConfig

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

import azureml.dataprep
import azureml.core

# Report the installed SDK versions (dataprep first, then core) so a run can
# be tied back to the environment that produced it.
print(azureml.dataprep.__version__)
print(azureml.core.VERSION)


In [None]:
from azureml.core import Workspace

# Workspace handle shared by every later cell (datastore lookup, dataset
# registration, experiment submission). from_config() is called with no
# arguments, so the workspace details come from the SDK's default config lookup.
ws = Workspace.from_config()

In [None]:
from azureml.core import Datastore

# Look up the SQL datastore named "ado_sql_datastore" in the workspace.
sql_datastore = Datastore.get(ws, "ado_sql_datastore")

In [None]:
from azureml.core import Dataset
from azureml.data.datapath import DataPath
# Build a tabular dataset from every column of the Improvements table,
# queried through the SQL datastore.
query = DataPath(sql_datastore, 'SELECT *  FROM Improvements')
improvements_sql_ds = Dataset.Tabular.from_sql_query(query)

# Register the dataset in the workspace; create_new_version=True is passed so
# re-running the cell registers another version under the same name.
improvements_sql_ds.register(
    workspace=ws,
    name="ai_ag_ado_improvements",
    description="Improvements from Azure DevOps",
    create_new_version=True,
)

In [None]:
from azureml.core import Dataset

# Name of the regression target; it must match the alias used in the query.
label = "dc_impact_score"

# The target is computed server-side as
#   1.5^MitigationScore * 2^Priority * 6.585^IsBlocker
# and returned alongside every original FeedbackItems column.
query_string = 'SELECT *, (POWER(1.5,MitigationScore) * POWER(2,Priority) * POWER(6.585, IsBlocker)) as dc_impact_score FROM FeedbackItems'

query = DataPath(sql_datastore, query_string)
feedback_sql_ds = Dataset.Tabular.from_sql_query(query)

# Register the labelled feedback dataset in the workspace.
feedback_sql_ds.register(
    workspace=ws,
    name="ai_ag_ado_feedack",
    description="Feedback from Azure DevOps",
    create_new_version=True,
)

In [None]:
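# Disabled alternative: compute the dc_impact_score label client-side in pandas
# instead of inside the SQL query.
#
# feedback_sql_pd = feedback_sql_ds.to_pandas_dataframe()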
# 
# label ="dc_impact_score"
# 
# def dc_impact_score_calculation(mitigation_score, priority, is_blocker):
#     return (1.5**mitigation_score) * (2**priority) * (6.585**is_blocker)
#     
# 
# feedback_sql_pd[label] = dc_impact_score_calculation(feedback_sql_pd['MitigationScore'], feedback_sql_pd['Priority'], feedback_sql_pd['IsBlocker'])
# 
# file="temp"
# feedback_sql_pd.to_csv(file)
# feedback_sql_ds_labeled = Dataset.Tabular.from_delimited_files(path=file)

In [None]:
def split_dataset(dataset, workspace=None, percentage=0.8, seed=223):
    """Randomly split ``dataset`` into train/test parts and register both.

    Args:
        dataset: Object exposing ``random_split(percentage=..., seed=...)``
            that returns two datasets, each with a ``register`` method.
        workspace: Workspace to register the two splits in. When omitted,
            the notebook-level ``ws`` is used, which preserves the original
            single-argument behaviour.
        percentage: Value forwarded to ``random_split`` as ``percentage``
            (default 0.8).
        seed: Seed forwarded to ``random_split`` so the split is repeatable
            (default 223).

    Returns:
        The ``(train_data, test_data)`` pair exactly as returned by
        ``random_split``.
    """
    if workspace is None:
        # Fall back to the notebook global so existing one-argument calls
        # keep working unchanged.
        workspace = ws

    # Split the dataset into train and test datasets
    train_data, test_data = dataset.random_split(percentage=percentage, seed=seed)

    # Register the train dataset with the workspace
    train_data.register(workspace=workspace,
                        name='ai_ag_ado_feedack_train_dataset',
                        description='Feedback from Azure DevOps training data',
                        create_new_version=True)

    # Register the test dataset with the workspace
    test_data.register(workspace=workspace,
                       name='ai_ag_ado_feedack_test_dataset',
                       description='Feedback from Azure DevOps test data',
                       create_new_version=True)
    return train_data, test_data
    
# Produce the train/test splits from the feedback dataset; train_data feeds
# the AutoML configuration in a later cell.
train_data, test_data = split_dataset(feedback_sql_ds)    

In [None]:
import logging
from azureml.train.automl import AutoMLConfig

# AutoML regression settings: train on train_data to predict the `label`
# column, scoring by normalized RMSE with 5-fold cross-validation.
# NOTE(review): the label is computed in SQL from MitigationScore, Priority and
# IsBlocker, and `SELECT *` keeps those columns in the training data, so the
# target can be reconstructed from the features -- confirm this is intended.
automl_config = AutoMLConfig(task = 'regression',
                             debug_log = 'automl_errors.log',
                             training_data = train_data,
                             label_column_name = label,
                             verbosity = logging.INFO,
                             enable_early_stopping = True, 
                             experiment_timeout_minutes = 60,
                             max_concurrent_iterations = 4,
                             max_cores_per_iteration = -1,
                             n_cross_validations = 5,
                             primary_metric ='normalized_root_mean_squared_error',
                             # `preprocess=True` is the deprecated spelling;
                             # featurization='auto' is its documented replacement.
                             featurization = 'auto'
                            )

In [None]:
from azureml.core.experiment import Experiment
# Experiment in the workspace that groups the runs submitted below.
experiment = Experiment(workspace=ws, name="ai-impact-score-experiment-dc-sql")

# Submit the AutoML configuration with show_output=True.
local_run = experiment.submit(config=automl_config, show_output=True)

In [None]:
# get_output() hands back the selected run together with its fitted model.
best_run, fitted_model = local_run.get_output()

# Register that run's model file under a fixed model name.
model = best_run.register_model(
    model_path='./outputs/model.pkl',
    model_name='best_sql_dc_impact_score_model',
)

In [None]:
# Summarise the registration result: name, version and URL of the model.
registration_summary = "Registered model:\n --> Name: {}\n --> Version: {}\n --> URL: {}"
print(registration_summary.format(model.name, model.version, model.url))