Copyright (c) Microsoft. All rights reserved.

Licensed under the MIT license.

In [None]:
import azureml.core
from azureml.core import Workspace

# Load the workspace.
# Workspace.from_config() is called without arguments, so the workspace
# details come from the Azure ML config file rather than from this notebook.
ws = Workspace.from_config()

In [None]:
# Resolve the workspace's default datastore; every CSV below is uploaded to it.
default_ds = ws.get_default_datastore()

# Local CSV files (under ./data) to push to the datastore.
local_csv_files = [
    './data/Customer.csv',
    './data/residents_source1.csv',
    './data/residents_source2.csv',
    './data/payments.csv',
    './data/surveys.csv',
    './data/leases.csv',
    './data/workorders.csv',
]

default_ds.upload_files(
    files=local_csv_files,
    target_path='propertymgmt-data/',  # folder path inside the datastore
    overwrite=True,  # replace existing files of the same name
    show_progress=True,
)

In [None]:
from azureml.data import DataType

# Columns of Customer.csv, in the order they are declared to the dataset reader.
# NOTE(review): 'surverydate' looks like a misspelling of 'surveydate'; it is
# kept verbatim because it must match the CSV header - confirm before renaming.
_customer_columns = [
    'CustomerId',
    'pid',
    'surveytype',
    'surverydate',
    'question',
    'answer',
    'FirstName',
    'LastName',
    'Name',
    'Gender',
    'Email',
    'Telephone',
    'Country',
    'City',
    'State',
    'PostCode',
    'StreetAddress',
    'DateOfBirth',
    'CreatedDate',
    'Source',
    'SurveyEmail',
    'sourcedata_residents_source1_cid',
    'sourcedata_residents_source1_cid_Alternate',
    'sourcedata_residents_source2_cid',
    'sourcedata_residents_source2_cid_Alternate',
    'sourcedata_surveys_sid',
    'sourcedata_surveys_sid_Alternate',
]

# The few columns that are not read as strings.
_customer_non_string_types = {
    'surverydate': DataType.to_datetime(),
    'answer': DataType.to_float(),
    'DateOfBirth': DataType.to_datetime(),
}

# Explicit column types for Customer.csv: every column is a string unless it
# is listed in the overrides above. Key order follows _customer_columns.
Customer_data_types = {
    column: _customer_non_string_types.get(column, DataType.to_string())
    for column in _customer_columns
}


In [None]:
# Create a Tabular dataset for each CSV on the datastore and register it in
# the workspace.

from azureml.core import Dataset

# One entry per dataset:
#   (path within the datastore, registered name, description,
#    explicit column types or None)
_dataset_specs = [
    ('propertymgmt-data/Customer.csv', 'CustomerData', 'Customer Data', Customer_data_types),
    ('propertymgmt-data/residents_source1.csv', 'Residents1Data', 'Resident Data', None),
    ('propertymgmt-data/residents_source2.csv', 'Residents2Data', 'Resident Data', None),
    ('propertymgmt-data/leases.csv', 'LeasesData', 'Leases Data', None),
    ('propertymgmt-data/payments.csv', 'PaymentsData', 'Payments Data', None),
    ('propertymgmt-data/surveys.csv', 'SurveysData', 'Survey Data', None),
    ('propertymgmt-data/workorders.csv', 'WorkOrdersData', 'Work Orders Data', None),
]

for _csv_path, _registered_name, _summary, _column_types in _dataset_specs:
    # Only Customer.csv supplies explicit column types; for the other files
    # the set_column_types argument is not passed at all.
    _read_options = {} if _column_types is None else {'set_column_types': _column_types}

    tab_data_set = Dataset.Tabular.from_delimited_files(
        path=(default_ds, _csv_path),
        **_read_options,
    )

    # create_new_version=True is passed on every registration, so re-running
    # this cell registers again under the same name instead of reusing it.
    tab_data_set = tab_data_set.register(
        workspace=ws,
        name=_registered_name,
        description=_summary,
        tags={'format': 'CSV'},
        create_new_version=True,
    )

# After the loop, tab_data_set refers to the last dataset registered
# (WorkOrdersData).

In [None]:
from azureml.core import Workspace, Dataset, Datastore, ScriptRunConfig, Experiment
from azureml.data.data_reference import DataReference
import os
import azureml.dataprep as dprep
import pandas as pd
import numpy as np
import scripts.pipeline_library as pl

import azureml.core
from azureml.core import Workspace


# Re-load the workspace so this cell can be run on its own.
ws = Workspace.from_config()


def _registered_dataset(dataset_name):
    """Return the dataset registered in the workspace under *dataset_name*."""
    return Dataset.get_by_name(ws, name=dataset_name)


# Fetch the datasets registered earlier in this notebook, by name.
customerData = _registered_dataset('CustomerData')
resident1Data = _registered_dataset('Residents1Data')
resident2Data = _registered_dataset('Residents2Data')
leaseData = _registered_dataset('LeasesData')
paymentData = _registered_dataset('PaymentsData')
surveyData = _registered_dataset('SurveysData')
workorderData = _registered_dataset('WorkOrdersData')

# Settings handed to scripts.pipeline_library.pipeline_steps.
# NOTE(review): the meaning of each key is defined by pipeline_library, which
# is not visible here; "step_type" presumably selects the training path -
# confirm against that module.
config = {
    "output_datastore": None,
    "output_path": None,
    "model": None,
    "run": None,
    "workspace": ws,
    "step_type": "train",
    "model_folder": "models",
    "model_name": "model",
    "description": "Lease Renewal Prediction Model",
}

# Arguments are positional; their order must match pipeline_steps' signature.
pl.pipeline_steps(
    customerData,
    resident1Data,
    resident2Data,
    leaseData,
    paymentData,
    surveyData,
    workorderData,
    config,
)

In [None]:
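# NOTE: This whole cell is commented out. It repeats the pipeline call from the
# previous cell, but first loads the registered model named 'model' with joblib
# and passes it in the config with "step_type" set to "test" instead of "train".
# from azureml.core import Workspace, Dataset, Datastore, ScriptRunConfig, Experiment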
# from azureml.data.data_reference import DataReference
# import os
# import azureml.dataprep as dprep
# import pandas as pd
# import numpy as np
# import scripts.pipeline_library as pl

# import azureml.core
# from azureml.core import Workspace

# from azureml.core.model import Model

# ws = Workspace.from_config()

# customerData = Dataset.get_by_name(ws, name='CustomerData')
# resident1Data = Dataset.get_by_name(ws, name='Residents1Data')
# resident2Data = Dataset.get_by_name(ws, name='Residents2Data')
# leaseData = Dataset.get_by_name(ws, name='LeasesData')
# paymentData = Dataset.get_by_name(ws, name='PaymentsData')
# surveyData = Dataset.get_by_name(ws, name='SurveysData')
# workorderData = Dataset.get_by_name(ws, name='WorkOrdersData')

# import joblib
# model_name='model'
# model_path = Model.get_model_path(model_name=model_name, _workspace=ws)
# loaded_model = joblib.load(model_path)

# config = {
#     "output_datastore" : None,
#     "output_path" : None,
#     "model" : loaded_model,
#     "run" : None,
#     "workspace": ws,
#     "step_type" : "test",
#     "model_folder" : "models",
#     "model_name" : 'model',
#     "description" : "Lease Renewal Prediction Model"
# }

# pl.pipeline_steps(customerData,resident1Data, resident2Data, leaseData,paymentData,surveyData,workorderData,config)