# Azure AutoML Fair & Interpretable Model
We use a credit risk modelling dataset from Kaggle for this task. In this project, we train a model using Azure AutoML and, after selecting the model,
 we assess its fairness and interpretability.

#### Install Required Packages

In [16]:
!pip install --upgrade fairlearn==0.6.2 raiwidgets azureml-contrib-fairness azureml-explain-model azureml-interpret

Requirement already up-to-date: fairlearn==0.6.2 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (0.6.2)
Requirement already up-to-date: raiwidgets in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (0.8.0)
Requirement already up-to-date: azureml-contrib-fairness in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.32.0)
Requirement already up-to-date: azureml-explain-model in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.32.0)
Requirement already up-to-date: azureml-interpret in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.32.0)


#### Get Data

In [17]:
from azureml.core import Workspace, Datastore, Dataset

# Workspace handle, loaded with Workspace.from_config()
ws = Workspace.from_config()

# Names of the datastore and of the blob storage behind it
datastore_name = 'rohands'
storage_account_name = 'rohansa'
container_name = 'rohan-blob'
# account_key = Add your account key for storage account

# Register the blob container as a datastore unless the workspace
# already lists one under this name, in which case fetch it.
if datastore_name not in ws.datastores:
    try:
        blob_ds = Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastore_name,
            account_name=storage_account_name,
            # account_key=account_key,
            container_name=container_name,
        )
    except Exception as ex:
        # Registration problems are reported, not raised.
        print(ex)
else:
    blob_ds = Datastore.get(ws, datastore_name=datastore_name)

# Make it the workspace default, then read the default datastore back
ws.set_default_datastore(datastore_name)
default_ds = ws.get_default_datastore()

# Fetch the dataset if it is already registered; otherwise upload the CSV,
# build a tabular dataset from it and register that.
dataset_name = 'credit risk dataset'

if dataset_name in ws.datasets:
    tab_dataset = Dataset.get_by_name(ws, dataset_name)
    print('Dataset already registered.')
else:
    default_ds.upload_files(files=['./data/credit_risk_dataset.csv'],
                            target_path='credit-risk-data/',
                            overwrite=True,
                            show_progress=True)

    tab_dataset = Dataset.Tabular.from_delimited_files(
        path=(default_ds, 'credit-risk-data/*.csv'),
    )

    try:
        tab_dataset = tab_dataset.register(
            workspace=ws,
            name=dataset_name,
            description='credit risk data from Kaggle',
            tags={'format': 'CSV'},
            create_new_version=True,
        )
    except Exception as ex:
        # On failure the unregistered dataset built above stays in use.
        print(ex)
    else:
        print('Dataset registered.')

# Random 70/30 split with a fixed seed
train_ds, test_ds = tab_dataset.random_split(percentage=0.7, seed=999)

Dataset already registered.


##### Compute Target

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Compute params
compute_name = 'rohan-vm-cluster'
training_cluster = None  # stays None if no cluster could be obtained

# Reuse the compute target when the workspace already lists one under
# this name; otherwise provision a new cluster.
if compute_name in ws.compute_targets:
    training_cluster = ComputeTarget(ws, compute_name)
    print("Using existing cluster.")
else:
    try:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_V2',
            max_nodes=2)
        training_cluster = ComputeTarget.create(ws, compute_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Provisioning problems are reported, not raised, so the notebook
        # can keep going.
        print(ex)
    else:
        # Report success only when nothing above raised; previously this
        # message was printed even after a failure.
        print("Cluster created.")

#### Configure and Run AutoML

In [None]:
from azureml.train.automl import AutoMLConfig
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

# AutoML settings: a classification task on the `loan_status` column,
# with AUC_weighted as the primary metric, capped at 4 iterations
# (at most 2 running concurrently).
automl_config = AutoMLConfig(
    name='Automated Credit Risk Modelling',
    task='classification',
    primary_metric='AUC_weighted',
    label_column_name='loan_status',
    training_data=train_ds,
    validation_data=test_ds,
    featurization='auto',
    iterations=4,
    max_concurrent_iterations=2,
    compute_target=training_cluster,
)

# Submit the configuration to the experiment, show the run details
# widget, then wait for the run to complete while printing its output.
automl_experiment = Experiment(ws, 'azure-automl-fair-interpret-model')
automl_run = automl_experiment.submit(automl_config)
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

####  Get Best Run

#### Explain Model and Get Feature Importance Values

#### Detect and Mitigate Unfairness