# Example Mitigation Run

### Import main libraries and the mitigate_disparity module

In [None]:
import platform
import os
import logging
from datetime import datetime
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import mitigate_disparity as mit

### Prepare paths, load data, and split data into train and test dataframes

In [None]:
# Define OS
# `op_sys` holds the platform name reported by platform.system()
# (e.g. 'Windows', 'Linux', 'Darwin'); later cells branch on it to pick
# the input data directory.
try:
    op_sys = platform.system()
except Exception as e:
    print('Error:', e)
    # Keep `op_sys` defined even when detection fails, so the cells that
    # compare it take their "unknown OS" branch instead of raising an
    # unrelated NameError.
    op_sys = ''

In [None]:
# Define Input Data Paths for OS
in_data_directory_linux_unix = r'../data'
in_data_directory_windows = r'..\data'

# Load Data (IF USER IS NOT PASSING DATAFRAME INTO METHOD)
# Pick the directory spelling that matches the detected OS, then read the
# CSV in exactly one place so both branches are guaranteed to use a name
# that exists.
if op_sys == 'Windows':
    in_data_directory = in_data_directory_windows
elif op_sys in ('Linux', 'Darwin'):
    in_data_directory = in_data_directory_linux_unix
else:
    # Stop here with a clear message: continuing without `input_df` would
    # only fail on the split below with a misleading NameError.
    raise RuntimeError('Error: Unknown OS! Got: {!r}'.format(op_sys))

input_df = pd.read_csv(os.path.join(in_data_directory, 'diabetes_data.csv'))

# Hold out 40% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
train_df, test_df = train_test_split(input_df, test_size=0.4, random_state=42)

### Set values that will be referenced by the Mitigator class and its methods

In [None]:
# CRITICAL: Column names (as strings) of every demographic feature that
# social fairness should be measured for.
## NOTE: CATEGORICAL FEATURES MUST BE CODED AS INTEGERS
protected_features = [
    'race',
    'age',
    'gender',
]


# CRITICAL: Every feature that is fed to the LightGBM model as a predictor.
model_features = [
    'admission_type_id',
    'discharge_disposition_id',
    'admission_source_id',
    'time_in_hospital',
    'num_lab_procedures',
    'num_procedures',
    'num_medications',
    'number_outpatient',
    'number_emergency',
    'number_inpatient',
    'number_diagnoses',
    'max_glu_serum',
    'A1Cresult',
    'change',
    'diabetesMed',
    'service_utilization',
    'numchange',
    'repaglinide_',
    'nateglinide_',
    'chlorpropamide_',
    'acetohexamide_',
    'tolbutamide_',
    'acarbose_',
    'miglitol_',
    'troglitazone_',
    'tolazamide_',
    'insulin_',
    'metformin_',
    'glyburide_',
    'glipizide_',
    'glimepiride_',
    'pioglitazone_',
    'rosiglitazone_',
    'glyburide-metformin_',
    'glipizide-metformin_',
    'glimepiride-pioglitazone_',
    'metformin-rosiglitazone_',
    'metformin-pioglitazone_',
    'level1_diag1',
]

# CRITICAL: The subset of model features that are categorical.
## NOTE: CATEGORICAL FEATURES MUST BE CODED AS INTEGERS
cat_features = [
    'admission_type_id',
    'discharge_disposition_id',
    'admission_source_id',
]

# CRITICAL: Column name of the true label value.
y_bar = 'readmitted'

# CRITICAL: Column name of the sample weights.
samp_weight = 'sw'

# CRITICAL: LightGBM training parameters; passed to mitigator.fit() below.
train_params = dict(
    objective="binary",
    metric="binary_error",
    verbosity=-1,
    boosting_type="gbdt",
    seed=538,
    learning_rate=0.1,
    num_leaves=2,
)

### Initialize Mitigator class instance and save as object

In [None]:
# Build the Mitigator from the configuration values set in the previous cell:
# protected features, model features, categorical features, label column
# name, and sample-weight column name (passed positionally, in that order).
mitigator = mit.Mitigator(protected_features, model_features, cat_features, y_bar, samp_weight)

### Run transform method to rebalance the train dataframe using SMOTE and Tomek Links

In [None]:
# Rebalance the training split only; the test split is left untouched and is
# used for prediction later in the notebook.
data_smote = mitigator.transform(train_df)

### Fit the model to the rebalanced dataframe, optionally passing LightGBM model parameters that differ from the default Mitigator class attribute. Returns the model object.

In [None]:
# Fit on the rebalanced training data using the LightGBM settings in
# train_params; the returned model object is kept for the prediction step.
model1 = mitigator.fit(data_smote, train_params)

### Predict on test dataframe with the previously saved model

Ignore any RuntimeWarning messages; they are expected when calculating the metrics at certain threshold values.

In [None]:
# Score the held-out test split with the model fitted above.
predictions = mitigator.predict(model1, test_df)

### Create mitigate_report.html by running the same measurement analysis on the mitigated predictions as is done in the initial measure_disparity.py. The measure() method also returns a dataframe with all metric values for all protected feature groups.

In [None]:
# Run the measurement analysis on the predictions and keep the result for
# inspection in the next cell.
measurement = mitigator.measure(predictions)

In [None]:
# Preview the first few rows of the measurement result.
measurement.head()