In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('creditdata.csv')
numerical = ['DEFAULT', 'LIMIT_BAL', 'AGE', 'BILL_AMT1','BILL_AMT2', 'BILL_AMT3','BILL_AMT4','BILL_AMT5','BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'USED_BAL', 'PAY_BAL']


In [3]:
df.rename({'PAY_0':'PAY_1','default payment next month':'DEFAULT'},inplace=True,axis =1)
df.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,DEFAULT
0,1,20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [4]:
def PaymentBalance(row):
  if sum([row['BILL_AMT' + str(i)] for i in range(1,7)]) > 0:
    return sum([row['BILL_AMT' + str(i)]- row['PAY_AMT' + str(i)] for i in range(1,7)])/sum([row['BILL_AMT' + str(i)] for i in range(1,7)])
  else:
    return 0

def UsedBalance(row):
  if row['LIMIT_BAL'] > 0:
    return np.mean([row['BILL_AMT' + str(i)]- row['PAY_AMT' + str(i)] for i in range(1,7)])/row['LIMIT_BAL']
  else:
    return 0

def NegativeBal(row):
  return sum([1 for i in range(1,7) if row['BILL_AMT' + str(i)]<0])

def ZeroBal(row):
  return sum([1 for i in range(1,7) if row['BILL_AMT' + str(i)]==0])

def ZeroPay(row):
  return sum([1 for i in range(1,7) if row['PAY_AMT' + str(i)]==0])

df['USED_BAL'] = df.apply(lambda x: UsedBalance(x), axis =1)
df['PAY_BAL']  = df.apply(lambda x: PaymentBalance(x), axis =1)
df['NEG_BAL']  = df.apply(lambda x: NegativeBal(x), axis =1)
df['ZERO_BAL']  = df.apply(lambda x: ZeroBal(x), axis =1)
df['ZERO_PAY']  = df.apply(lambda x: ZeroPay(x), axis =1)

In [5]:
categorical_cols = ['SEX','EDUCATION','MARRIAGE','PAY_1','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6','NEG_BAL','ZERO_BAL','ZERO_PAY']

In [6]:
df = pd.get_dummies(data=df, columns = categorical_cols)

In [7]:
df.drop(columns='ID', inplace = True)

In [8]:
from sklearn.model_selection import train_test_split
cols = list(set(df.columns) - set(['DEFAULT']))
X_train_full, X_test_full, y_train, y_test = train_test_split(df,df['DEFAULT'],test_size = 0.33, stratify = df['DEFAULT'])
X_train = X_train_full.drop(columns = 'DEFAULT')
X_test = X_test_full.drop(columns = 'DEFAULT')

-----------------------------------  COUNTERFACTUALS ---------------------------

In [9]:
from sklearn.neural_network import MLPClassifier

cfmodel = MLPClassifier(random_state = 10)
cfmodel.fit(X_train, y_train)

MLPClassifier(random_state=10)

In [10]:
!pip install dice_ml
import dice_ml
from dice_ml.utils import helpers  # helper functions

from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
numerical = ['LIMIT_BAL', 'AGE', 'BILL_AMT1','BILL_AMT2', 'BILL_AMT3','BILL_AMT4','BILL_AMT5','BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'USED_BAL', 'PAY_BAL']
d = dice_ml.Data(dataframe = df, continuous_features = numerical, outcome_name = 'DEFAULT')
backend= 'sklearn'
m = dice_ml.Model(model = cfmodel, backend = backend)
exp = dice_ml.Dice(d, m, method = 'random')
query_instances = pd.DataFrame(X_test[0:1])
cfs = exp.generate_counterfactuals(query_instances, total_CFs=5, desired_class="opposite", verbose=False)

Collecting dice_ml
  Downloading dice_ml-0.7.2-py3-none-any.whl (242 kB)
[?25l[K     |█▍                              | 10 kB 15.3 MB/s eta 0:00:01[K     |██▊                             | 20 kB 19.0 MB/s eta 0:00:01[K     |████                            | 30 kB 12.5 MB/s eta 0:00:01[K     |█████▍                          | 40 kB 9.5 MB/s eta 0:00:01[K     |██████▊                         | 51 kB 5.5 MB/s eta 0:00:01[K     |████████▏                       | 61 kB 6.1 MB/s eta 0:00:01[K     |█████████▌                      | 71 kB 5.8 MB/s eta 0:00:01[K     |██████████▉                     | 81 kB 6.5 MB/s eta 0:00:01[K     |████████████▏                   | 92 kB 6.5 MB/s eta 0:00:01[K     |█████████████▌                  | 102 kB 5.3 MB/s eta 0:00:01[K     |██████████████▉                 | 112 kB 5.3 MB/s eta 0:00:01[K     |████████████████▎               | 122 kB 5.3 MB/s eta 0:00:01[K     |█████████████████▋              | 133 kB 5.3 MB/s eta 0:00:01[K

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


In [11]:
query_instances

Unnamed: 0,LIMIT_BAL,AGE,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,USED_BAL,PAY_BAL,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_1_-2,PAY_1_-1,PAY_1_0,PAY_1_1,PAY_1_2,PAY_1_3,PAY_1_4,PAY_1_5,PAY_1_6,PAY_1_7,PAY_1_8,...,PAY_5_-1,PAY_5_0,PAY_5_2,PAY_5_3,PAY_5_4,PAY_5_5,PAY_5_6,PAY_5_7,PAY_5_8,PAY_6_-2,PAY_6_-1,PAY_6_0,PAY_6_2,PAY_6_3,PAY_6_4,PAY_6_5,PAY_6_6,PAY_6_7,PAY_6_8,NEG_BAL_0,NEG_BAL_1,NEG_BAL_2,NEG_BAL_3,NEG_BAL_4,NEG_BAL_5,NEG_BAL_6,ZERO_BAL_0,ZERO_BAL_1,ZERO_BAL_2,ZERO_BAL_3,ZERO_BAL_4,ZERO_BAL_5,ZERO_BAL_6,ZERO_PAY_0,ZERO_PAY_1,ZERO_PAY_2,ZERO_PAY_3,ZERO_PAY_4,ZERO_PAY_5,ZERO_PAY_6
1420,200000,31,6618,6777,7957,4667,76187,74900,1900,3008,3500,73000,2700,3800,0.074332,0.503642,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0


In [12]:
current_cf = cfs.cf_examples_list[0].final_cfs_df
current_cf = current_cf.reset_index().drop(columns = ['index'])
current_cf

Unnamed: 0,LIMIT_BAL,AGE,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,USED_BAL,PAY_BAL,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_1_-2,PAY_1_-1,PAY_1_0,PAY_1_1,PAY_1_2,PAY_1_3,PAY_1_4,PAY_1_5,PAY_1_6,PAY_1_7,PAY_1_8,...,PAY_5_0,PAY_5_2,PAY_5_3,PAY_5_4,PAY_5_5,PAY_5_6,PAY_5_7,PAY_5_8,PAY_6_-2,PAY_6_-1,PAY_6_0,PAY_6_2,PAY_6_3,PAY_6_4,PAY_6_5,PAY_6_6,PAY_6_7,PAY_6_8,NEG_BAL_0,NEG_BAL_1,NEG_BAL_2,NEG_BAL_3,NEG_BAL_4,NEG_BAL_5,NEG_BAL_6,ZERO_BAL_0,ZERO_BAL_1,ZERO_BAL_2,ZERO_BAL_3,ZERO_BAL_4,ZERO_BAL_5,ZERO_BAL_6,ZERO_PAY_0,ZERO_PAY_1,ZERO_PAY_2,ZERO_PAY_3,ZERO_PAY_4,ZERO_PAY_5,ZERO_PAY_6,DEFAULT
0,200000.0,31.0,6618.0,6777.0,443788.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,0.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,200000.0,31.0,6618.0,6777.0,258849.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,0.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,200000.0,31.0,6618.0,417989.0,7957.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,0.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,200000.0,31.0,6618.0,6777.0,1591413.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,0.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,200000.0,31.0,6618.0,830462.0,7957.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,489946.0,0.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [15]:
cfs.visualize_as_dataframe()

Query instance (original outcome : 1)


Unnamed: 0,LIMIT_BAL,AGE,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,USED_BAL,PAY_BAL,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_1_-2,PAY_1_-1,PAY_1_0,PAY_1_1,PAY_1_2,PAY_1_3,PAY_1_4,PAY_1_5,PAY_1_6,PAY_1_7,PAY_1_8,...,PAY_5_0,PAY_5_2,PAY_5_3,PAY_5_4,PAY_5_5,PAY_5_6,PAY_5_7,PAY_5_8,PAY_6_-2,PAY_6_-1,PAY_6_0,PAY_6_2,PAY_6_3,PAY_6_4,PAY_6_5,PAY_6_6,PAY_6_7,PAY_6_8,NEG_BAL_0,NEG_BAL_1,NEG_BAL_2,NEG_BAL_3,NEG_BAL_4,NEG_BAL_5,NEG_BAL_6,ZERO_BAL_0,ZERO_BAL_1,ZERO_BAL_2,ZERO_BAL_3,ZERO_BAL_4,ZERO_BAL_5,ZERO_BAL_6,ZERO_PAY_0,ZERO_PAY_1,ZERO_PAY_2,ZERO_PAY_3,ZERO_PAY_4,ZERO_PAY_5,ZERO_PAY_6,DEFAULT
0,200000,31,6618,6777,7957,4667,76187,74900,1900,3008,3500,73000,2700,3800,0.074332,0.503642,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,LIMIT_BAL,AGE,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,USED_BAL,PAY_BAL,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_1_-2,PAY_1_-1,PAY_1_0,PAY_1_1,PAY_1_2,PAY_1_3,PAY_1_4,PAY_1_5,PAY_1_6,PAY_1_7,PAY_1_8,...,PAY_5_0,PAY_5_2,PAY_5_3,PAY_5_4,PAY_5_5,PAY_5_6,PAY_5_7,PAY_5_8,PAY_6_-2,PAY_6_-1,PAY_6_0,PAY_6_2,PAY_6_3,PAY_6_4,PAY_6_5,PAY_6_6,PAY_6_7,PAY_6_8,NEG_BAL_0,NEG_BAL_1,NEG_BAL_2,NEG_BAL_3,NEG_BAL_4,NEG_BAL_5,NEG_BAL_6,ZERO_BAL_0,ZERO_BAL_1,ZERO_BAL_2,ZERO_BAL_3,ZERO_BAL_4,ZERO_BAL_5,ZERO_BAL_6,ZERO_PAY_0,ZERO_PAY_1,ZERO_PAY_2,ZERO_PAY_3,ZERO_PAY_4,ZERO_PAY_5,ZERO_PAY_6,DEFAULT
0,200000.0,31.0,6618.0,6777.0,443788.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,1.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,200000.0,31.0,6618.0,6777.0,258849.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,1.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,200000.0,31.0,6618.0,417989.0,7957.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,1.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,200000.0,31.0,6618.0,6777.0,1591413.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,3800.0,1.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,200000.0,31.0,6618.0,830462.0,7957.0,4667.0,76187.0,74900.0,1900.0,3008.0,3500.0,73000.0,2700.0,489946.0,1.074332,0.503642,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [14]:
import numpy as np

def get_metrics(dataset, cont, counterfactuals):
    
    
    data = d
    query = query_instances
    true_y = y_test[:5].reset_index().drop(columns = ['index'])
    
    
    mad_dict = data.get_mads(normalized=True)
    mad_df = pd.DataFrame.from_dict(mad_dict,orient='index')
    
    #for each counterfactual set
    cont_prox_l = []
    sparsity_l = []
    validity_l = []
    diversity_l = []
    #########CHANGE RANGE
    for cfset in range(1):
        
        cf_dc = []
        #get initial
        current_data = query.iloc[cfset]
        #get counterfactual set
        current_cf = counterfactuals.cf_examples_list[cfset].final_cfs_df
        current_cf = current_cf.reset_index().drop(columns = ['index'])

        disc_cont = 0
        spar_tracker = 0
        validity = 0
        diversity = 0
        for cf in range(len(current_cf)):

            for feature in cont:
               
                if current_cf.loc[cf,feature] != current_data[feature]:
                    spar_tracker+=1
                
                
                disc_cont += ((current_cf.loc[cf,feature]-current_data[feature])/mad_df.loc[feature].values[0])
                
                
            cf_dc.append(disc_cont*(1/len(cont)))

            if current_cf.iloc[cf,-1] != true_y.iloc[cf].values[0]:
                validity += 1
                
        disc_cont_div = 0
        cf_div = []
        for cf in range(len(current_cf) - 1):
            for cf1 in range(cf, len(current_cf)):
                for feature in cont:
                    disc_cont_div += ((current_cf.loc[cf,feature]-current_cf.loc[cf1,feature])/mad_df.loc[feature].values[0])
            cf_div.append(disc_cont_div*(1/len(cont)))
        
        #print(current_cf)
        diversity = sum(cf_div)/(len(current_cf)**2)
        diversity_l.append(diversity)

        cont_prox = sum(cf_dc)/len(current_cf)*-1
        
        cont_prox_l.append(cont_prox)
            
        sparsity_l.append(1-(spar_tracker/(len(current_cf)*len(cont))))
        
        validity_l.append(validity/len(current_cf))
    
    tot_cont_prox = sum(cont_prox_l)#np.mean(np.array(cont_prox_l))
    tot_sparsity = np.mean(np.array(sparsity_l))
    print('Avergage Diversity', np.mean(np.array(diversity_l)))
    print('Total Sparsity', tot_sparsity)
    print('Average Proximity', np.mean(np.array(cont_prox_l)))

    
get_metrics(d,numerical,cfs)

Avergage Diversity -6508095.969424704
Total Sparsity 0.925
[0.0]
Average Proximity -10778874.92662095
