In [2]:
pip install tempeh

Collecting tempeh
  Using cached tempeh-0.1.12-py3-none-any.whl (39 kB)
Collecting shap
  Using cached shap-0.39.0-cp38-cp38-win_amd64.whl (414 kB)
Collecting memory-profiler
  Using cached memory_profiler-0.58.0.tar.gz (36 kB)
Note: you may need to restart the kernel to use updated packages.
Collecting slicer==0.0.7
  Using cached slicer-0.0.7-py3-none-any.whl (14 kB)
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py): started
  Building wheel for memory-profiler (setup.py): finished with status 'done'
  Created wheel for memory-profiler: filename=memory_profiler-0.58.0-py3-none-any.whl size=30183 sha256=cf2758078a70e5644e311585cb172b3b41df64144edcc369c3b82278850cef08
  Stored in directory: c:\users\sujin\appdata\local\pip\cache\wheels\6a\37\3e\d9e8ebaf73956a3ebd2ee41869444dbd2a702d7142bcf93c42
Successfully built memory-profiler
Installing collected packages: slicer, shap, memory-profiler, tempeh
Successfully installed memory-profil

In [3]:
import pandas as pd

from aif360.datasets import StandardDataset

In [6]:
default_mappings = {
    'label_maps': [{1.0: '1'}],
    'protected_attribute_maps': [{1.0: 'Male', 0.0: 'Female'}]
}


class PhysionetDataset(StandardDataset):

    def __init__(self, label_name='return.to.emergency.department.within.6.months',
                 favorable_classes=[1],
                 protected_attribute_names=['gender'],
                 privileged_classes=[[1]],
                 instance_weights_name=None,
                 categorical_features=['DestinationDischarge', 'admission.ward', 'admission.way',
                                       "occupation", "discharge.department", "visit.times",
                                       "type.of.heart.failure", "NYHA.cardiac.function.classification",
                                       'Killip.grade', "ageCat", "type.II.respiratory.failure", "consciousness",
                                       "respiratory.support.", "oxygen.inhalation", "outcome.during.hospitalization"],
                 features_to_keep=[], features_to_drop=[],
                 na_values=[], custom_preprocessing=None,
                 metadata=default_mappings):

        # filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
        #                         '..', 'data', 'raw', 'german', 'german.data')

        filepath = 'PhysioNetBinary.csv'

        column_names = ['inpatient.number',
                        'DestinationDischarge', 'admission.ward', 'admission.way', 'occupation',
                        'discharge.department', 'visit.times', 'gender', 'body.temperature',
                        'pulse', 'respiration', 'map', 'weight', 'height', 'BMI',
                        'type.of.heart.failure', 'NYHA.cardiac.function.classification',
                        'Killip.grade', 'myocardial.infarction', 'congestive.heart.failure',
                        'peripheral.vascular.disease', 'cerebrovascular.disease', 'dementia',
                        'Chronic.obstructive.pulmonary.disease', 'connective.tissue.disease',
                        'peptic.ulcer.disease', 'diabetes',
                        'moderate.to.severe.chronic.kidney.disease', 'hemiplegia',
                        'malignant.lymphoma', 'solid.tumor', 'liver.disease', 'AIDS',
                        'type.II.respiratory.failure', 'consciousness', 'eye.opening',
                        'movement', 'respiratory.support.', 'oxygen.inhalation', 'fio2',
                        'acute.renal.failure', 'left.ventricular.end.diastolic.diameter.LV',
                        'outcome.during.hospitalization',
                        'return.to.emergency.department.within.6.months',
                        'creatinine.enzymatic.method', 'urea', 'uric.acid',
                        'glomerular.filtration.rate', 'cystatin', 'monocyte.ratio',
                        'lymphocyte.count', 'mean.hemoglobin.volume',
                        'mean.hemoglobin.concentration', 'mean.platelet.volume',
                        'basophil.count', 'eosinophil.count', 'hemoglobin',
                        'platelet.distribution.width', 'platelet.hematocrit',
                        'neutrophil.count', 'D.dimer', 'international.normalized.ratio',
                        'activated.partial.thromboplastin.time', 'thrombin.time',
                        'prothrombin.activity', 'fibrinogen', 'carbon.dioxide.binding.capacity',
                        'calcium', 'potassium', 'sodium',
                        'creatine.kinase.isoenzyme.to.creatine.kinase',
                        'hydroxybutyrate.dehydrogenase.to.lactate.dehydrogenase',
                        'hydroxybutyrate.dehydrogenase', 'glutamic.oxaloacetic.transaminase',
                        'creatine.kinase', 'creatine.kinase.isoenzyme',
                        'brain.natriuretic.peptide', 'nucleotidase', 'fucosidase',
                        'white.globulin.ratio', 'glutamyltranspeptidase',
                        'glutamic.pyruvic.transaminase', 'alkaline.phosphatase',
                        'total.bilirubin', 'total.bile.acid', 'total.protein',
                        'low.density.lipoprotein.cholesterol', 'triglyceride',
                        'high.density.lipoprotein.cholesterol', 'ageCat']
        try:
            df = pd.read_csv(filepath, header=None, names=column_names,
                             na_values=na_values)
        except IOError as err:
            print("IOError: {}".format(err))
            print("To use this class, please download the following files:")
            print("\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data")
            print("\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc")
            print("\nand place them, as-is, in the folder:")
            print("\n\t{}\n".format(os.path.abspath(os.path.join(
                os.path.abspath(__file__), '..', '..', 'data', 'raw', 'german'))))
            import sys
            sys.exit(1)

        super(PhysionetDataset, self).__init__(df=df, label_name=label_name,
                                               favorable_classes=favorable_classes,
                                               protected_attribute_names=protected_attribute_names,
                                               privileged_classes=privileged_classes,
                                               instance_weights_name=instance_weights_name,
                                               categorical_features=categorical_features,
                                               features_to_keep=features_to_keep,
                                               features_to_drop=features_to_drop, na_values=na_values,
                                               custom_preprocessing=custom_preprocessing, metadata=metadata)

In [7]:
TestClass1 = PhysionetDataset()

ValueError: could not convert string to float: 'inpatient.number'


ValueError: DataFrame values must be numerical.

In [10]:
train_path = "Aif360ExampleDataRaw/adult.data"

column_names = ['age', 'workclass', 'fnlwgt', 'education',
            'education-num', 'marital-status', 'occupation', 'relationship',
            'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week',
            'native-country', 'income-per-year']

na_values=['?']

train = pd.read_csv(train_path, header=None, names=column_names,
                skipinitialspace=True, na_values=na_values)

In [30]:
train.dropna()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income-per-year
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [28]:
train2 = pd.read_csv('PhysioNetDataEdited.csv')
train2

Unnamed: 0,inpatient.number,DestinationDischarge,admission.ward,admission.way,occupation,discharge.department,visit.times,sex,body.temperature,pulse,...,glutamyltranspeptidase,glutamic.pyruvic.transaminase,alkaline.phosphatase,total.bilirubin,total.bile.acid,total.protein,low.density.lipoprotein.cholesterol,triglyceride,high.density.lipoprotein.cholesterol,ageCat
0,857781,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.7,87,...,83,65.0,61,18.3,4.9,61.9,1.90,2.69,0.84,"(69,79]"
1,743087,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Female,36.8,95,...,17,19.0,69,18.3,2.4,66.1,1.26,0.84,1.30,"(69,79]"
2,866418,Home,Cardiology,NonEmergency,farmer,Cardiology,2,Male,36.5,98,...,27,10.0,76,11.9,2.4,60.8,2.13,1.53,0.94,"(59,69]"
3,775928,Home,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36.0,73,...,134,9.0,191,51.6,9.4,67.1,1.31,0.92,0.98,"(69,79]"
4,810128,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Female,35.0,88,...,17,19.0,61,11.8,3.3,71.9,1.21,0.62,0.91,"(69,79]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003,740689,HealthcareFacility,GeneralWard,Emergency,Others,GeneralWard,1,Female,36.1,117,...,27,6.0,59,12.3,1.9,68.4,1.76,0.96,1.07,"(79,89]"
2004,734280,HealthcareFacility,GeneralWard,Emergency,UrbanResident,GeneralWard,1,Female,36.3,70,...,14,10.0,66,41.8,3.0,63.7,0.93,0.38,0.61,"(79,89]"
2005,781004,HealthcareFacility,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36.8,113,...,39,21.0,80,18.3,4.7,64.7,1.76,0.96,1.07,"(39,49]"
2006,744870,Unknown,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.4,134,...,39,21.0,80,18.3,4.7,64.7,1.76,0.96,1.07,"(49,59]"


In [24]:
train2.gender = train2.gender.replace('Male', 1.0)
train2.gender = train2.gender.replace('Female', 0.0)

In [25]:
train2.to_csv(r'PhysioNetBinary.csv', index = False)


In [31]:
train2.dropna()

Unnamed: 0,inpatient.number,DestinationDischarge,admission.ward,admission.way,occupation,discharge.department,visit.times,sex,body.temperature,pulse,...,glutamyltranspeptidase,glutamic.pyruvic.transaminase,alkaline.phosphatase,total.bilirubin,total.bile.acid,total.protein,low.density.lipoprotein.cholesterol,triglyceride,high.density.lipoprotein.cholesterol,ageCat
0,857781,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.7,87,...,83,65.0,61,18.3,4.9,61.9,1.90,2.69,0.84,"(69,79]"
1,743087,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Female,36.8,95,...,17,19.0,69,18.3,2.4,66.1,1.26,0.84,1.30,"(69,79]"
2,866418,Home,Cardiology,NonEmergency,farmer,Cardiology,2,Male,36.5,98,...,27,10.0,76,11.9,2.4,60.8,2.13,1.53,0.94,"(59,69]"
3,775928,Home,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36.0,73,...,134,9.0,191,51.6,9.4,67.1,1.31,0.92,0.98,"(69,79]"
4,810128,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Female,35.0,88,...,17,19.0,61,11.8,3.3,71.9,1.21,0.62,0.91,"(69,79]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003,740689,HealthcareFacility,GeneralWard,Emergency,Others,GeneralWard,1,Female,36.1,117,...,27,6.0,59,12.3,1.9,68.4,1.76,0.96,1.07,"(79,89]"
2004,734280,HealthcareFacility,GeneralWard,Emergency,UrbanResident,GeneralWard,1,Female,36.3,70,...,14,10.0,66,41.8,3.0,63.7,0.93,0.38,0.61,"(79,89]"
2005,781004,HealthcareFacility,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36.8,113,...,39,21.0,80,18.3,4.7,64.7,1.76,0.96,1.07,"(39,49]"
2006,744870,Unknown,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.4,134,...,39,21.0,80,18.3,4.7,64.7,1.76,0.96,1.07,"(49,59]"


In [34]:
df = pd.read_csv('PhysioNetDataEdited.csv', header=None, names=column_names,
                             na_values=na_values)

In [35]:
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55,Unnamed: 56,Unnamed: 57,Unnamed: 58,Unnamed: 59,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income-per-year
inpatient.number,DestinationDischarge,admission.ward,admission.way,occupation,discharge.department,visit.times,sex,body.temperature,pulse,respiration,map,weight,height,BMI,type.of.heart.failure,NYHA.cardiac.function.classification,Killip.grade,myocardial.infarction,congestive.heart.failure,peripheral.vascular.disease,cerebrovascular.disease,dementia,Chronic.obstructive.pulmonary.disease,connective.tissue.disease,peptic.ulcer.disease,diabetes,moderate.to.severe.chronic.kidney.disease,hemiplegia,malignant.lymphoma,solid.tumor,liver.disease,AIDS,type.II.respiratory.failure,consciousness,eye.opening,movement,respiratory.support.,oxygen.inhalation,fio2,acute.renal.failure,left.ventricular.end.diastolic.diameter.LV,outcome.during.hospitalization,return.to.emergency.department.within.6.months,creatinine.enzymatic.method,urea,uric.acid,glomerular.filtration.rate,cystatin,monocyte.ratio,lymphocyte.count,mean.hemoglobin.volume,mean.hemoglobin.concentration,mean.platelet.volume,basophil.count,eosinophil.count,hemoglobin,platelet.distribution.width,platelet.hematocrit,neutrophil.count,D.dimer,international.normalized.ratio,activated.partial.thromboplastin.time,thrombin.time,prothrombin.activity,fibrinogen,carbon.dioxide.binding.capacity,calcium,potassium,sodium,creatine.kinase.isoenzyme.to.creatine.kinase,hydroxybutyrate.dehydrogenase.to.lactate.dehydrogenase,hydroxybutyrate.dehydrogenase,glutamic.oxaloacetic.transaminase,creatine.kinase,creatine.kinase.isoenzyme,brain.natriuretic.peptide,nucleotidase,fucosidase,white.globulin.ratio,glutamyltranspeptidase,glutamic.pyruvic.transaminase,alkaline.phosphatase,total.bilirubin,total.bile.acid,total.protein,low.density.lipoprotein.cholesterol,triglyceride,high.density.lipoprotein.cholesterol,ageCat
857781,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.7,87,19,76.66666667,50,1.64,18.59012493,Both,III,III,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,53,Alive,0,108.3,12.55,685,58.57,1.32,0.085,1.51,32,338,14,0.05,0.02,131,16.6,0.142,7.05,1.19,1.39,33.4,17,60.3,3.84,21.6,2.28,5.59,134.6,0.22,0.63,185,81,43,9.6,1500.17,3.5,20.5,1.6,83,65,61,18.3,4.9,61.9,1.9,2.69,0.84,"(69,79]"
743087,Home,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Female,36.8,95,18,96.66666667,51,1.63,19.1953028,Both,III,I,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,40,Alive,0,62,4.29,170,85.43,1.25,0.066,1.18,28.9,326,11.3,0.01,0.05,114,16.1,0.139,3.73,1.06,1.16,35.9,18.4,65.1,2.62,26.5,2.28,3.62,144,0.16,0.84,170,21,107,17.2,361.7,2.2,21.9,1.6,17,19,69,18.3,2.4,66.1,1.26,0.84,1.3,"(69,79]"
866418,Home,Cardiology,NonEmergency,farmer,Cardiology,2,Male,36.5,98,18,78.66666667,70,1.7,24.22145329,Both,II,II,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,46,Alive,0,185.1,15.99,567,31.51,2.43,0.051,0.75,32.6,339,12,0.03,0.02,144,16.5,0.203,11.54,0.83,1.1,36.2,14.9,85.7,5.74,21.6,2.56,4.15,142.2,0.38,0.77,105,9,38,14.6,293.95,2.3,12.3,1.2,27,10,76,11.9,2.4,60.8,2.13,1.53,0.94,"(59,69]"
775928,Home,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36,73,19,86,65,1.7,22.49134948,Both,III,II,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,53,Alive,1,104.8,8.16,635,58.01,2.32,0.087,0.71,34.7,339,12.4,0,0.06,94,19.4,0.032,1.22,1.39,1.37,38.3,18.3,60.9,2.63,21.1,2.35,3.76,136.3,0.17,0.82,187,26,91,16.1,1071.4,11.2,19.4,1.5,134,9,191,51.6,9.4,67.1,1.31,0.92,0.98,"(69,79]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
740689,HealthcareFacility,GeneralWard,Emergency,Others,GeneralWard,1,Female,36.1,117,22,93.33333333,35,1.5,15.55555556,Both,IV,I,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,53,DischargeAgainstOrder,0,115.3,8.72,739,38.41,2.49,0.134,0.6,28.6,329,14.4,0.02,0.05,126,23.3,0.174,2.83,1.35,1.08,25.3,17.8,83.33,2.01,24.6,2.4,3.51,141,0.2,0.88,194,16,44,8.9,1056.28,4.3,14.9,1,27,6,59,12.3,1.9,68.4,1.76,0.96,1.07,"(79,89]"
734280,HealthcareFacility,GeneralWard,Emergency,UrbanResident,GeneralWard,1,Female,36.3,70,21,97.33333333,50,1.55,20.81165453,Both,IV,II,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,29,0,53,Alive,0,63.1,5.96,396,64.79,1.31,0.072,0.73,23.6,294,11.3,0.02,0.05,99,14.4,0.17,3.45,0.83,1.34,43.3,17.9,52.63,1.96,23.5,2.23,3.81,136,0.31,0.85,165,11,31,9.5,503.82,1.7,22.3,1,14,10,66,41.8,3,63.7,0.93,0.38,0.61,"(79,89]"
781004,HealthcareFacility,Cardiology,Emergency,UrbanResident,Cardiology,1,Male,36.8,113,19,111.6666667,75,1.7,25.95155709,Both,III,I,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,NonTypeII,Clear,4,6,,OxygenTherapy,33,0,53,Alive,1,51.9,4.27,377,172.28,1.16,0.07,0.86,30.9,349,12.8,0.02,0.04,178,16.6,0.174,4.09,0.39,1.27,38.6,20.4,67.8,1.68,24,2.34,3.56,132.1,0.26,0.87,186,58,93,24.5,548.97,3,18.7,1.3,39,21,80,18.3,4.7,64.7,1.76,0.96,1.07,"(39,49]"
744870,Unknown,Cardiology,NonEmergency,UrbanResident,Cardiology,1,Male,36.4,134,19,78.66666667,40,1.5,17.77777778,Both,IV,IV,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,NonTypeII,Nonresponsive,1,1,IMV,OxygenTherapy,41,0,53,DischargeAgainstOrder,0,191.1,35.87,1409,32.75,2.84,0.079,1.02,32.8,344,13.1,0.02,0,133,16.3,0.235,7.9,1.38,2.26,30.6,16,22.7,4.31,19.3,2.37,5.82,120,0.11,0.47,322,702,274,29.8,2614.12,3,18.7,1.3,39,21,80,18.3,4.7,64.7,1.76,0.96,1.07,"(49,59]"
