In [4]:
import pandas as pd
import os

In [5]:
folder_raw = os.path.join("data", "raw")
folder_preprocessed = os.path.join("data", "preprocessed")

In [6]:
def bp_to_cat(ap_hi: int=120, ap_lo: int=80) -> int:
    '''
    blood pressure to category:
        0 - low (hypotension)
        1 - normal
        2 - elevated
        3 - hypertension stage 1
        4 - hypertension stage 2
        5 - hypertension crisis
    '''
    if ap_hi <= 90 and ap_lo <= 60:
        return 0
    if ap_hi <= 120 and ap_lo <= 80:
        return 1
    if ap_hi <= 130 and ap_lo <= 80:
        return 2
    if ap_hi < 140 or ap_lo < 90:
        return 3
    if ap_hi < 180 or ap_lo < 120:
        return 4
    return 5

In [7]:
def chol_to_cat(chol: int) -> int:
    '''
    cholesterol to category:
        0 - normal
        1 - above normal
        2 - well above normal
    '''
    if chol < 200:
        return 0
    if chol < 240:
        return 1
    return 2

In [8]:
def gluc_to_cat(gluc: int) -> int:
    '''
    glucose to category:
        0 - normal
        1 - above normal
        2 - well above normal
    '''
    if gluc < 100:
        return 0
    if gluc < 125:
        return 1
    return 2

## Cardiovascular disease

In [156]:
cardio_df = pd.read_csv(os.path.join(folder_raw, "cardio_train.csv"), sep=";")

In [157]:
cardio_df.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0


In [158]:
cardio_df['age'] = cardio_df['age']//365.25
cardio_df['age'] = cardio_df['age'].astype(int)

In [159]:
cardio_df['gender'] = cardio_df['gender'].apply(lambda x: 1 if x==1 else 0)

In [160]:
cardio_df['weight'] = cardio_df['weight'].astype(int)

In [161]:
cardio_df['blood_pressure'] = cardio_df.apply(lambda row: bp_to_cat(row['ap_hi'], row['ap_lo']), axis=1)

In [162]:
cardio_df['hypertension'] = cardio_df['blood_pressure'].apply(lambda x: 1 if x>=3 else 0)

In [163]:
cardio_df['cholesterol'] -= 1

In [164]:
cardio_df['glucose'] = cardio_df['gluc']-1

In [165]:
cardio_df['diabetes'] = cardio_df['glucose'].apply(lambda x: 1 if x >= 2 else 0)

In [166]:
cardio_df['alcohol'] = cardio_df['alco']

In [167]:
cardio_df['cardio_disease'] = cardio_df['cardio']

In [168]:
cardio_df.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,blood_pressure,hypertension,glucose,diabetes,alcohol,cardio_disease
0,0,50,0,168,62,110,80,0,1,0,0,1,0,1,0,0,0,0,0
1,1,55,1,156,85,140,90,2,1,0,0,1,1,4,1,0,0,0,1
2,2,51,1,165,64,130,70,2,1,0,0,0,1,2,0,0,0,0,1
3,3,48,0,169,82,150,100,0,1,0,0,1,1,4,1,0,0,0,1
4,4,47,1,156,56,100,60,0,1,0,0,0,0,1,0,0,0,0,0


In [169]:
columns = ['age', 'gender', 'height', 'weight', 'blood_pressure', 'cholesterol', 'glucose', 'smoke', 'alcohol', 'active', 'cardio_disease', 'hypertension', 'diabetes']
cardio_df.to_csv(os.path.join(folder_preprocessed, "cardio.csv"), columns=columns)

## Heart disease

In [92]:
heart_df = pd.read_csv(os.path.join(folder_raw, "heart.csv"))

In [93]:
heart_df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [94]:
heart_df['gender'] = heart_df['sex']

In [95]:
heart_df['chest_pain_type'] = heart_df['cp']

In [96]:
heart_df['blood_pressure'] = heart_df['trestbps'].apply(lambda bps: bp_to_cat(ap_hi=bps))

In [97]:
heart_df['hypertension'] = heart_df['blood_pressure'].apply(lambda x: 1 if x>=3 else 0)

In [98]:
heart_df['cholesterol'] = heart_df['chol'].apply(lambda chol: chol_to_cat(chol))

In [99]:
heart_df['diabetes'] = heart_df['fbs']

In [100]:
heart_df['max_heart_rate'] = heart_df['thalach']

In [103]:
# coronary artery disease
# 0 not present, 1 - present 
heart_df['cad'] =  heart_df['oldpeak'].apply(lambda x: 1 if x>1.0 else 0)

In [104]:
heart_df['heart_disease'] = heart_df['target']

In [105]:
heart_df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,...,target,gender,chest_pain_type,blood_pressure,hypertension,cholesterol,diabetes,max_heart_rate,heart_disease,cad
0,63,1,3,145,233,1,0,150,0,2.3,...,1,1,3,3,1,1,1,150,1,1
1,37,1,2,130,250,0,1,187,0,3.5,...,1,1,2,2,0,2,0,187,1,1
2,41,0,1,130,204,0,0,172,0,1.4,...,1,0,1,2,0,1,0,172,1,1
3,56,1,1,120,236,0,1,178,0,0.8,...,1,1,1,1,0,1,0,178,1,0
4,57,0,0,120,354,0,1,163,1,0.6,...,1,0,0,1,0,2,0,163,1,0


In [106]:
heart_df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target', 'gender',
       'chest_pain_type', 'blood_pressure', 'hypertension', 'cholesterol',
       'diabetes', 'max_heart_rate', 'heart_disease', 'cad'],
      dtype='object')

In [107]:
columns = ['age', 'gender', 'chest_pain_type', 'blood_pressure', 'cholesterol', 'diabetes', 'max_heart_rate', 'cad', 'heart_disease', 'hypertension']
heart_df.to_csv(os.path.join(folder_preprocessed, "heart.csv"), columns=columns)

## Chronic Kidney disease

In [9]:
kidney_df = pd.read_csv(os.path.join(folder_raw, "kidney_disease.csv"))

In [10]:
kidney_df.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,38,6000,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,31,7500,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,35,7300,4.6,no,no,no,good,no,no,ckd


In [11]:
kidney_df[kidney_df['age'].isna()]

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
30,30,,70.0,,,,,,notpresent,notpresent,...,,,,yes,yes,no,good,no,no,ckd
73,73,,100.0,1.015,2.0,0.0,abnormal,abnormal,notpresent,notpresent,...,14.0,6300.0,,yes,no,no,good,yes,yes,ckd
112,112,,60.0,1.015,3.0,0.0,abnormal,abnormal,notpresent,notpresent,...,33.0,,,no,no,no,good,no,no,ckd
116,116,,70.0,1.015,4.0,0.0,abnormal,normal,notpresent,notpresent,...,,,,no,no,no,good,yes,no,ckd
117,117,,70.0,1.02,0.0,0.0,,,notpresent,notpresent,...,37.0,9800.0,4.4,no,no,no,good,no,no,ckd
169,169,,70.0,1.01,0.0,2.0,,normal,notpresent,notpresent,...,27.0,,,yes,yes,no,good,no,yes,ckd
191,191,,70.0,1.01,3.0,0.0,normal,normal,notpresent,notpresent,...,26.0,9200.0,3.4,yes,yes,no,poor,no,no,ckd
203,203,,90.0,,,,,,notpresent,notpresent,...,,,,yes,yes,no,good,no,yes,ckd
268,268,,80.0,,,,,,notpresent,notpresent,...,53.0,8500.0,4.9,no,no,no,good,no,no,notckd


In [12]:
kidney_df = kidney_df[kidney_df['age'].notna()]
kidney_df['age'] = kidney_df['age'].astype(int)

In [13]:
kidney_df['blood_pressure'] = kidney_df['bp'].apply(lambda x: bp_to_cat(ap_lo=x))

In [14]:
kidney_df['hypertension'] = kidney_df['blood_pressure'].apply(lambda x: 1 if x>=3 else 0)

In [15]:
kidney_df['glucose'] = kidney_df['bgr'].apply(gluc_to_cat)

In [16]:
kidney_df['diabetes'] = kidney_df['glucose'].apply(lambda x: 1 if x >= 2 else 0)

In [17]:
yesno_to_bin = lambda x: 1 if x=='yes' else 0

In [18]:
kidney_df['appetite'] = kidney_df['appet'].apply(yesno_to_bin)

In [19]:
kidney_df['pedal_edema'] = kidney_df['pe'].apply(yesno_to_bin)

In [20]:
kidney_df['anemia'] = kidney_df['ane'].apply(yesno_to_bin)

In [21]:
kidney_df['cad'] = kidney_df['cad'].apply(yesno_to_bin)

In [22]:
kidney_df['kidney_disease'] = kidney_df['classification'].apply(lambda x: 1 if x == 'notckd' else 0)

In [23]:
kidney_df.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,ane,classification,blood_pressure,hypertension,glucose,diabetes,appetite,pedal_edema,anemia,kidney_disease
0,0,48,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,no,ckd,1,0,1,0,0,0,0,0
1,1,7,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,no,ckd,1,0,2,1,0,0,0,0
2,2,62,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,yes,ckd,1,0,2,1,0,0,1,0
3,3,48,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,yes,ckd,1,0,1,0,0,1,1,0
4,4,51,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,no,ckd,1,0,1,0,0,0,0,0


In [24]:
kidney_df.columns

Index(['id', 'age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr',
       'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
       'appet', 'pe', 'ane', 'classification', 'blood_pressure',
       'hypertension', 'glucose', 'diabetes', 'appetite', 'pedal_edema',
       'anemia', 'kidney_disease'],
      dtype='object')

In [25]:
columns = ['age', 'cad', 'blood_pressure',
       'hypertension', 'glucose', 'diabetes', 'appetite', 'pedal_edema',
       'anemia', 'kidney_disease']
kidney_df.to_csv(os.path.join(folder_preprocessed, "kidney.csv"), columns=columns)

## Ocular disease

In [108]:
ocular_df = pd.read_csv(os.path.join(folder_raw, "ocular_disease.csv"))

In [109]:
del ocular_df['filepath']
del ocular_df['filename']
del ocular_df['target']
del ocular_df['labels']
del ocular_df['Left-Fundus']
del ocular_df['Right-Fundus']


In [110]:
ocular_df.head()

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0,69,Female,cataract,normal fundus,0,0,0,1,0,0,0,0
1,1,57,Male,normal fundus,normal fundus,1,0,0,0,0,0,0,0
2,2,42,Male,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,4,53,Male,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1
4,5,50,Female,moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0


In [111]:
ocular_df['id'] = ocular_df['ID']

In [112]:
ocular_df['age'] = ocular_df['Patient Age']

In [113]:
ocular_df['gender'] = ocular_df['Patient Sex'].apply(lambda s: 0 if s=='Male' else 1)

In [114]:
ocular_df.rename(columns={'D': 'diabetes', 'G': 'glaucoma', 'C': 'cataract', 'A': 'degeneration', 'M': 'myopia', 'O': 'other', 'H': 'hypertension', 'N': 'normal' }, inplace=True)


In [115]:
columns = ['age', 'gender', 'normal', 'diabetes', 'glaucoma', 'cataract', 'hypertension', 'myopia', 'other']
ocular_df.to_csv(os.path.join(folder_preprocessed, "ocular.csv"), columns=columns)

In [116]:
ocular_diagnostics = set()

In [117]:
def add_keywords(diagnostics: str) -> None:
    l = diagnostics.replace('，', ',').split(',')
    for elem in l:
        ocular_diagnostics.add(elem)

In [118]:
ocular_df['Left-Diagnostic Keywords'].apply(add_keywords)
ocular_df['Right-Diagnostic Keywords'].apply(add_keywords)
pass

In [119]:
ocular_diagnostics_df = pd.DataFrame.from_dict(ocular_diagnostics)

In [120]:
ocular_diagnostics_df.index.names = ['id']
ocular_diagnostics_df.rename(columns={0: 'diagnostic'}, inplace=True)

In [121]:
eye_diagnostics = []

In [122]:
for index, row in ocular_df.iterrows():
    for diagnostic in row['Left-Diagnostic Keywords'].replace('，', ',').split(','):
        diag_id = ocular_diagnostics_df.index[ocular_diagnostics_df['diagnostic'] == diagnostic][0]
        eye_diagnostics.append({'case_id': index, 'diag_id': diag_id, 'eye': 0})
    for diagnostic in row['Right-Diagnostic Keywords'].replace('，', ',').split(','):
        diag_id = ocular_diagnostics_df.index[ocular_diagnostics_df['diagnostic'] == diagnostic][0]
        eye_diagnostics.append({'case_id': index, 'diag_id': diag_id, 'eye': 1})

In [123]:
len(eye_diagnostics)

14039

In [124]:
eye_diagnostics = pd.DataFrame(eye_diagnostics)

In [125]:
ocular_diagnostics_df.to_csv(os.path.join(folder_preprocessed, "ocular_diag_keywords.csv"))
ocular_diagnostics_df.head()

Unnamed: 0_level_0,diagnostic
id,Unnamed: 1_level_1
0,retinal pigment epithelial hypertrophy
1,proliferative diabetic retinopathy
2,central serous chorioretinopathy
3,hypertensive retinopathy
4,pigment epithelium proliferation


In [126]:
eye_diagnostics.to_csv(os.path.join(folder_preprocessed, "eye_diagnostics.csv"))
eye_diagnostics.head()

Unnamed: 0,case_id,diag_id,eye
0,0,91,0
1,0,61,1
2,1,61,0
3,1,61,1
4,2,19,0


## India Covid

In [131]:
covid_india_df = pd.read_csv(os.path.join(folder_raw, "covid_19_india.csv"))

In [132]:
del covid_india_df['Time']
del covid_india_df['Sno']

In [133]:
covid_india_df.head()

Unnamed: 0,Date,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,30/01/20,Kerala,1,0,0,0,1
1,31/01/20,Kerala,1,0,0,0,1
2,01/02/20,Kerala,2,0,0,0,2
3,02/02/20,Kerala,3,0,0,0,3
4,03/02/20,Kerala,3,0,0,0,3


In [134]:
covid_india_df.rename(columns={'Date': 'date', 'State/UnionTerritory': 'state', 'ConfirmedIndianNational': 'indians',
 'ConfirmedForeignNational': 'foreigns', 'Cured': 'cured', 'Deaths': 'deaths', 'Confirmed': 'confirmed'}, inplace=True)

In [135]:
def dmy_to_iso(date: str) -> str:
    d, m, y = date.split('/')
    return f'20{y}-{m}-{d}'

In [136]:
covid_india_df['date'] = covid_india_df['date'].apply(dmy_to_iso)

In [137]:
indian_states = pd.DataFrame(covid_india_df['state'].unique())
indian_states.rename(columns={0:'state'}, inplace=True)
indian_states.to_csv(os.path.join(folder_preprocessed, "indian_states.csv"))
indian_states.head()

Unnamed: 0,state
0,Kerala
1,Telengana
2,Delhi
3,Rajasthan
4,Uttar Pradesh


In [138]:
covid_india_df['state'] = covid_india_df['state'].map(lambda state: indian_states.index[indian_states['state'] == state][0])

In [139]:
covid_india_df.to_csv(os.path.join(folder_preprocessed, "covid_india.csv"))
covid_india_df

Unnamed: 0,date,state,indians,foreigns,cured,deaths,confirmed
0,2020-01-30,0,1,0,0,0,1
1,2020-01-31,0,1,0,0,0,1
2,2020-02-01,0,2,0,0,0,2
3,2020-02-02,0,3,0,0,0,3
4,2020-02-03,0,3,0,0,0,3
...,...,...,...,...,...,...,...
9286,2020-12-09,1,-,-,266120,1480,275261
9287,2020-12-09,31,-,-,32169,373,32945
9288,2020-12-09,13,-,-,72435,1307,79141
9289,2020-12-09,4,-,-,528832,7967,558173


## Covid Italy

In [76]:
covid_italy_df = ocular_df = pd.read_csv(os.path.join(folder_raw, "covid19_italy_province.csv"))

In [77]:
del covid_italy_df['Country']
covid_italy_df.rename(columns={'TotalPositiveCases': 'cases'}, inplace=True)

In [78]:
covid_italy_df.head()

Unnamed: 0,SNo,Date,RegionCode,RegionName,ProvinceCode,ProvinceName,ProvinceAbbreviation,Latitude,Longitude,cases
0,0,2020-02-24T18:00:00,13,Abruzzo,66,L'Aquila,AQ,42.351222,13.398438,0
1,1,2020-02-24T18:00:00,13,Abruzzo,67,Teramo,TE,42.658918,13.7044,0
2,2,2020-02-24T18:00:00,13,Abruzzo,68,Pescara,PE,42.464584,14.213648,0
3,3,2020-02-24T18:00:00,13,Abruzzo,69,Chieti,CH,42.351032,14.167546,0
4,4,2020-02-24T18:00:00,13,Abruzzo,979,In fase di definizione/aggiornamento,,,,0


In [79]:
covid_italy_df.columns = covid_italy_df.columns.str.lower()
covid_italy_df.head()

Unnamed: 0,sno,date,regioncode,regionname,provincecode,provincename,provinceabbreviation,latitude,longitude,cases
0,0,2020-02-24T18:00:00,13,Abruzzo,66,L'Aquila,AQ,42.351222,13.398438,0
1,1,2020-02-24T18:00:00,13,Abruzzo,67,Teramo,TE,42.658918,13.7044,0
2,2,2020-02-24T18:00:00,13,Abruzzo,68,Pescara,PE,42.464584,14.213648,0
3,3,2020-02-24T18:00:00,13,Abruzzo,69,Chieti,CH,42.351032,14.167546,0
4,4,2020-02-24T18:00:00,13,Abruzzo,979,In fase di definizione/aggiornamento,,,,0


In [80]:
covid_italy_df['date'] = covid_italy_df['date'].apply(lambda s: s.split('T')[0])

In [81]:
italy_regions = pd.DataFrame(covid_italy_df['regionname'].unique())
italy_regions.rename(columns={0:'region'}, inplace=True)
italy_regions.to_csv(os.path.join(folder_preprocessed, "italy_regions.csv"))
italy_regions.head()

Unnamed: 0,region
0,Abruzzo
1,Basilicata
2,Calabria
3,Campania
4,Emilia-Romagna


In [82]:
provinces_unique = set()
provinces = []

for index, row in covid_italy_df.iterrows():
    if row['provincename'] in provinces_unique:
        continue
    region_code = italy_regions.index[italy_regions['region'] == row['regionname']][0]
    province_name = row['provincename']
    province_abbr = row['provinceabbreviation']

    provinces.append({"region": region_code, "province": province_name, "abbreviation": province_abbr})


In [127]:
provinces_df = pd.DataFrame(provinces)
provinces_df.to_csv(os.path.join(folder_preprocessed, "italy_provinces.csv"))
provinces_df.head()

Unnamed: 0,region,province,abbreviation
0,0,L'Aquila,AQ
1,0,Teramo,TE
2,0,Pescara,PE
3,0,Chieti,CH
4,0,In fase di definizione/aggiornamento,


In [84]:
covid_italy_df['province'] = covid_italy_df['provincename'].apply(lambda x: provinces_df.index[provinces_df['province'] == x][0])

In [85]:
columns = ['date', 'province', 'cases', 'latitude', 'longitude']
covid_italy_df.to_csv(os.path.join(folder_preprocessed, "covid_italy.csv"), columns=columns)
covid_italy_df.head()

Unnamed: 0,sno,date,regioncode,regionname,provincecode,provincename,provinceabbreviation,latitude,longitude,cases,province
0,0,2020-02-24,13,Abruzzo,66,L'Aquila,AQ,42.351222,13.398438,0,0
1,1,2020-02-24,13,Abruzzo,67,Teramo,TE,42.658918,13.7044,0,1
2,2,2020-02-24,13,Abruzzo,68,Pescara,PE,42.464584,14.213648,0,2
3,3,2020-02-24,13,Abruzzo,69,Chieti,CH,42.351032,14.167546,0,3
4,4,2020-02-24,13,Abruzzo,979,In fase di definizione/aggiornamento,,,,0,4


## Categories

In [86]:
eye_cat = pd.DataFrame([{'cat': 'left'}, {'cat': 'right'}])
eye_cat.to_csv(os.path.join(folder_preprocessed, "cat_eye.csv"))
eye_cat

Unnamed: 0,cat
0,left
1,right


In [87]:
l = ['low (hypotension)', 'normal', 'elevated', 'grade 1 hypertension ', 'grade 2 hypertension', 'hypertension crisis']
bp_cat = pd.DataFrame([{'cat': x} for x in l])
bp_cat.to_csv(os.path.join(folder_preprocessed, "cat_bp.csv"))
bp_cat

Unnamed: 0,cat
0,low (hypotension)
1,normal
2,elevated
3,grade 1 hypertension
4,grade 2 hypertension
5,hypertension crisis


In [88]:
gender_cat = pd.DataFrame([{'cat': 'male'}, {'cat': 'female'}])
gender_cat.to_csv(os.path.join(folder_preprocessed, "cat_gender.csv"))
gender_cat

Unnamed: 0,cat
0,male
1,female


In [89]:
binary_cat = pd.DataFrame([{'cat': 'no'}, {'cat': 'yes'}])
binary_cat.to_csv(os.path.join(folder_preprocessed, "cat_binary.csv"))
binary_cat

Unnamed: 0,cat
0,no
1,yes


In [90]:
examination_cat = pd.DataFrame([{'cat': 'normal'}, {'cat': 'above normal'}, {'cat': 'well above normal'}])
examination_cat.to_csv(os.path.join(folder_preprocessed, "cat_examination.csv"))
examination_cat

Unnamed: 0,cat
0,normal
1,above normal
2,well above normal


In [91]:
chest_pain_cat = pd.DataFrame([{"cat": "typical angina"}, {"cat": "atypical angina"}, {"cat": "non-anginal pain"}, {"cat": "asymptomatic"}])
chest_pain_cat.to_csv(os.path.join(folder_preprocessed, "cat_chest_pain.csv"))
chest_pain_cat

Unnamed: 0,cat
0,typical angina
1,atypical angina
2,non-anginal pain
3,asymptomatic
