# Related imports

In [1]:
import os
import numpy as np
import pandas as pd 
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor

In [2]:
meta = pd.read_csv('./ehr_dataset_last.csv')

# Missing data handling

In [3]:
#remove patients with misisng gender 
meta = meta[meta.gender.isna() == False]

In [4]:
meta

Unnamed: 0,dicom_id,cxr_view,cxr_orientation,subject_id,gender,age,stay_expire,icu_stay,ageR5,ageR10,path
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,PA,Erect,10000032,F,52.0,0,0,50-55,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,LATERAL,Erect,10000032,F,52.0,0,0,50-55,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,PA,Erect,10000032,F,52.0,0,0,50-55,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,LATERAL,Erect,10000032,F,52.0,0,0,50-55,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,AP,,10000032,F,52.0,0,1,50-55,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
...,...,...,...,...,...,...,...,...,...,...,...
377105,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,PA,Erect,19999733,F,19.0,0,0,<20,<20,/scratch/fs999/shamoutlab/data/physionet.org/f...
377106,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,LATERAL,Erect,19999733,F,19.0,0,0,<20,<20,/scratch/fs999/shamoutlab/data/physionet.org/f...
377107,58766883-376a15ce-3b323a28-6af950a0-16b793bd,AP,Erect,19999987,F,57.0,0,1,55-60,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...
377108,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,AP,Erect,19999987,F,57.0,0,1,55-60,50-60,/scratch/fs999/shamoutlab/data/physionet.org/f...


In [5]:
# check missingniss in the dataset
meta.isna().sum()

dicom_id               0
cxr_view           15757
cxr_orientation    41100
subject_id             0
gender                 0
age                    0
stay_expire            0
icu_stay               0
ageR5                  0
ageR10                 0
path                   0
dtype: int64

In [6]:
Counter(meta.cxr_view)

Counter({'AP': 146879,
         'PA': 95885,
         'LATERAL': 82598,
         'LL': 35082,
         nan: 15757,
         'PA LLD': 4,
         'LAO': 3,
         'RAO': 3,
         'AP LLD': 2,
         'XTABLE LATERAL': 2,
         'AP RLD': 2,
         'SWIMMERS': 1,
         'PA RLD': 1,
         'AP AXIAL': 1,
         'LPO': 1})

In [7]:
# limit the dataset to the views AP, PA, Lateral, LL and missing (nan)
meta = meta[meta.cxr_view.isin(['AP','PA','LATERAL','LL',np.nan])]

In [8]:
meta_image = meta[['cxr_view','cxr_orientation']]

In [9]:
meta_image

Unnamed: 0,cxr_view,cxr_orientation
0,PA,Erect
1,LATERAL,Erect
2,PA,Erect
3,LATERAL,Erect
4,AP,
...,...,...
377105,PA,Erect
377106,LATERAL,Erect
377107,AP,Erect
377108,AP,Erect


In [10]:
le1 = LabelEncoder()
le2 = LabelEncoder()

In [11]:
impute_col = 'cxr_view'
impute_col_en = 'cxr_view_en'
le1.fit(meta_image.loc[:,impute_col])
print(dict(zip(le1.classes_,le1.transform(le1.classes_))))
meta_image[impute_col_en] = le1.transform(meta_image[impute_col])

{'AP': 0, 'LATERAL': 1, 'LL': 2, 'PA': 3, nan: 4}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta_image[impute_col_en] = le1.transform(meta_image[impute_col])


In [12]:
impute_col = 'cxr_orientation'
impute_col_en = 'cxr_orientation_en'
le2.fit(meta_image.loc[:,impute_col])
print(dict(zip(le2.classes_,le2.transform(le2.classes_))))
meta_image[impute_col_en] = le2.transform(meta_image[impute_col])

{'Erect': 0, 'Recumbent': 1, nan: 2}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta_image[impute_col_en] = le2.transform(meta_image[impute_col])


In [13]:
meta_image

Unnamed: 0,cxr_view,cxr_orientation,cxr_view_en,cxr_orientation_en
0,PA,Erect,3,0
1,LATERAL,Erect,1,0
2,PA,Erect,3,0
3,LATERAL,Erect,1,0
4,AP,,0,2
...,...,...,...,...
377105,PA,Erect,3,0
377106,LATERAL,Erect,1,0
377107,AP,Erect,0,0
377108,AP,Erect,0,0


In [14]:
meta_image['cxr_view_en'] = meta_image['cxr_view_en'].map(lambda x: np.nan if x == 4 else x)
meta_image['cxr_orientation_en'] = meta_image['cxr_orientation_en'].map(lambda x: np.nan if x == 2 else x)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta_image['cxr_view_en'] = meta_image['cxr_view_en'].map(lambda x: np.nan if x == 4 else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta_image['cxr_orientation_en'] = meta_image['cxr_orientation_en'].map(lambda x: np.nan if x == 2 else x)


In [15]:
meta_image_en = meta_image.loc[:,'cxr_view_en':]
meta_image_en.head()

Unnamed: 0,cxr_view_en,cxr_orientation_en
0,3.0,0.0
1,1.0,0.0
2,3.0,0.0
3,1.0,0.0
4,0.0,


In [16]:
estimator = RandomForestRegressor()

In [17]:
imputer = IterativeImputer(estimator=estimator,max_iter=10, random_state=24)

In [18]:
imputer.fit(meta_image_en)

In [19]:
imputed_cols = imputer.transform(meta_image_en)
meta_image.loc[:,'cxr_view_en':] = imputed_cols.round()

In [20]:
meta.cxr_view = list(le1.inverse_transform(meta_image.cxr_view_en.astype('int')))
meta.cxr_orientation = list(le2.inverse_transform(meta_image.cxr_orientation_en.astype('int')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta.cxr_view = list(le1.inverse_transform(meta_image.cxr_view_en.astype('int')))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta.cxr_orientation = list(le2.inverse_transform(meta_image.cxr_orientation_en.astype('int')))


In [21]:
meta['male'] = meta['gender'].apply(lambda x: 0. if x == 'F' else 1.)
meta['female'] = meta['gender'].apply(lambda x: 0. if x == 'M' else 1.)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['male'] = meta['gender'].apply(lambda x: 0. if x == 'F' else 1.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['female'] = meta['gender'].apply(lambda x: 0. if x == 'M' else 1.)


In [22]:
meta['age_norm'] = meta.age.apply(lambda x: (x-16)/(100-16))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['age_norm'] = meta.age.apply(lambda x: (x-16)/(100-16))


In [23]:
meta.isna().sum()

dicom_id           0
cxr_view           0
cxr_orientation    0
subject_id         0
gender             0
age                0
stay_expire        0
icu_stay           0
ageR5              0
ageR10             0
path               0
male               0
female             0
age_norm           0
dtype: int64

# Datasets creation

In [26]:
sorted(meta.ageR5.unique())

['20-25',
 '25-30',
 '30-35',
 '35-40',
 '40-45',
 '45-50',
 '50-55',
 '55-60',
 '60-65',
 '65-70',
 '70-75',
 '75-80',
 '80-85',
 '85-90',
 '90+',
 '<20']

In [27]:
meta['<20'] = meta.ageR5.apply(lambda x: 1. if x == '<20' else 0.)
meta['20-25'] = meta.ageR5.apply(lambda x: 1. if x == '20-25' else 0.)
meta['25-30'] = meta.ageR5.apply(lambda x: 1. if x == '25-30' else 0.)
meta['30-35'] = meta.ageR5.apply(lambda x: 1. if x == '30-35' else 0.)
meta['35-40'] = meta.ageR5.apply(lambda x: 1. if x == '35-40' else 0.)
meta['40-45'] = meta.ageR5.apply(lambda x: 1. if x == '40-45' else 0.)
meta['45-50'] = meta.ageR5.apply(lambda x: 1. if x == '45-50' else 0.)
meta['50-55'] = meta.ageR5.apply(lambda x: 1. if x == '50-55' else 0.)
meta['55-60'] = meta.ageR5.apply(lambda x: 1. if x == '55-60' else 0.)
meta['60-65'] = meta.ageR5.apply(lambda x: 1. if x == '60-65' else 0.)
meta['65-70'] = meta.ageR5.apply(lambda x: 1. if x == '65-70' else 0.)
meta['70-75'] = meta.ageR5.apply(lambda x: 1. if x == '70-75' else 0.)
meta['75-80'] = meta.ageR5.apply(lambda x: 1. if x == '75-80' else 0.)
meta['80-85'] = meta.ageR5.apply(lambda x: 1. if x == '80-85' else 0.)
meta['85-90'] = meta.ageR5.apply(lambda x: 1. if x == '85-90' else 0.)
meta['90+'] = meta.ageR5.apply(lambda x: 1. if x == '90+' else 0.)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['<20'] = meta.ageR5.apply(lambda x: 1. if x == '<20' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['20-25'] = meta.ageR5.apply(lambda x: 1. if x == '20-25' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['25-30'] = meta.ageR5.apply(lambda x: 1. if x == '25-30' else 

In [28]:
meta.columns

Index(['dicom_id', 'cxr_view', 'cxr_orientation', 'subject_id', 'gender',
       'age', 'stay_expire', 'icu_stay', 'ageR5', 'ageR10', 'path', 'male',
       'female', 'age_norm', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
       '80-85', '85-90', '90+'],
      dtype='object')

In [29]:
meta['<20*'] = meta.ageR10.apply(lambda x: 1. if x == '<20' else 0.)
meta['20-30'] = meta.ageR10.apply(lambda x: 1. if x == '20-30' else 0.)
meta['30-40'] = meta.ageR10.apply(lambda x: 1. if x == '30-40' else 0.)
meta['40-50'] = meta.ageR10.apply(lambda x: 1. if x == '40-50' else 0.)
meta['50-60'] = meta.ageR10.apply(lambda x: 1. if x == '50-60' else 0.)
meta['60-70'] = meta.ageR10.apply(lambda x: 1. if x == '60-70' else 0.)
meta['70-80'] = meta.ageR10.apply(lambda x: 1. if x == '70-80' else 0.)
meta['80-90'] = meta.ageR10.apply(lambda x: 1. if x == '80-90' else 0.)
meta['90+*'] = meta.ageR10.apply(lambda x: 1. if x == '90+' else 0.)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['<20*'] = meta.ageR10.apply(lambda x: 1. if x == '<20' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['20-30'] = meta.ageR10.apply(lambda x: 1. if x == '20-30' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['30-40'] = meta.ageR10.apply(lambda x: 1. if x == '30-40' e

In [30]:
meta.columns

Index(['dicom_id', 'cxr_view', 'cxr_orientation', 'subject_id', 'gender',
       'age', 'stay_expire', 'icu_stay', 'ageR5', 'ageR10', 'path', 'male',
       'female', 'age_norm', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
       '80-85', '85-90', '90+', '<20*', '20-30', '30-40', '40-50', '50-60',
       '60-70', '70-80', '80-90', '90+*'],
      dtype='object')

In [31]:
meta.cxr_view.unique()

array(['PA', 'LATERAL', 'AP', 'LL'], dtype=object)

In [32]:
meta['PA'] = meta.cxr_view.apply(lambda x: 1. if x == 'PA' else 0.)
meta['LATERAL'] = meta.cxr_view.apply(lambda x: 1. if x == 'LATERAL' else 0.)
meta['AP'] = meta.cxr_view.apply(lambda x: 1. if x == 'AP' else 0.)
meta['LL'] = meta.cxr_view.apply(lambda x: 1. if x == 'LL' else 0.)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['PA'] = meta.cxr_view.apply(lambda x: 1. if x == 'PA' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['LATERAL'] = meta.cxr_view.apply(lambda x: 1. if x == 'LATERAL' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['AP'] = meta.cxr_view.apply(lambda x: 1. if x == 'AP' 

In [33]:
meta.columns

Index(['dicom_id', 'cxr_view', 'cxr_orientation', 'subject_id', 'gender',
       'age', 'stay_expire', 'icu_stay', 'ageR5', 'ageR10', 'path', 'male',
       'female', 'age_norm', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
       '80-85', '85-90', '90+', '<20*', '20-30', '30-40', '40-50', '50-60',
       '60-70', '70-80', '80-90', '90+*', 'PA', 'LATERAL', 'AP', 'LL'],
      dtype='object')

In [34]:
meta.cxr_orientation.unique()

array(['Erect', 'Recumbent'], dtype=object)

In [35]:
meta['Erect'] = meta.cxr_orientation.apply(lambda x: 1. if x == 'Erect' else 0.)
meta['Recumbent'] = meta.cxr_orientation.apply(lambda x: 1. if x == 'Recumbent' else 0.)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['Erect'] = meta.cxr_orientation.apply(lambda x: 1. if x == 'Erect' else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['Recumbent'] = meta.cxr_orientation.apply(lambda x: 1. if x == 'Recumbent' else 0.)


In [36]:
meta.columns

Index(['dicom_id', 'cxr_view', 'cxr_orientation', 'subject_id', 'gender',
       'age', 'stay_expire', 'icu_stay', 'ageR5', 'ageR10', 'path', 'male',
       'female', 'age_norm', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
       '80-85', '85-90', '90+', '<20*', '20-30', '30-40', '40-50', '50-60',
       '60-70', '70-80', '80-90', '90+*', 'PA', 'LATERAL', 'AP', 'LL', 'Erect',
       'Recumbent'],
      dtype='object')

In [37]:
meta

Unnamed: 0,dicom_id,cxr_view,cxr_orientation,subject_id,gender,age,stay_expire,icu_stay,ageR5,ageR10,...,60-70,70-80,80-90,90+*,PA,LATERAL,AP,LL,Erect,Recumbent
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,PA,Erect,10000032,F,52.0,0,0,50-55,50-60,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,LATERAL,Erect,10000032,F,52.0,0,0,50-55,50-60,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,PA,Erect,10000032,F,52.0,0,0,50-55,50-60,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,LATERAL,Erect,10000032,F,52.0,0,0,50-55,50-60,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,AP,Erect,10000032,F,52.0,0,1,50-55,50-60,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377105,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,PA,Erect,19999733,F,19.0,0,0,<20,<20,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
377106,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,LATERAL,Erect,19999733,F,19.0,0,0,<20,<20,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
377107,58766883-376a15ce-3b323a28-6af950a0-16b793bd,AP,Erect,19999987,F,57.0,0,1,55-60,50-60,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
377108,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,AP,Erect,19999987,F,57.0,0,1,55-60,50-60,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [43]:
meta['alive'] = meta.stay_expire.apply(lambda x: 1. if x == 0 else 0.)
meta['died'] = meta.stay_expire.apply(lambda x: 1. if x == 1 else 0.)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['alive'] = meta.stay_expire.apply(lambda x: 1. if x == 0 else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['died'] = meta.stay_expire.apply(lambda x: 1. if x == 1 else 0.)


In [44]:
meta['icu'] = meta.icu_stay.apply(lambda x: 1. if x == 1 else 0.)
meta['not_icu'] = meta.icu_stay.apply(lambda x: 1. if x == 0 else 0.)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['icu'] = meta.icu_stay.apply(lambda x: 1. if x == 1 else 0.)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meta['not_icu'] = meta.icu_stay.apply(lambda x: 1. if x == 0 else 0.)


In [45]:
meta.head(1)

Unnamed: 0,dicom_id,cxr_view,cxr_orientation,subject_id,gender,age,stay_expire,icu_stay,ageR5,ageR10,...,PA,LATERAL,AP,LL,Erect,Recumbent,alive,died,icu,not_icu
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,PA,Erect,10000032,F,52.0,0,0,50-55,50-60,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0


In [46]:
['path', 'male','female', 'age_norm', 
 
'<20', '20-25', '25-30', '30-35', '35-40', '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80','80-85', '85-90', '90+', 
 
'<20*', '20-30', '30-40', '40-50', '50-60','60-70', '70-80', '80-90', '90+*', 
 
 'PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent', 
 
 'alive', 'died', 'icu', 'not_icu']

['path',
 'male',
 'female',
 'age_norm',
 '<20',
 '20-25',
 '25-30',
 '30-35',
 '35-40',
 '40-45',
 '45-50',
 '50-55',
 '55-60',
 '60-65',
 '65-70',
 '70-75',
 '75-80',
 '80-85',
 '85-90',
 '90+',
 '<20*',
 '20-30',
 '30-40',
 '40-50',
 '50-60',
 '60-70',
 '70-80',
 '80-90',
 '90+*',
 'PA',
 'LATERAL',
 'AP',
 'LL',
 'Erect',
 'Recumbent',
 'alive',
 'died',
 'icu',
 'not_icu']

In [47]:
os.listdir('../data/single-variable/')

['.ipynb_checkpoints',
 'meta-mortality-only.csv',
 'meta-gender-only.csv',
 'meta-age-norm-only.csv',
 'meta-age10-only.csv',
 'meta-orientation-only.csv',
 'meta-view-only.csv',
 'meta-icu-only.csv',
 'meta-age5-only.csv']

In [48]:
meta[['path','male','female']].to_csv('../data/single-variable/meta-gender-only.csv',index=False)

In [49]:
pd.read_csv('../data/single-variable/meta-gender-only.csv').shape

(376201, 3)

In [50]:
meta[['path','age_norm']].to_csv('../data/single-variable/meta-age-norm-only.csv',index=False)

In [51]:
pd.read_csv('../data/single-variable/meta-age-norm-only.csv').shape

(376201, 2)

In [52]:
meta[['path','<20', '20-25', '25-30', '30-35', '35-40','40-45', '45-50', '50-55', '55-60', '60-65', '65-70',\
      '70-75', '75-80','80-85', '85-90', '90+']].to_csv('../data/single-variable/meta-age5-only.csv',index=False)

In [53]:
pd.read_csv('../data/single-variable/meta-age5-only.csv').shape

(376201, 17)

In [54]:
meta[['path','<20*', '20-30', '30-40', '40-50', '50-60','60-70', '70-80', '80-90', '90+*']].to_csv('../data/single-variable/meta-age10-only.csv',index=False)

In [55]:
pd.read_csv('../data/single-variable/meta-age10-only.csv').shape

(376201, 10)

In [56]:
meta[['path','PA', 'LATERAL', 'AP', 'LL']].to_csv('../data/single-variable/meta-view-only.csv',index=False)

In [58]:
pd.read_csv('../data/single-variable/meta-view-only.csv').shape

(376201, 5)

In [59]:
meta[['path', 'Erect','Recumbent']].to_csv('../data/single-variable/meta-orientation-only.csv',index=False)

In [61]:
pd.read_csv('../data/single-variable/meta-orientation-only.csv').shape

(376201, 3)

In [64]:
meta[['path', 'alive', 'died',]].to_csv('../data/single-variable/meta-mortality-only.csv',index=False)

In [65]:
pd.read_csv('../data/single-variable/meta-mortality-only.csv').shape

(376201, 3)

In [66]:
meta[['path', 'icu', 'not_icu']].to_csv('../data/single-variable/meta-icu-only.csv',index=False)

In [67]:
pd.read_csv('../data/single-variable/meta-icu-only.csv').shape

(376201, 3)

In [68]:
meta[['path', 'male','female', 'age_norm' ]].to_csv('../data/single-group/socio-norm.csv',index=False)

In [69]:
pd.read_csv('../data/single-group/socio-norm.csv').shape

(376201, 4)

In [70]:
meta[['path', 'male','female', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
        '80-85', '85-90', '90+', ]].to_csv('../data/single-group/socio-age5.csv',index=False)

In [71]:
pd.read_csv('../data/single-group/socio-age5.csv').shape

(376201, 19)

In [72]:
meta[['path','male','female', '<20*', '20-30', '30-40', '40-50', '50-60',
        '60-70', '70-80', '80-90', '90+*'  ]].to_csv('../data/single-group/socio-age10.csv',index=False)

In [74]:
pd.read_csv('../data/single-group/socio-age10.csv').shape

(376201, 12)

In [75]:
meta[['path', 'PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent' ]].to_csv('../data/single-group/image_meta.csv',index=False)

In [76]:
pd.read_csv('../data/single-group/image_meta.csv').shape

(376201, 7)

In [77]:
meta[['path', 'alive', 'died', 'icu', 'not_icu' ]].to_csv('../data/single-group/clinical.csv',index=False)

In [79]:
pd.read_csv('../data/single-group/clinical.csv').shape

(376201, 5)

In [80]:
meta[['path', 'male','female', 'age_norm', 'PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent']].to_csv('../data/combinations/socio-age-norm-image-meta.csv',index=False)

In [81]:
pd.read_csv('../data/combinations/socio-age-norm-image-meta.csv').shape

(376201, 10)

In [82]:
meta[['path', 'male','female', 'age_norm', 'alive', 'died', 'icu', 'not_icu']].to_csv('../data/combinations/socio-age-norm-clinical.csv',index=False)

In [83]:
pd.read_csv('../data/combinations/socio-age-norm-clinical.csv').shape

(376201, 8)

In [84]:
meta[['path', 'male','female', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
        '80-85', '85-90', '90+','PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent']].to_csv('../data/combinations/socio-age-5-image-meta.csv',index=False)

In [85]:
pd.read_csv('../data/combinations/socio-age-5-image-meta.csv').shape

(376201, 25)

In [86]:
meta[['path', 'male','female', '<20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
        '80-85', '85-90', '90+','alive', 'died', 'icu', 'not_icu']].to_csv('../data/combinations/socio-age-5-clinical.csv',index=False)

In [90]:
pd.read_csv('../data/combinations/socio-age-5-clinical.csv').shape

(376201, 23)

In [91]:
meta[['path', 'male','female', '<20*', '20-30', '30-40', '40-50', '50-60',
        '60-70', '70-80', '80-90', '90+*','PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent']].to_csv('../data/combinations/socio-age-10-image-meta.csv',index=False)

In [93]:
pd.read_csv('../data/combinations/socio-age-10-image-meta.csv').shape

(376201, 18)

In [84]:
meta[['path', 'male','female', '<20*', '20-30', '30-40', '40-50', '50-60',
        '60-70', '70-80', '80-90', '90+*','alive', 'died', 'icu', 'not_icu']].to_csv('../data/combinations/socio-age-10-clinical.csv',index=False)

In [94]:
pd.read_csv('../data/combinations/socio-age-10-clinical.csv').shape

(376201, 16)

In [95]:
meta[['path', 'PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent','alive', 'died', 'icu', 'not_icu' ]].to_csv('../data/combinations/image-meta-clinical.csv',index=False)

In [96]:
pd.read_csv('../data/combinations/image-meta-clinical.csv').shape

(376201, 11)

In [97]:
meta[['path', 'male','female', 'age_norm','PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent', 'alive', 'died', 'icu', 'not_icu']].to_csv('../data/all/all-age-norm.csv',index=False)

In [98]:
pd.read_csv('../data/all/all-age-norm.csv').shape

(376201, 14)

In [99]:
meta[['path', 'male','female', '<20', '20-25', '25-30', '30-35', '35-40', '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80','80-85', '85-90', '90+','PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent', 'alive', 'died', 'icu', 'not_icu']].to_csv('../data/all/all-age-5.csv',index=False)

In [100]:
pd.read_csv('../data/all/all-age-5.csv').shape

(376201, 29)

In [92]:
meta[['path', 'male','female', '<20*', '20-30', '30-40', '40-50', '50-60','60-70', '70-80', '80-90', '90+*', 'PA', 'LATERAL', 'AP', 'LL', 'Erect','Recumbent', 'alive', 'died', 'icu', 'not_icu']].to_csv('../data/all/all-age-10.csv',index=False)

In [101]:
pd.read_csv('../data/all/all-age-10.csv').shape

(376201, 22)

In [333]:
meta.to_csv('../data/preprocessed.csv',index=False)