# Data processing for the Organisational Audit Portfolio spreadsheet 3: rename data contents

Make the data easier to wrangle. For example, change rows with long string values to shorter or numerical values.

In [1]:
import os
import pandas as pd
import numpy as np  # for NaN

### Import data

In [2]:
dir_files = '../data/organisational_audit/processed'
file_excel = 'processed_2019_portfolio_key_indicators_summary.csv'

In [3]:
df = pd.read_csv(os.path.join(dir_files, file_excel), index_col=0)

In [4]:
df

Unnamed: 0_level_0,ki_total,ki1,ki1_nurses6_wte,ki1_nurses7_wte,ki2,ki2_psych_wte,ki3,ki3_strokenurse_outofhours,ki3_strokenurse_bed1_weekdays10pm,ki3_strokenurse_bed1_saturdays,...,ki8,ki8_patientcarersurvey_frequency,ki9,ki9_tia_brainimaging_mostused,ki9_tia_outpatients_seen,ki9_tia_outpatient_timescale,ki10,ki10_management_executiveonboard,ki10_management_nonexecutiveonboard,ki10_management_chairmanofclinicalgovernance
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Queens Hospital Romford HASU,6.0,Yes,2.73,1.14,No,0.48,Yes,Yes,5,5,...,Yes,Continuous (every patient),No,Computed Tomography,Yes,The same day (7 days a week),Yes,Yes,Yes,Yes
Newham General Hospital,6.0,Yes,1.54,1.54,No,0.00,At site treating your patients during the firs...,At site treating your patients during the firs...,Not Applicable,Not Applicable,...,Yes,Continuous (every patient),Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No
Royal London Hospital HASU,5.0,Yes,2.85,0.38,No,0.23,Yes,Yes,4,4,...,No,Less than once a year,Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No
Whipps Cross University Hospital,3.0,Yes,2.63,0.53,No,0.00,No,No,Not Applicable,Not Applicable,...,No,Never,No,Computed Tomography,Yes,The same day (5 days a week),No,No,No,No
Charing Cross Hospital HASU,5.0,Yes,3.67,0.44,No,0.27,Yes,Yes,6,7,...,Yes,3-4 times a year,No,Computed Tomography,Yes,The same day (5 days a week),No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Altnagelvin Hospital,3.0,No,1.20,0.40,No,0.00,No,No,1,1,...,Yes,1-2 times a year,Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No
South West Acute Hospital,3.0,No,1.56,0.56,No,0.00,No,No,1,1,...,Yes,1-2 times a year,No,Computed Tomography,Yes,Within a week,No,No,No,No
Noble's Hospital,3.0,Yes,5.00,0.83,No,0.00,Yes,Yes,Not Applicable,Not Applicable,...,Yes,Continuous (every patient),No,Magnetic Resonance Imaging,Yes,Within a week,No,No,No,No
Walton Centre Stroke Team,,,,,,,,,,,...,,,,,,,,,,


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 171 entries, Queens Hospital Romford HASU to Queen's Medical Centre - Nottingham
Data columns (total 47 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   ki_total                                      169 non-null    float64
 1   ki1                                           169 non-null    object 
 2   ki1_nurses6_wte                               169 non-null    float64
 3   ki1_nurses7_wte                               169 non-null    float64
 4   ki2                                           169 non-null    object 
 5   ki2_psych_wte                                 169 non-null    float64
 6   ki3                                           169 non-null    object 
 7   ki3_strokenurse_outofhours                    169 non-null    object 
 8   ki3_strokenurse_bed1_weekdays10pm             169 non-null    object 
 9   ki3_stroken

## Remove teams with missing data

Two stroke units have completely missing data. They appear at the end of the list and in the full dataset only have a handful of the entries completed. All other teams have complete information.

Remove these two teams now to save a bunch of extra formatting to account for the missing values.

This also allows us to use `pd.NA` not-applicable values for any 'Not Applicable' value instead of missing values.

In [6]:
# Find where the missing teams are in the DataFrame:
missing_team_inds = []

for i in range(len(df)):
    if df.iloc[i].isna().all():
        # If everything in this row is missing, then add to list:
        missing_team_inds.append(i)

# Find the names of the missing teams:
missing_teams = [df.index[i] for i in missing_team_inds]

missing_teams

['Walton Centre Stroke Team', "Queen's Medical Centre - Nottingham"]

In [7]:
# Remove these teams from the DataFrame:
for team in missing_teams:
    df = df.drop(team, axis='rows')

## Label acute units

Some columns contain mostly well-behaved data and also data that has similar contents but prefixed with "At site treating your patients during the first 72h:". These are non-acute stroke units who receive only patients after treatment at an acute unit. The phrasing in the document: "teams who do not treat patients in the first 72 hours following stroke".

Find which units have data containing that long string, label them as not acute, and remove the long string from the affected data.

In [8]:
long_str = 'At site treating your patients during the first 72h'

In [9]:
# Find columns containing the long string.
cols_with_long_str = []
for col in df.columns:
    try:
        contents = df[col].str
        if contents.contains(long_str).any():
            cols_with_long_str.append(col)
    except AttributeError:
        # Column can't be converted to strings.
        pass

cols_with_long_str

['ki3',
 'ki3_strokenurse_outofhours',
 'ki4',
 'ki4_minimum_nurse_bed1_saturdays10am',
 'ki4_minimum_nurse_bed1_sundays10am',
 'ki4_minimum_nurse_bed3_saturdays10am',
 'ki4_minimum_nurse_bed3_sundays10am',
 'ki6',
 'ki6_prealert_ivtcandidates',
 'ki6_prealert_fastpositive',
 'ki6_prealert_othersuspectedstroke',
 'ki6_prealert_strokenurse',
 'ki6_prealert_juniordoctor',
 'ki6_prealert_consultant']

In [10]:
# Find hospitals containing the long string.
teams_with_long_str = []
for team in df.index:
    try:
        contents = df.loc[team, cols_with_long_str].str
        if contents.contains(long_str).any():
            teams_with_long_str.append(team)
    except AttributeError:
        # Column can't be converted to strings.
        pass

teams_with_long_str

['Newham General Hospital',
 'St Helier Hospital',
 'Chelsea and Westminster Hospital',
 'West Middlesex University Hospital',
 'Croydon University Hospital',
 'St Thomas Hospital',
 'Hillingdon Hospital',
 'Homerton University Hospital',
 'Kingston Hospital',
 'University Hospital Lewisham',
 'North Middlesex Hospital',
 'Royal Free Hospital',
 'Barnet General Hospital',
 'Bedford Hospital',
 'Good Hope General Hospital',
 'Solihull Hospital',
 'Royal Bolton Hospital',
 'Trafford General Hospital',
 'Tameside General Hospital',
 'Royal Albert Edward Infirmary',
 'Warrington Hospital',
 'Hexham General Hospital',
 'North Tyneside General Hospital',
 'Wansbeck General Hospital',
 'Queen Elizabeth Hospital Gateshead',
 'Barnsley Hospital',
 'Royal Surrey County Hospital']

In [11]:
# Label acute teams.
df['acute_unit'] = 1
mask = df.index.isin(teams_with_long_str)
df.loc[mask, 'acute_unit'] = 0

In [12]:
# Remove the long string.

# In this case every instance of the long string is immediately followed by ':_x000D_\n ',
# so add that to the text to be replaced.
long_str += ':_x000D_\n '

for col in cols_with_long_str:
    df[col] = df[col].str.replace(long_str, '')

### Convert 'Not Applicable' strings to pd.NA

In [13]:
df = df.replace('Not Applicable', np.NaN)

In [14]:
for col in df.columns:
    try:
        df[col] = df[col].astype(float)
    except ValueError:
        # Can't convert column. Perhaps contains strings.
        pass
    

In [15]:
df

Unnamed: 0_level_0,ki_total,ki1,ki1_nurses6_wte,ki1_nurses7_wte,ki2,ki2_psych_wte,ki3,ki3_strokenurse_outofhours,ki3_strokenurse_bed1_weekdays10pm,ki3_strokenurse_bed1_saturdays,...,ki8_patientcarersurvey_frequency,ki9,ki9_tia_brainimaging_mostused,ki9_tia_outpatients_seen,ki9_tia_outpatient_timescale,ki10,ki10_management_executiveonboard,ki10_management_nonexecutiveonboard,ki10_management_chairmanofclinicalgovernance,acute_unit
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Queens Hospital Romford HASU,6.0,Yes,2.73,1.14,No,0.48,Yes,Yes,5.0,5.0,...,Continuous (every patient),No,Computed Tomography,Yes,The same day (7 days a week),Yes,Yes,Yes,Yes,1.0
Newham General Hospital,6.0,Yes,1.54,1.54,No,0.00,Yes,Yes,,,...,Continuous (every patient),Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No,0.0
Royal London Hospital HASU,5.0,Yes,2.85,0.38,No,0.23,Yes,Yes,4.0,4.0,...,Less than once a year,Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No,1.0
Whipps Cross University Hospital,3.0,Yes,2.63,0.53,No,0.00,No,No,,,...,Never,No,Computed Tomography,Yes,The same day (5 days a week),No,No,No,No,1.0
Charing Cross Hospital HASU,5.0,Yes,3.67,0.44,No,0.27,Yes,Yes,6.0,7.0,...,3-4 times a year,No,Computed Tomography,Yes,The same day (5 days a week),No,No,No,No,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Craigavon Area Hospital,2.0,Yes,2.63,0.26,No,0.00,No,No,,,...,Less than once a year,No,Magnetic Resonance Imaging,Yes,Within a week,No,No,No,No,1.0
Daisy Hill Hospital,2.0,No,1.50,0.33,No,0.00,No,No,,,...,Less than once a year,No,Magnetic Resonance Imaging,,,No,No,No,No,1.0
Altnagelvin Hospital,3.0,No,1.20,0.40,No,0.00,No,No,1.0,1.0,...,1-2 times a year,Yes,Magnetic Resonance Imaging,Yes,The same day (5 days a week),No,No,No,No,1.0
South West Acute Hospital,3.0,No,1.56,0.56,No,0.00,No,No,1.0,1.0,...,1-2 times a year,No,Computed Tomography,Yes,Within a week,No,No,No,No,1.0


### TIA outpatients.

The TIA columns for 'Outpatients' and 'Outpatients Timescale' are missing data whenever the site does not have a neurovascular clinic. This clinic info is hidden in the "all sites results" sheet rather than the "key indicators summary" sheet that the rest of this data here came from.

Add a column to distinguish units that see TIAs from those that don't.

In [16]:
mask = ~df['ki9_tia_outpatients_seen'].isin(['Yes', 'No'])

df.loc[mask, ['ki9_tia_outpatients_seen', 'ki9_tia_outpatient_timescale']]

Unnamed: 0_level_0,ki9_tia_outpatients_seen,ki9_tia_outpatient_timescale
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1
Southend Hospital,,
West Cumberland Hospital,,
Northumbria Specialist Emergency Care Hospital HASU,,
Royal Surrey County Hospital,,
Weston General Hospital,,
Horton General Hospital,,
Daisy Hill Hospital,,


In [17]:
df['ki9_tia_clinic'] = 1

# Mask for any missing data (i.e. neither Yes nor No):
mask = ~df['ki9_tia_outpatients_seen'].isin(['Yes', 'No'])

df.loc[mask, 'ki9_tia_clinic'] = 0

Set missing values to 'No':

In [18]:
df.loc[mask, 'ki9_tia_outpatients_seen'] = 'No'

### Check the contents of the columns

In [19]:
dict_of_column_contents = {}

for col in df.columns:
    column_contents = df[col].unique().tolist()
    # # Sort the contents and convert to tuple:
    try:
        column_contents = sorted(column_contents)
    except TypeError:
        # Can't sort lists containing NA.
        pass
    dict_of_column_contents[col] = column_contents

In [20]:
for col, column_contents in dict_of_column_contents.items():
    if not all(isinstance(v, (int, float)) for v in column_contents):
        print(col, column_contents, '\n')

ki1 ['No', 'Yes'] 

ki2 ['No', 'Yes'] 

ki3 ['No', 'Yes'] 

ki3_strokenurse_outofhours ['No', 'Yes'] 

ki4 ['No', 'Yes'] 

ki5 ['No', 'Yes'] 

ki6 ['No', 'Yes'] 

ki6_prealert_ivtcandidates ['No', 'Sometimes', 'Yes'] 

ki6_prealert_fastpositive ['No', 'Sometimes', 'Yes'] 

ki6_prealert_othersuspectedstroke ['No', 'Sometimes', 'Yes'] 

ki6_prealert_strokenurse ['No', 'Yes'] 

ki6_prealert_juniordoctor ['No', 'Yes'] 

ki6_prealert_consultant ['No', 'Yes'] 

ki7 ['No', 'Yes'] 

ki7_stroke_esd_team ['No', 'Yes'] 

ki8 ['No', 'Yes'] 

ki8_patientcarersurvey_frequency ['1-2 times a year', '3-4 times a year', 'Continuous (every patient)', 'Less than once a year', 'More than 4 a year', 'Never'] 

ki9 ['No', 'Yes'] 

ki9_tia_brainimaging_mostused ['Computed Tomography', 'Magnetic Resonance Imaging', 'Rarely image TIAs'] 

ki9_tia_outpatients_seen ['No', 'Yes'] 

ki9_tia_outpatient_timescale ['The same day (7 days a week)', 'The same day (5 days a week)', 'The next weekday', nan, 'The next day',

### Convert yes/no columns to 1/0

In [21]:
# Find columns containing only 'Yes' and 'No':
cols_bool = []
for col in df.columns:
    mask = df[col].isin(['Yes', 'No'])
    if mask.all():
        cols_bool.append(col)

cols_bool

['ki1',
 'ki2',
 'ki3',
 'ki3_strokenurse_outofhours',
 'ki4',
 'ki5',
 'ki6',
 'ki6_prealert_strokenurse',
 'ki6_prealert_juniordoctor',
 'ki6_prealert_consultant',
 'ki7',
 'ki7_stroke_esd_team',
 'ki8',
 'ki9',
 'ki9_tia_outpatients_seen',
 'ki10',
 'ki10_management_executiveonboard',
 'ki10_management_nonexecutiveonboard',
 'ki10_management_chairmanofclinicalgovernance']

In [22]:
df[cols_bool] = df[cols_bool].replace(['Yes', 'No'], [1, 0])

### Convert yes/no/sometimes columns to two yes/sometimes columns

In [23]:
# Find columns containing only 'Yes', 'No', and 'Sometimes':
cols_yns = []
for col in df.columns:
    mask = df[col].isin(['Yes', 'No', 'Sometimes'])
    if mask.all():
        cols_yns.append(col)

cols_yns

['ki6_prealert_ivtcandidates',
 'ki6_prealert_fastpositive',
 'ki6_prealert_othersuspectedstroke']

In [24]:
for col in cols_yns:
    # Rename these values for the sake of column names:
    df[col] = df[col].replace(
        ['Yes', 'No', 'Sometimes'],
        ['yes', 'no', 'sometimes']
    )
    # Split into separate columns:
    df_here = pd.get_dummies(df[col], prefix=col, prefix_sep='_')
    # Drop the 'no' column:
    df_here = df_here.drop(f'{col}_no', axis='columns')
    # Merge these new columns into the existing dataframe:
    df = pd.concat((df, df_here), axis='columns')
    
    # Remove the original column
    df = df.drop(col, axis='columns')

### Convert categorical columns to one-hot-encoded

Brain imaging type:

In [25]:
list(set(df['ki9_tia_brainimaging_mostused'].values))

['Magnetic Resonance Imaging', 'Rarely image TIAs', 'Computed Tomography']

In [26]:
# Rename these values for the sake of column names:
df['ki9_tia_brainimaging_mostused'] = df['ki9_tia_brainimaging_mostused'].replace(
    ['Computed Tomography', 'Rarely image TIAs', 'Magnetic Resonance Imaging'],
    ['ct', 'rare', 'mri']
)

In [27]:
df_imaging = pd.get_dummies(
    df['ki9_tia_brainimaging_mostused'],
    prefix='ki9_tia_brainimaging_mostused',
    prefix_sep='_'
)

df_imaging

Unnamed: 0_level_0,ki9_tia_brainimaging_mostused_ct,ki9_tia_brainimaging_mostused_mri,ki9_tia_brainimaging_mostused_rare
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Queens Hospital Romford HASU,1,0,0
Newham General Hospital,0,1,0
Royal London Hospital HASU,0,1,0
Whipps Cross University Hospital,1,0,0
Charing Cross Hospital HASU,1,0,0
...,...,...,...
Craigavon Area Hospital,0,1,0
Daisy Hill Hospital,0,1,0
Altnagelvin Hospital,0,1,0
South West Acute Hospital,1,0,0


In [28]:
# Merge these new columns into the existing dataframe:
df = pd.concat((df, df_imaging), axis='columns')

# Remove the original column
df = df.drop('ki9_tia_brainimaging_mostused', axis='columns')

Survey frequency:

In [29]:
list(set(df['ki8_patientcarersurvey_frequency'].values))

['1-2 times a year',
 '3-4 times a year',
 'More than 4 a year',
 'Continuous (every patient)',
 'Less than once a year',
 'Never']

In [30]:
cols_now = [
    'Never',
    'More than 4 a year',
    'Less than once a year',
    'Continuous (every patient)',
    '3-4 times a year',
    '1-2 times a year'
    ]
cols_soon = [
    'never',
    'more4peryear',
    'less1peryear',
    'everypatient',
    '3to4peryear',
    '1to2peryear'
]

# Rename these values for the sake of column names:
df['ki8_patientcarersurvey_frequency'] = df['ki8_patientcarersurvey_frequency'].replace(
    cols_now, cols_soon
)

In [31]:
df_survey = pd.get_dummies(
    df['ki8_patientcarersurvey_frequency'],
    prefix='ki8_patientcarersurvey_frequency',
    prefix_sep='_'
)

df_survey

Unnamed: 0_level_0,ki8_patientcarersurvey_frequency_1to2peryear,ki8_patientcarersurvey_frequency_3to4peryear,ki8_patientcarersurvey_frequency_everypatient,ki8_patientcarersurvey_frequency_less1peryear,ki8_patientcarersurvey_frequency_more4peryear,ki8_patientcarersurvey_frequency_never
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Queens Hospital Romford HASU,0,0,1,0,0,0
Newham General Hospital,0,0,1,0,0,0
Royal London Hospital HASU,0,0,0,1,0,0
Whipps Cross University Hospital,0,0,0,0,0,1
Charing Cross Hospital HASU,0,1,0,0,0,0
...,...,...,...,...,...,...
Craigavon Area Hospital,0,0,0,1,0,0
Daisy Hill Hospital,0,0,0,1,0,0
Altnagelvin Hospital,1,0,0,0,0,0
South West Acute Hospital,1,0,0,0,0,0


In [32]:
# Merge these new columns into the existing dataframe:
df = pd.concat((df, df_survey), axis='columns')

# Remove the original column
df = df.drop('ki8_patientcarersurvey_frequency', axis='columns')

TIA timescales:

In [33]:
list(set(df['ki9_tia_outpatient_timescale'].values))

[nan,
 'Within a month',
 'The next day',
 'The same day (5 days a week)',
 'Within a week',
 'The next weekday',
 'The same day (7 days a week)']

In [34]:
# Replace nan with 'missing' string:
df['ki9_tia_outpatient_timescale'] = df['ki9_tia_outpatient_timescale'].fillna('missing')

In [35]:
cols_now = [
    'The next weekday',
    'The same day (7 days a week)',
    'The next day',
    'Within a week',
    'The same day (5 days a week)',
    'missing',
    'Within a month'
]
cols_soon = [
    'nextweekday',
    'sameday',
    'nextday',
    'withinweek',
    'samedayexcludingweekends',
    'missing',
    'withinmonth'
]

# Rename these values for the sake of column names:
df['ki9_tia_outpatient_timescale'] = df['ki9_tia_outpatient_timescale'].replace(
    cols_now, cols_soon
)

In [36]:
df_tia = pd.get_dummies(
    df['ki9_tia_outpatient_timescale'],
    prefix='ki9_tia_outpatient_timescale',
    prefix_sep='_'
)

# Drop the 'missing' column:
df_tia = df_tia.drop('ki9_tia_outpatient_timescale_missing', axis='columns')

df_tia

Unnamed: 0_level_0,ki9_tia_outpatient_timescale_nextday,ki9_tia_outpatient_timescale_nextweekday,ki9_tia_outpatient_timescale_sameday,ki9_tia_outpatient_timescale_samedayexcludingweekends,ki9_tia_outpatient_timescale_withinmonth,ki9_tia_outpatient_timescale_withinweek
Hospital names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Queens Hospital Romford HASU,0,0,1,0,0,0
Newham General Hospital,0,0,0,1,0,0
Royal London Hospital HASU,0,0,0,1,0,0
Whipps Cross University Hospital,0,0,0,1,0,0
Charing Cross Hospital HASU,0,0,0,1,0,0
...,...,...,...,...,...,...
Craigavon Area Hospital,0,0,0,0,0,1
Daisy Hill Hospital,0,0,0,0,0,0
Altnagelvin Hospital,0,0,0,1,0,0
South West Acute Hospital,0,0,0,0,0,1


In [37]:
# Merge these new columns into the existing dataframe:
df = pd.concat((df, df_tia), axis='columns')

# Remove the original column
df = df.drop('ki9_tia_outpatient_timescale', axis='columns')

## Sort columns by key indicator

In [38]:
cols = df.columns
ki_cols = [col.split('_')[0] for col in cols]
ki_cols = [int(col.split('ki')[-1]) if col.split('ki')[-1].isnumeric() else 0 for col in ki_cols]

In [39]:
cols = np.array(cols)[np.argsort(ki_cols)]

In [40]:
df = df[cols]

Final dataframe contents:

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 169 entries, Queens Hospital Romford HASU to Noble's Hospital
Data columns (total 64 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   ki_total                                               169 non-null    float64
 1   acute_unit                                             169 non-null    float64
 2   ki1_nurses6_wte                                        169 non-null    float64
 3   ki1                                                    169 non-null    int64  
 4   ki1_nurses7_wte                                        169 non-null    float64
 5   ki2                                                    169 non-null    int64  
 6   ki2_psych_wte                                          169 non-null    float64
 7   ki3                                                    169 non-null    int64  
 8   ki3_strokenurse

Save to file:

In [42]:
df.to_csv(os.path.join(dir_files, 'processed_2019_portfolio_key_indicators_summary_numeric.csv'))