# Range of values across region types

This notebook finds the minimum and maximum value of each category (data column) separately for each of the different region types.

These min and max values will be used to estimate the colour limits needed to usefully display the data on a map.

In [1]:
import os
import pandas as pd

In [2]:
# Names of the region types to include.
# Each name matches the suffix of a 'collated_data_regional_<name>.csv' file.
# Entries that are commented out are left out of the list.
region_types = [
    'LSOA',
    'closest_ivt_unit',
    'closest_mt_unit',
    'closest_mt_transfer',
    # 'rural_urban_2011',
    'ambulance_service',
    # 'LAD22NM',
    # 'icb_code',
    'isdn',
    # 'lhb',
    'icb_lhb'
]

In [20]:
# Load the collated data for every region type.
# All of the files follow the same naming pattern, so build each path from
# the region type name rather than repeating the read once per file.
dir_collated = 'data/collated_data_by_region'
df_dict = {
    region_type: pd.read_csv(
        f'{dir_collated}/collated_data_regional_{region_type}.csv',
        index_col=0
    )
    for region_type in region_types
}

# Keep the individual dataframe names available for the cells that use them:
df_lsoa = df_dict['LSOA']
df_ivt = df_dict['closest_ivt_unit']
df_mt = df_dict['closest_mt_unit']
df_transfer = df_dict['closest_mt_transfer']
df_ambo = df_dict['ambulance_service']
df_isdn = df_dict['isdn']
df_icb_lhb = df_dict['icb_lhb']

In [21]:
# Column names of the LSOA-level data:
cols = df_lsoa.columns

cols

Index(['polygon_area_km2', 'population_all_x', 'population_density',
       'income_domain_weighted_mean', 'imd_weighted_mean', 'weighted_ivt_time',
       'mt_time_weighted_mean', 'ivt_time_weighted_mean',
       'mt_transfer_time_weighted_mean',
       'ethnic_group_other_than_white_british',
       'ethnic_group_all_categories_ethnic_group',
       'ethnic_minority_proportion', 'bad_or_very_bad_health',
       'all_categories_general_health', 'bad_health_proportion',
       'long_term_health_count',
       'all_categories_long_term_health_problem_or_disability',
       'long_term_health_proportion', 'age_65_plus_count', 'population_all_y',
       'age_65_plus_proportion', 'ural_False', 'ural_True', 'proportion_rural',
       'ver_65_within_30_False', 'ver_65_within_30_True',
       'proportion_over_65_within_30', 'closest_is_m_False',
       'closest_is_m_True', 'proportion_closest_is_mt'],
      dtype='object')

In [22]:
# Gather every column name that appears in any of the dataframes.
# The set removes duplicates and sorted() returns them as an ordered list:
all_cols = sorted({
    col_name
    for region_df in df_dict.values()
    for col_name in region_df.columns.values.tolist()
})

In [23]:
# Show the sorted list of unique column names:
all_cols

['admissions_2122',
 'age_65_plus_count',
 'age_65_plus_proportion',
 'all_categories_general_health',
 'all_categories_long_term_health_problem_or_disability',
 'bad_health_proportion',
 'bad_or_very_bad_health',
 'closest_is_m_False',
 'closest_is_m_True',
 'ethnic_group_all_categories_ethnic_group',
 'ethnic_group_other_than_white_british',
 'ethnic_minority_proportion',
 'imd_weighted_mean',
 'income_domain_weighted_mean',
 'ivt_rate',
 'ivt_time_weighted_mean',
 'long_term_health_count',
 'long_term_health_proportion',
 'mt_time_weighted_mean',
 'mt_transfer_time_weighted_mean',
 'polygon_area_km2',
 'population_all_x',
 'population_all_y',
 'population_density',
 'proportion_closest_is_mt',
 'proportion_over_65_within_30',
 'proportion_rural',
 'ural_False',
 'ural_True',
 'ver_65_within_30_False',
 'ver_65_within_30_True',
 'weighted_ivt_time']

In [26]:
# Check that every column is numeric in every region type's dataframe.
# Numeric-only view of the IVT unit data, kept under its original name.
# Note that this is a DataFrame, so iterating over it gives its column names.
cols_numeric = df_ivt.select_dtypes(include='number')
# Collect the names of any columns that are not numeric in at least one of
# the dataframes. 'number' covers every numeric dtype, not only int64/float64.
cols_not_numeric = {
    col
    for df in df_dict.values()
    for col in df.select_dtypes(exclude='number').columns
}
# ... and make sure that this set is empty:
cols_not_numeric

In [30]:
# Every region type gets a pair of results columns: its min, then its max.
results_cols = []
for key in df_dict:
    results_cols.extend([f'{key}_min', f'{key}_max'])

In [33]:
# One row per data column, one min and one max column per region type.
# Cells stay missing where a region type does not have that data column.
df_results = pd.DataFrame(index=all_cols, columns=results_cols)

for df_name, df in df_dict.items():
    for col in all_cols:
        if col not in df.columns:
            # This region type has no data for this column, so leave it empty.
            # An explicit check is used instead of catching KeyError so that
            # unrelated lookup errors are not silently hidden.
            continue
        df_results.at[col, f'{df_name}_min'] = df[col].min()
        df_results.at[col, f'{df_name}_max'] = df[col].max()

# The frame was created empty, so its columns have object dtype.
# Convert them to proper numeric dtypes wherever the contents allow it:
df_results = df_results.infer_objects()

In [34]:
# Show the min and max of each data column (rows) for each region type (columns):
df_results

Unnamed: 0,LSOA_min,LSOA_max,closest_ivt_unit_min,closest_ivt_unit_max,closest_mt_unit_min,closest_mt_unit_max,closest_mt_transfer_min,closest_mt_transfer_max,ambulance_service_min,ambulance_service_max,isdn_min,isdn_max,icb_lhb_min,icb_lhb_max
admissions_2122,,,126.0,1848.0,,,,,,,,,,
age_65_plus_count,8.0,1215.0,22687.0,195054.0,107901.0,910958.0,121918.0,905940.0,40858.0,1392786.0,256086.0,1098453.0,36801.0,614170.0
age_65_plus_proportion,0.0013,0.6239,0.0824,0.2871,0.1023,0.2467,0.0824,0.2461,0.122,0.2871,0.122,0.2461,0.1031,0.2766
all_categories_general_health,983.0,8300.0,89939.0,1288965.0,936123.0,4493628.0,896382.0,4560888.0,138265.0,8173941.0,1343601.0,8173941.0,132976.0,2914801.0
all_categories_long_term_health_problem_or_disability,613.0,3855.0,85609.0,1274799.0,925753.0,4432326.0,882914.0,4500601.0,133713.0,8073700.0,1321137.0,8073700.0,131047.0,2864278.0
bad_health_proportion,0.0029,0.2351,0.0321,0.1032,0.0364,0.0806,0.0367,0.0813,0.0393,0.0763,0.0343,0.0729,0.0343,0.0959
bad_or_very_bad_health,4.0,406.0,5258.0,73691.0,44645.0,228235.0,54915.0,249691.0,8953.0,480812.0,55320.0,405473.0,8149.0,212456.0
closest_is_m_False,0.0,8712.0,0.0,1383341.0,187300.0,3643885.0,0.0,3725660.0,142296.0,5433023.0,655973.0,3869052.0,0.0,2290213.0
closest_is_m_True,0.0,17274.0,0.0,1479138.0,468512.0,1479138.0,468512.0,1479138.0,0.0,6141216.0,0.0,6141216.0,0.0,2036470.0
ethnic_group_all_categories_ethnic_group,983.0,8300.0,89939.0,1288965.0,936123.0,4493628.0,896382.0,4560888.0,138265.0,8173941.0,1343601.0,8173941.0,132976.0,2914801.0
