# Split big geojson into regions

## Setup

In [2]:
import pandas as pd
import numpy as np

# For importing geojson:
import json

# For saving new geojson:
from geojson import FeatureCollection

# For drawing maps:
import folium
from folium import plugins

## Import data

In [3]:
# Load the LSOA lookup table. Judging by the preview, it holds one row per
# LSOA (LSOA11CD) with the codes and names of the larger areas it sits in
# (CCG, STP, LHB, LAD, SCN and region).
# NOTE(review): the 'Unnamed: 0' column in the preview looks like a saved
# index column -- confirm whether the file should be read with index_col=0.
df_regions = pd.read_csv('./LSOA_regions.csv')

# Preview the first few rows to check the columns.
df_regions.head()

Unnamed: 0,LSOA11CD,LSOA11NM,LSOA11LONG,LSOA11LAT,CCG19CD,CCG19NM,STP19CD,STP19NM,LHB20CD,LHB20NM,LHB20NMW,LAD17CD,LAD17NM,SCN17CD,SCN17NM,RGN11CD,RGN11NM
0,E01031349,Adur 001A,-0.22737,50.83651,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
1,E01031350,Adur 001B,-0.22842,50.84244,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
2,E01031351,Adur 001C,-0.253,50.85845,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
3,E01031352,Adur 001D,-0.23812,50.8429,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
4,E01031370,Adur 001E,-0.24649,50.83958,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East


Find all of the unique STP codes and LHB codes:

In [3]:
# Distinct STP codes, sorted alphabetically.
# Welsh LSOAs have no STP code; their entries are NaN rather than text,
# so keeping only the string values drops them.
STP_code_set = sorted(
    {STP_code for STP_code in df_regions['STP19CD'] if isinstance(STP_code, str)}
)

# The English groups used from here on are the STPs:
England_code_col = 'STP19CD'
England_name_col = 'STP19NM'
England_code_set = STP_code_set
England_code_set

['E54000005',
 'E54000006',
 'E54000007',
 'E54000008',
 'E54000009',
 'E54000010',
 'E54000011',
 'E54000012',
 'E54000013',
 'E54000014',
 'E54000015',
 'E54000016',
 'E54000017',
 'E54000018',
 'E54000019',
 'E54000020',
 'E54000021',
 'E54000022',
 'E54000023',
 'E54000024',
 'E54000025',
 'E54000026',
 'E54000027',
 'E54000028',
 'E54000029',
 'E54000030',
 'E54000031',
 'E54000032',
 'E54000033',
 'E54000034',
 'E54000035',
 'E54000036',
 'E54000037',
 'E54000038',
 'E54000039',
 'E54000040',
 'E54000041',
 'E54000042',
 'E54000043',
 'E54000044',
 'E54000048',
 'E54000049']

In [4]:
# Distinct LHB codes, sorted alphabetically.
# English LSOAs have no LHB code; their entries are NaN rather than text,
# so keeping only the string values drops them.
LHB_code_set = sorted(
    {LHB_code for LHB_code in df_regions['LHB20CD'] if isinstance(LHB_code, str)}
)

LHB_code_set

['W11000023',
 'W11000024',
 'W11000025',
 'W11000028',
 'W11000029',
 'W11000030',
 'W11000031']

For each STP code and each LHB code, find the LSOAs that belong to it:

In [5]:
# Record these for info or naming of the .geojson files:
code_dict = {}
group_dict = {}

# Record these just out of interest:
region_name_list = []
group_name_list = []
n_lsoas_list = []

# English LSOAs:
for code in England_code_set:
    df_regions_here = df_regions[df_regions[England_code_col] == code]
    LSOA_codes_here = df_regions_here['LSOA11CD'].values
    
    code_dict[code] = LSOA_codes_here
    
    group_name = df_regions_here[England_name_col].iloc[0]
    group_dict[code] = group_name
    
    region_name = df_regions_here['RGN11NM'].iloc[0]
    
    # Update lists for the dataframe:
    region_name_list.append(region_name)
    group_name_list.append(group_name)
    n_lsoas_list.append(len(LSOA_codes_here))
    
# Welsh LSOAs:
for LHB in LHB_code_set:
    df_regions_lhb = df_regions[df_regions['LHB20CD'] == LHB]
    LSOA_codes_for_this_lhb = df_regions_lhb['LSOA11CD'].values
    
    code_dict[LHB] = LSOA_codes_for_this_lhb
    
    group_name = df_regions_lhb['LHB20NM'].iloc[0]
    group_dict[LHB] = group_name
    
    region_name = df_regions_lhb['RGN11NM'].iloc[0]
    
    # Update lists for the dataframe:
    region_name_list.append(region_name)
    group_name_list.append(group_name)
    n_lsoas_list.append(len(LSOA_codes_for_this_lhb))

In [6]:
# Summarise each group: its region, its name and how many LSOAs it contains.
# The frame is built from a dict of columns so that 'Number of LSOAs' stays
# an integer column; stacking the three lists into a single numpy array
# would convert every value, including the counts, to a string.
pd.DataFrame({
    'Region': region_name_list,
    'Group': group_name_list,
    'Number of LSOAs': n_lsoas_list,
})

Unnamed: 0,Region,Group,Number of LSOAs
0,Yorkshire and The Humber,West Yorkshire and Harrogate (Health and Care ...,1521
1,Yorkshire and The Humber,"Humber, Coast and Vale",845
2,North West,Greater Manchester Health and Social Care Part...,1694
3,North West,Cheshire and Merseyside,1562
4,Yorkshire and The Humber,South Yorkshire and Bassetlaw,923
5,West Midlands,Staffordshire and Stoke on Trent,687
6,West Midlands,Shropshire and Telford and Wrekin,301
7,East Midlands,Joined Up Care Derbyshire,621
8,East Midlands,Lincolnshire,420
9,East Midlands,Nottingham and Nottinghamshire Health and Care,609


## Examine the big .geojson

What order are the LSOA shapes stored in? It seems to be sorted by LSOA11CD.

In [4]:
# Load the England and Wales LSOA boundaries.
# The encoding is given explicitly because open() otherwise falls back to
# the platform default, which is not UTF-8 everywhere, and JSON text is UTF-8.
with open(
        './LSOA_(Dec_2011)_Boundaries_Super_Generalised_Clipped_(BSC)_EW_V3.geojson',
        encoding='utf-8'
        ) as f:
    geojson_ew = json.load(f)

In [5]:
# Show the top-level keys of the loaded geojson dictionary.
geojson_ew.keys()

dict_keys(['type', 'name', 'crs', 'features'])

In [6]:
# The LSOA11CD of every feature, in the order the features appear in the
# big geojson. Position k in this list matches geojson_ew['features'][k].
big_geojson_order = [
    feature['properties']['LSOA11CD'] for feature in geojson_ew['features']
]

In [10]:
# Inspect the first feature to see which properties and geometry it carries.
geojson_ew['features'][0]

{'type': 'Feature',
 'properties': {'OBJECTID': 1,
  'LSOA11CD': 'E01000001',
  'LSOA11NM': 'City of London 001A',
  'LSOA11NMW': 'City of London 001A',
  'BNG_E': 532129,
  'BNG_N': 181625,
  'LONG': -0.09706,
  'LAT': 51.5181,
  'Shape__Area': 157794.481079102,
  'Shape__Length': 1685.39177789522,
  'GlobalID': 'b12173a3-5423-4672-a5eb-f152d2345f96'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-0.094744468765127, 51.5205961026855],
    [-0.095455174414778, 51.5154416842748],
    [-0.099722888517677, 51.5167693121822],
    [-0.098498304750799, 51.5205398973512],
    [-0.097265555652221, 51.5215848107683],
    [-0.094744468765127, 51.5205961026855]]]}}

## Match big .geojson info with regions

In [11]:
# Split the big geojson into one file per STP / LHB.

# Map each LSOA code to its position in the big geojson once, up front.
# Calling list.index() for every LSOA rescans the list each time, which is
# quadratic over tens of thousands of LSOAs. setdefault keeps the first
# position when a code occurs more than once, matching list.index().
lsoa_to_index = {}
for ind, lsoa in enumerate(big_geojson_order):
    lsoa_to_index.setdefault(lsoa, ind)

for group, lsoa_here in code_dict.items():
    features_here = []
    for lsoa in lsoa_here:
        # Find where it is in the big geojson:
        ind = lsoa_to_index.get(lsoa)
        if ind is None:
            # The LSOA is in the lookup table but has no shape in the geojson.
            print(f'Problem with {group}, LSOA {lsoa}')
        else:
            features_here.append(geojson_ew['features'][ind])

    features_here = FeatureCollection(features_here)

    # Now save the file as something new, named after the group
    # with spaces replaced by '~':
    group_name = group_dict[group].replace(' ', '~')
    save_name = 'LSOA_' + group_name + '.geojson'

    with open('./lhb_stp_geojson/'+save_name, 'w', encoding='utf-8') as f:
        json.dump(features_here, f, ensure_ascii=False)

### Import one of the newly-made .geojson files

In [13]:
# Read back one of the files written above as a check.
# Those files are written as UTF-8 with ensure_ascii=False, so they must be
# read as UTF-8 too rather than with the platform's default encoding.
with open('./lhb_stp_geojson/LSOA_Devon.geojson', encoding='utf-8') as f:
    geojson_devon = json.load(f)

In [14]:
# Base map to draw the Devon shapes on.
devon_map = folium.Map(
    location=[51, -3.5],
    zoom_start=8,
    tiles='cartodbpositron',
)

# Draw the LSOA outlines from the Devon geojson as a layer on the map,
# with highlighting switched on.
devon_layer = folium.Choropleth(geo_data=geojson_devon, highlight=True)
devon_layer.add_to(devon_map)

# Display the map as the cell output.
devon_map

## Separate London from the rest of England and Wales

In [7]:
# Every distinct region name in the lookup table.
region_name_set = set(df_regions['RGN11NM'])

# Split the alphabetically-sorted names by whether they mention London.
region_names_sorted = sorted(region_name_set)
regions_london = [
    name for name in region_names_sorted if 'London' in name
]
regions_not_london = [
    name for name in region_names_sorted if 'London' not in name
]

In [8]:
# Region names that contain 'London'.
regions_london

['London']

In [9]:
# All remaining region names.
regions_not_london

['East Midlands',
 'East of England',
 'North East',
 'North West',
 'South East',
 'South West',
 'Wales',
 'West Midlands',
 'Yorkshire and The Humber']

In [11]:
# London LSOAs:
group_name = 'London'

# Map each LSOA code to its position in the big geojson once, up front,
# instead of rescanning the list with list.index() for every LSOA.
# setdefault keeps the first position when a code occurs more than once,
# matching list.index().
lsoa_to_index = {}
for ind, lsoa in enumerate(big_geojson_order):
    lsoa_to_index.setdefault(lsoa, ind)

features_here = []
for region_name in regions_london:
    df_regions_here = df_regions[df_regions['RGN11NM'] == region_name]
    LSOA_codes_here = df_regions_here['LSOA11CD'].values

    for lsoa in LSOA_codes_here:
        # Find where it is in the big geojson:
        ind = lsoa_to_index.get(lsoa)
        if ind is None:
            # Report against this cell's own group name. The previous message
            # used `group`, a loop variable left over from a different cell,
            # which is undefined on a fresh kernel.
            print(f'Problem with {group_name}, LSOA {lsoa}')
        else:
            features_here.append(geojson_ew['features'][ind])

features_here = FeatureCollection(features_here)

# Now save the file as something new, with spaces replaced by '~':
group_name = group_name.replace(' ', '~')
save_name = 'LSOA_' + group_name + '.geojson'

with open('./london_geojson/'+save_name, 'w', encoding='utf-8') as f:
    json.dump(features_here, f, ensure_ascii=False)

In [12]:
# Every LSOA outside London:
group_name = 'Not London'

# Map each LSOA code to its position in the big geojson once, up front,
# instead of rescanning the list with list.index() for every LSOA.
# setdefault keeps the first position when a code occurs more than once,
# matching list.index().
lsoa_to_index = {}
for ind, lsoa in enumerate(big_geojson_order):
    lsoa_to_index.setdefault(lsoa, ind)

# Keep every row whose region is not exactly 'London'.
df_regions_here = df_regions[df_regions['RGN11NM'] != 'London']
LSOA_codes_here = df_regions_here['LSOA11CD'].values

features_here = []
for lsoa in LSOA_codes_here:
    # Find where it is in the big geojson:
    ind = lsoa_to_index.get(lsoa)
    if ind is None:
        # Report against this cell's own group name. The previous message
        # used `group`, a loop variable left over from a different cell,
        # which is undefined on a fresh kernel.
        print(f'Problem with {group_name}, LSOA {lsoa}')
    else:
        features_here.append(geojson_ew['features'][ind])

features_here = FeatureCollection(features_here)

# Now save the file as something new, with spaces replaced by '~':
group_name = group_name.replace(' ', '~')
save_name = 'LSOA_' + group_name + '.geojson'

with open('./london_geojson/'+save_name, 'w', encoding='utf-8') as f:
    json.dump(features_here, f, ensure_ascii=False)