# Split big geojson into regions

## Setup

In [9]:
# For importing geojson:
import json
# For creating the output folder:
import os

import numpy as np
import pandas as pd

# For saving new geojson:
from geojson import FeatureCollection

# For drawing maps:
import folium
from folium import plugins

## Import data

In [10]:
# Read the LSOA lookup table; later cells use its 'RGN11NM' and 'LSOA11CD' columns.
df_regions = pd.read_csv('./LSOA_regions.csv')

# Preview the first rows, transposed so each column of the table is shown as a row.
df_regions.head().transpose()

Unnamed: 0,0,1,2,3,4
LSOA11CD,E01031349,E01031350,E01031351,E01031352,E01031370
LSOA11NM,Adur 001A,Adur 001B,Adur 001C,Adur 001D,Adur 001E
LSOA11LONG,-0.22737,-0.22842,-0.253,-0.23812,-0.24649
LSOA11LAT,50.83651,50.84244,50.85845,50.8429,50.83958
CCG19CD,E38000213,E38000213,E38000213,E38000213,E38000213
CCG19NM,NHS Coastal West Sussex CCG,NHS Coastal West Sussex CCG,NHS Coastal West Sussex CCG,NHS Coastal West Sussex CCG,NHS Coastal West Sussex CCG
STP19CD,E54000033,E54000033,E54000033,E54000033,E54000033
STP19NM,Sussex and East Surrey,Sussex and East Surrey,Sussex and East Surrey,Sussex and East Surrey,Sussex and East Surrey
LHB20CD,,,,,
LHB20NM,,,,,


Find all of the unique region names:

In [11]:
# Distinct entries of the region-name column:
region_name_set = set(df_regions['RGN11NM'])
# Keep only the string entries, which discards NaN placeholders for missing names:
region_name_set = list(filter(lambda name: isinstance(name, str), region_name_set))
# Alphabetical order:
region_name_list = sorted(region_name_set)

region_name_list

['East Midlands',
 'East of England',
 'London',
 'North East',
 'North West',
 'South East',
 'South West',
 'Wales',
 'West Midlands',
 'Yorkshire and The Humber']

For each of these regions, find the LSOAs that belong to it:

In [12]:
# Map each region name to the array of LSOA codes whose 'RGN11NM' matches it.
# Kept for info and for naming the .geojson files:
code_dict = {
    name: df_regions.loc[df_regions['RGN11NM'] == name, 'LSOA11CD'].values
    for name in region_name_list
}

# Number of LSOAs per region, in the same order as region_name_list.
# Recorded just out of interest:
n_lsoas_list = [len(code_dict[name]) for name in region_name_list]

In [13]:
# Summary table with one row per region.
# Built from a dict of columns so that the counts keep their integer dtype;
# stacking the names and counts into one numpy array would coerce the counts
# to strings, which breaks numeric sorting and arithmetic on that column.
pd.DataFrame({
    'Region': region_name_list,
    'Number of LSOAs': n_lsoas_list,
})

Unnamed: 0,Region,Number of LSOAs
0,East Midlands,2774
1,East of England,3614
2,London,4835
3,North East,1657
4,North West,4497
5,South East,5382
6,South West,3280
7,Wales,1909
8,West Midlands,3487
9,Yorkshire and The Humber,3317


## Examine the big .geojson

What order are the LSOA shapes stored in? It seems to be sorted by LSOA11CD.

In [14]:
# Load the big LSOA boundary geojson into a dictionary.
# The encoding is stated explicitly so the read does not depend on the
# platform's default text encoding; the files this notebook writes are
# also UTF-8.
with open('./LSOA_(Dec_2011)_Boundaries_Super_Generalised_Clipped_(BSC)_EW_V3.geojson', encoding='utf-8') as f:
    geojson_ew = json.load(f)

In [15]:
# List the top-level keys of the loaded geojson dictionary.
geojson_ew.keys()

dict_keys(['type', 'name', 'crs', 'features'])

In [16]:
# LSOA11CD of every feature, in the same order as the features are stored.
big_geojson_order = [
    feature['properties']['LSOA11CD'] for feature in geojson_ew['features']
]

In [17]:
# Look at the first feature to see which properties and geometry each entry stores.
geojson_ew['features'][0]

{'type': 'Feature',
 'properties': {'OBJECTID': 1,
  'LSOA11CD': 'E01000001',
  'LSOA11NM': 'City of London 001A',
  'LSOA11NMW': 'City of London 001A',
  'BNG_E': 532129,
  'BNG_N': 181625,
  'LONG': -0.09706,
  'LAT': 51.5181,
  'Shape__Area': 157794.481079102,
  'Shape__Length': 1685.39177789522,
  'GlobalID': 'b12173a3-5423-4672-a5eb-f152d2345f96'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-0.094744468765127, 51.5205961026855],
    [-0.095455174414778, 51.5154416842748],
    [-0.099722888517677, 51.5167693121822],
    [-0.098498304750799, 51.5205398973512],
    [-0.097265555652221, 51.5215848107683],
    [-0.094744468765127, 51.5205961026855]]]}}

## Match big .geojson info with regions

In [18]:
# Build the LSOA code -> feature lookup once, so each LSOA is found in
# constant time instead of rescanning the whole feature list for every code.
# setdefault keeps the first feature if a code ever appears more than once,
# which is what a first-match search through the list would return.
feature_by_lsoa = {}
for feature in geojson_ew['features']:
    feature_by_lsoa.setdefault(feature['properties']['LSOA11CD'], feature)

# Make sure the output folder exists so that this cell also runs on a fresh checkout:
output_dir = './region_geojson/'
os.makedirs(output_dir, exist_ok=True)

for group, lsoa_here in code_dict.items():
    # Collect the features of every LSOA in this region:
    features_here = []
    for lsoa in lsoa_here:
        feature = feature_by_lsoa.get(lsoa)
        if feature is None:
            # Report codes that are missing from the big geojson, but carry on
            # so that the rest of the region is still saved.
            print(f'Problem with {group}, LSOA {lsoa}')
        else:
            features_here.append(feature)

    features_here = FeatureCollection(features_here)

    # Now save the file as something new.
    # Spaces in the region name become '~' in the file name:
    group_name = group.replace(' ', '~')
    save_name = 'LSOA_' + group_name + '.geojson'

    with open(output_dir + save_name, 'w', encoding='utf-8') as f:
        # ensure_ascii=False writes non-ASCII characters as they are
        # instead of as \u escape sequences.
        json.dump(features_here, f, ensure_ascii=False)

### Import one of the newly-made .geojson files

In [19]:
# Reload one of the region files written above to check that it is usable.
# The file was written as UTF-8 with ensure_ascii=False, so it is read back
# with the same explicit encoding rather than the platform default.
with open('./region_geojson/LSOA_South~West.geojson', encoding='utf-8') as f:
    geojson_devon = json.load(f)

In [20]:
# Base map, positioned at location [51, -3.5] with an initial zoom level of 8:
devon_map = folium.Map(location=[51, -3.5], zoom_start=8, tiles='cartodbpositron')

# Draw the reloaded region shapes on top of it, highlighting them on hover:
lsoa_layer = folium.Choropleth(geo_data=geojson_devon, highlight=True)
lsoa_layer.add_to(devon_map)

# Leave the map as the last expression so the notebook displays it:
devon_map