In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%run ../notebook_preamble.ipy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import geopandas as gpd

In [4]:
from beis_indicators.geo.coders import NutsCoder, LepCoder
# from beis_indicators.geo.nuts import auto_nuts2_uk
from beis_indicators.indicators import points_to_indicator

In [5]:
lsoa_shp_01 = gpd.read_file('../../data/raw/Lower_Layer_Super_Output_Areas__December_2001__EW_BGC-shp/Lower_Layer_Super_Output_Areas__December_2001__EW_BGC.shp')
lsoa_shp_11 = gpd.read_file('../../data/raw/Lower_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BGC_v3-shp/Lower_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BGC_v3.shp')

In [6]:
# LSOA to lon/lat
lsoa_shp_geo_01 = lsoa_shp_01.to_crs(epsg=4326)
lsoa_shp_geo_11 = lsoa_shp_11.to_crs(epsg=4326)

In [7]:
lsoa_shp_geo_11.head(2)

Unnamed: 0,OBJECTID,LSOA11CD,LSOA11NM,LSOA11NMW,Age_Indica,Shape__Are,Shape__Len,geometry
0,1,E01000001,City of London 001A,City of London 001A,0,133320.768867,2291.846072,POLYGON ((-0.0972886732320328 51.5215770420208...
1,2,E01000002,City of London 001B,City of London 001B,0,226191.273003,2433.960112,POLYGON ((-0.08812915078150488 51.519410719510...


In [8]:
# print(lsoa_shp_geo.iloc[0].geometry.centroid.x, lsoa_shp_geo.iloc[0].geometry.centroid.y)

lsoa_shp_geo_01['lon'] = lsoa_shp_geo_01.geometry.apply(lambda i: i.centroid.x)
lsoa_shp_geo_01['lat'] = lsoa_shp_geo_01.geometry.apply(lambda i: i.centroid.y)

lsoa_shp_geo_11['lon'] = lsoa_shp_geo_11.geometry.apply(lambda i: i.centroid.x)
lsoa_shp_geo_11['lat'] = lsoa_shp_geo_11.geometry.apply(lambda i: i.centroid.y)

In [9]:
lsoa_shp_geo_re_01 = lsoa_shp_geo_01.rename(columns={'LSOA01CD':'LSOA_code'})
lsoa_shp_geo_re_11 = lsoa_shp_geo_11.rename(columns={'LSOA11CD':'LSOA_code'})

2011 data (England and Wales) uses 2001 LSOA data <br />
2013 data (England only) uses 2011 LSOA data

- Read more here: http://data.dft.gov.uk.s3.amazonaws.com/connectivity-data/Brief-guide-to-connectivity-travel-time-data_v2.pdf

## Road Junctions

Method: Averaging time over the nearest 5 road junctions 

Time of day: AM

Mode of transport: Car

In [10]:
#2011 data
df_road = pd.read_csv('../../data/raw/travel/Road-junctions-travel-times/Junctions_HW_AM.csv')
#2013 data
df_road_13 = pd.read_csv('../../data/raw/travel/2013_Junctions_HW_AM.csv')

In [11]:
print(len(df_road))
print(len(df_road_13))

16869696
16051932


In [12]:
df_road_filter = df_road[df_road['NearOrder'] <= 4]
df_road_filter_13 = df_road_13[df_road_13['NearOrder'] <= 4]

In [13]:
len(df_road_filter)
len(df_road_filter_13)

163770

In [14]:
df_road_filter.reset_index(inplace=True, drop=True)
df_road_filter_13.reset_index(inplace=True, drop=True)

In [15]:
df_road_lat_lon = df_road_filter.merge(lsoa_shp_geo_re_01, on='LSOA_code', how='left')#
df_road_lat_lon_13 = df_road_filter_13.merge(lsoa_shp_geo_re_11, on='LSOA_code', how='left')

In [16]:
df_road_lat_lon = df_road_lat_lon[['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']]
df_road_lat_lon_13 = df_road_lat_lon_13[['LSOA_code', 'RepTime', 'Percentage Services', 'UID', 'NearOrder', 'lon', 'lat']]
df_road_lat_lon_13.columns = ['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']

In [17]:
df_road_lat_lon['year'] = [2011]*len(df_road_lat_lon)
df_road_lat_lon_13['year'] = [2013]*len(df_road_lat_lon_13)

In [18]:
df_road_latlon_11_13 = pd.concat([df_road_lat_lon, df_road_lat_lon_13]).reset_index(drop=True)

### NUTS 2

In [155]:
df_road_nuts2 = points_to_indicator(df_road_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=2),
                    aggfunc=np.mean, value_rename='travel_to_junctions_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:36:17,967 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2016 boundaries
2020-10-07 11:36:21,600 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2021 boundaries
2020-10-07 11:36:25,371 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2003 boundaries
2020-10-07 11:36:30,676 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2006 boundaries
2020-10-07 11:36:42,290 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2010 boundaries
2020-10-07 11:36:47,171 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [160]:
df_road_nuts2 = df_road_nuts2.sort_values(by='nuts_id')

Unnamed: 0,year,nuts_id,nuts_year_spec,road_junction_time
0,2011,UKC1,2010,14.98
0,2013,UKC1,2013,15.40
1,2013,UKC2,2013,17.35
1,2011,UKC2,2010,17.76
2,2013,UKD1,2013,38.66
2,2011,UKD1,2010,37.62
3,2013,UKD3,2013,12.30
3,2011,UKD3,2010,11.61
4,2013,UKD4,2013,21.14
4,2011,UKD4,2010,20.25


### NUTS 3

In [161]:
df_road_nuts3 = points_to_indicator(df_road_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=3),
                    aggfunc=np.mean, value_rename='travel_to_junctions_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:43:33,544 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2016 boundaries
2020-10-07 11:43:38,743 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2021 boundaries
2020-10-07 11:43:44,247 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2003 boundaries
2020-10-07 11:43:52,020 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2006 boundaries
2020-10-07 11:44:08,953 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2010 boundaries
2020-10-07 11:44:16,670 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [164]:
df_road_nuts3 = df_road_nuts3.sort_values(by='nuts_id')

### LEP

In [162]:
df_road_lep = points_to_indicator(df_road_latlon_11_13, value_col='RepTime', coder=LepCoder(),
                    aggfunc=np.mean, value_rename='travel_to_junctions_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:45:08,832 - beis_indicators.geo.coders - INFO - Loading LEP 2014 boundaries
2020-10-07 11:45:18,698 - beis_indicators.geo.coders - INFO - Loading LEP 2017 boundaries
2020-10-07 11:45:29,570 - beis_indicators.geo.coders - INFO - Loading LEP 2020 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [166]:
df_road_lep = df_road_lep.sort_values(by='lep_id')

## Airport

Method: Closest airport 

Time of Day: AM

Mode of transport: Car

In [176]:
df_air_11 = pd.read_csv('../../data/raw/travel/Airports-travel-times/Airports_HW_AM.csv')
df_air_13 = pd.read_csv('../../data/raw/travel/2013_Airports_HW_AM.csv')

In [177]:
df_air_filter_11 = df_air_11[df_air_11['NearOrder'] <= 0]
df_air_filter_13 = df_air_13[df_air_13['NearOrder'] <= 0]

In [178]:
print(len(df_air_filter_11))
print(len(df_air_filter_13))

34288
32754


In [179]:
df_air_filter_11.reset_index(inplace=True, drop=True)
df_air_filter_13.reset_index(inplace=True, drop=True)

In [189]:
df_air_lat_lon = df_air_filter_11.merge(lsoa_shp_geo_re_01, on='LSOA_code', how='left')
df_air_lat_lon_13 = df_air_filter_13.merge(lsoa_shp_geo_re_11, on='LSOA_code', how='left')

In [191]:
df_air_lat_lon = df_air_lat_lon[['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']]
df_air_lat_lon_13 = df_air_lat_lon_13[['LSOA_code', 'RepTime', 'Percentage Services', 'UID', 'NearOrder', 'lon', 'lat']]
df_air_lat_lon_13.columns = ['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']

In [193]:
df_air_lat_lon['year'] = [2011]*len(df_air_lat_lon)
df_air_lat_lon_13['year'] = [2013]*len(df_air_lat_lon_13)

In [194]:
df_air_latlon_11_13 = pd.concat([df_air_lat_lon, df_air_lat_lon_13]).reset_index(drop=True)

### NUTS 2

In [195]:
df_air_nuts2 = points_to_indicator(df_air_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=2),
                    aggfunc=np.mean, value_rename='travel_to_airport_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:56:57,366 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2016 boundaries
2020-10-07 11:57:00,784 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2021 boundaries
2020-10-07 11:57:04,136 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2003 boundaries
2020-10-07 11:57:09,219 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2006 boundaries
2020-10-07 11:57:19,688 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2010 boundaries
2020-10-07 11:57:24,463 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [196]:
df_air_nuts2 = df_air_nuts2.sort_values(by='nuts_id')

### NUTS 3

In [199]:
df_air_nuts3 = points_to_indicator(df_air_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=3),
                    aggfunc=np.mean, value_rename='travel_to_airport_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:58:54,849 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2016 boundaries
2020-10-07 11:58:59,944 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2021 boundaries
2020-10-07 11:59:04,870 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2003 boundaries
2020-10-07 11:59:12,223 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2006 boundaries
2020-10-07 11:59:27,944 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2010 boundaries
2020-10-07 11:59:35,116 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [200]:
df_air_nuts3 = df_air_nuts3.sort_values(by='nuts_id')

### LEP

In [202]:
df_air_lep = points_to_indicator(df_air_latlon_11_13, value_col='RepTime', coder=LepCoder(),
                    aggfunc=np.mean, value_rename='travel_to_airport_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 12:00:39,995 - beis_indicators.geo.coders - INFO - Loading LEP 2014 boundaries
2020-10-07 12:00:49,482 - beis_indicators.geo.coders - INFO - Loading LEP 2017 boundaries
2020-10-07 12:00:58,745 - beis_indicators.geo.coders - INFO - Loading LEP 2020 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [203]:
df_air_lep = df_air_lep.sort_values(by='lep_id')

## Rail Stations

Method: Closest rail station 

Time of Day: AM

Mode of transport: Car

In [132]:
df_rail_11 = pd.read_csv('../../data/raw/travel/Rail-stations-travel-times/Stations_HW_AM.csv')
df_rail_13 = pd.read_csv('../../data/raw/travel/2013_Stations_HW_AM.csv')

In [133]:
df_rail_filter_11 = df_rail_11[df_rail_11['NearOrder'] <= 0]
df_rail_filter_13 = df_rail_13[df_rail_13['NearOrder'] <= 0]

In [134]:
print(len(df_rail_filter_11))
print(len(df_rail_filter_13))

34288

In [135]:
df_rail_filter_11.reset_index(inplace=True, drop=True)
df_rail_filter_13.reset_index(inplace=True, drop=True)

In [136]:
df_rail_lat_lon = df_rail_filter_11.merge(lsoa_shp_geo_re_01, on='LSOA_code', how='left')
df_rail_lat_lon_13 = df_rail_filter_13.merge(lsoa_shp_geo_re_11, on='LSOA_code', how='left')

In [137]:
df_rail_lat_lon = df_rail_lat_lon[['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']]
df_rail_lat_lon_13 = df_rail_lat_lon_13[['LSOA_code', 'RepTime', 'Percentage Services', 'UID', 'NearOrder', 'lon', 'lat']]
df_rail_lat_lon_13.columns = ['LSOA_code', 'RepTime', 'Percentage Services', 'uid', 'NearOrder', 'lon', 'lat']

In [None]:
df_rail_lat_lon['year'] = [2011]*len(df_rail_lat_lon)
df_rail_lat_lon_13['year'] = [2013]*len(df_rail_lat_lon_13)

In [None]:
df_rail_latlon_11_13 = pd.concat([df_rail_lat_lon, df_rail_lat_lon_13]).reset_index(drop=True)

### NUTS 2

In [195]:
df_rail_nuts2 = points_to_indicator(df_rail_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=2),
                    aggfunc=np.mean, value_rename='travel_to_rail_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:56:57,366 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2016 boundaries
2020-10-07 11:57:00,784 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2021 boundaries
2020-10-07 11:57:04,136 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2003 boundaries
2020-10-07 11:57:09,219 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2006 boundaries
2020-10-07 11:57:19,688 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2010 boundaries
2020-10-07 11:57:24,463 - beis_indicators.geo.coders - INFO - Loading NUTS 2 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [None]:
df_rail_nuts2 = df_rail_nuts2.sort_values(by='nuts_id')

### NUTS 3

In [199]:
df_rail_nuts3 = points_to_indicator(df_rail_latlon_11_13, value_col='RepTime', coder=NutsCoder(level=3),
                    aggfunc=np.mean, value_rename='travel_to_rail_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 11:58:54,849 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2016 boundaries
2020-10-07 11:58:59,944 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2021 boundaries
2020-10-07 11:59:04,870 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2003 boundaries
2020-10-07 11:59:12,223 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2006 boundaries
2020-10-07 11:59:27,944 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2010 boundaries
2020-10-07 11:59:35,116 - beis_indicators.geo.coders - INFO - Loading NUTS 3 2013 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [None]:
df_rail_nuts3 = df_rail_nuts3.sort_values(by='nuts_id')

### LEP

In [202]:
df_rail_lep = points_to_indicator(df_air_latlon_11_13, value_col='RepTime', coder=LepCoder(),
                    aggfunc=np.mean, value_rename='travel_to_rail_time',
                    projection='EPSG:4326', x_col='lon', y_col='lat')

2020-10-07 12:00:39,995 - beis_indicators.geo.coders - INFO - Loading LEP 2014 boundaries
2020-10-07 12:00:49,482 - beis_indicators.geo.coders - INFO - Loading LEP 2017 boundaries
2020-10-07 12:00:58,745 - beis_indicators.geo.coders - INFO - Loading LEP 2020 boundaries


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined = gpd.sjoin(points, shape, op='within', how='right')


In [203]:
df_rail_lep = df_rail_lep.sort_values(by='lep_id')

## Travel to Work