# Rail Matrix Development

## Basic Packages

In [9]:
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from fiona.crs import from_epsg

# Current working directory
basepath = os.path.dirname(os.getcwd())

# Read list of stations within model coverage area
raw_data = os.path.join(basepath, '01 Raw Data' )
# raw_data



## Station Time of Day split

Import Station Masterlist shapefile

In [10]:
stn_masterlist = gpd.read_file(f'{basepath}/03 Output/02 Shapefiles/station_masterlist.shp')
print(stn_masterlist.crs)
stn_masterlist

EPSG:4326


Unnamed: 0,index,station,internal_s,geometry
0,0,Acocks Green,1,POINT (-1.81884 52.44950)
1,1,Atherstone Warks,1,POINT (-1.55383 52.57859)
2,2,Bedworth,1,POINT (-1.46822 52.48029)
3,3,Berkswell,1,POINT (-1.64431 52.39651)
4,4,Bermuda Park,1,POINT (-1.47289 52.50136)
...,...,...,...,...
552,568,Wymondham,0,POINT (1.11690 52.56468)
553,569,Wythall,0,POINT (-1.86511 52.38110)
554,570,Yardley Wood,0,POINT (-1.85421 52.42077)
555,571,Yate,0,POINT (-2.43065 51.54220)


Import Station Output df


In [3]:
station_od_df = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/04 Station Trips/station_output_df_v2.csv')
station_od_df.head()
fare_daily = station_od_df.groupby(['station1','station2','fare_nofare','output_day']).agg(org_d =('org_daily','sum'),dest_d =('dest_daily','sum')).reset_index()
fare_daily.head()

Unnamed: 0,station1,station2,fare_nofare,output_day,org_d,dest_d
0,Abbey Wood,Atherstone Warks,Fare,Saturday,0.019,0.019
1,Abbey Wood,Atherstone Warks,Fare,Weekday,0.014,0.014
2,Abbey Wood,Berkswell,Fare,Weekday,0.004,0.004
3,Abbey Wood,Bermuda Park,Fare,Weekday,0.002,0.002
4,Abbey Wood,Birmingham Intl,Fare,Saturday,0.387,0.387


### Read the station trips hourly to get distribution for peak hours
- Peak hour factors are calculated for internal station 
- Separate factors for origin and destination by using boarding and alighting
- Separate factors for Weekday and Weekend
- Peak period: AM peak:7-9, Inter peak: 10-12, PM peak: 16-18, Weekend Peak: 10-12
- Average of peak period trips is used as peak hour factors

In [20]:
time_of_day = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/03 Time of Day/station_trips_hourly_Weekday.csv')
total_by_stn = time_of_day.groupby(['station']).agg(offs=('offs','sum'),ons= ('ons','sum')).reset_index()
total_by_stn.rename(columns={'offs':'total_offs','ons':'total_ons'}, inplace=True)
# total_by_stn
peak_periods = {'am':[7,8],
                'ip': [10,11],
                'pm': [16,17],
                }
factors = {}
for peak in peak_periods:
    factors[peak] = time_of_day.loc[(time_of_day['time'] == peak_periods[peak][0]) | (time_of_day['time'] == peak_periods[peak][1])]
    factors[peak] = factors[peak].groupby(['station']).agg(offs=('offs','sum'),ons= ('ons','sum')).reset_index()
    factors[peak]['offs'] = factors[peak]['offs']/2
    factors[peak]['ons'] = factors[peak]['ons']/2
    factors[peak] = pd.merge(factors[peak], total_by_stn, on='station', how='left')
    factors[peak]['org_factor'] = factors[peak]['offs']/factors[peak]['total_offs'].round(3)
    factors[peak]['dest_factor'] = factors[peak]['ons']/factors[peak]['total_ons'].round(3)

factors['pm']

Unnamed: 0,station,offs,ons,total_offs,total_ons,org_factor,dest_factor
0,Acocks Green,71.5,34.5,509,507,0.140472,0.068047
1,Atherstone Warks,13.0,12.5,161,159,0.080745,0.078616
2,Bedworth,8.0,7.0,81,84,0.098765,0.083333
3,Berkswell,60.0,28.0,402,400,0.149254,0.07
4,Bermuda Park,4.0,3.0,31,30,0.129032,0.1
5,Birmingham Intl,798.0,490.0,6870,6345,0.116157,0.077226
6,Canley,80.5,46.0,657,661,0.122527,0.069592
7,Claverdon,0.5,1.5,5,7,0.1,0.214286
8,Coleshill Pkway,40.0,16.0,286,277,0.13986,0.057762
9,Coventry,1041.0,1075.0,11183,11194,0.093088,0.096034


#### Station wise trips estimated by multiplying daily trips with station specific origin and destination factors

In [38]:
#Station wise trips estimated by multiplying daily trips with station specific origin and destination factors
wk_nf_trips = fare_daily.loc[(fare_daily['output_day']=='Weekday')&(fare_daily['fare_nofare']=='Fare')].copy()
peaks = ['am','ip','pm']
org_dest = ['org','dest']
peak_trips = {}
for index, row in wk_nf_trips.iterrows():
    station1 = row['station1']
    station2 = row['station2']
    org_d = row['org_d']
    dest_d = row['dest_d']
    for o_d in org_dest:
        for peak in peaks:
            try:
                peak_trips[f'{peak}_{o_d}'] = factors[peak].loc[(factors[peak]['station'] == row['station1'])][f'{o_d}_factor'].values[0]*row[f'{o_d}_d']
            except IndexError:
                peak_trips[f'{peak}_{o_d}'] = factors[peak].loc[(factors[peak]['station'] == row['station2'])][f'{o_d}_factor'].values[0]*row[f'{o_d}_d']

            wk_nf_trips.loc[index, f'{peak}_{o_d}'] = peak_trips[f'{peak}_{o_d}']

wk_nf_trips.to_csv('Weekday Fare Peak-hour trips_v2.csv')
wk_nf_trips.head()

Unnamed: 0,station1,station2,fare_nofare,output_day,org_d,dest_d,am_org,ip_org,pm_org,am_dest,ip_dest,pm_dest
1,Abbey Wood,Atherstone Warks,Fare,Weekday,0.014,0.014,0.000478,0.00087,0.00113,0.000969,0.001145,0.001101
2,Abbey Wood,Berkswell,Fare,Weekday,0.004,0.004,0.000338,0.000164,0.000597,0.00076,0.000215,0.00028
3,Abbey Wood,Bermuda Park,Fare,Weekday,0.002,0.002,0.000161,6.5e-05,0.000258,0.000333,6.7e-05,0.0002
5,Abbey Wood,Birmingham Intl,Fare,Weekday,0.38,0.38,0.024836,0.021213,0.04414,0.052254,0.026052,0.029346
6,Abbey Wood,Canley,Fare,Weekday,0.012,0.012,0.000813,0.000575,0.00147,0.001897,0.000781,0.000835


In [39]:
wk_f_trips2[wk_f_trips2['station1']=='Coventry'].sum()

station1       CoventryCoventryCoventryCoventryCoventryCovent...
station2       Birmingham BRBradford Yks BRCaerphillyCarltonC...
fare_nofare    FareFareFareFareFareFareFareFareFareFareFareFa...
output_day     WeekdayWeekdayWeekdayWeekdayWeekdayWeekdayWeek...
org_d                                                   6002.453
dest_d                                                  6002.453
am_org                                                508.837114
ip_org                                                360.157915
pm_org                                                558.754679
am_dest                                               680.195786
ip_dest                                               423.078025
pm_dest                                               576.437107
dtype: object

In [40]:
wk_f_trips2[wk_f_trips2['station2']=='Coventry'].sum()

station1       Abbey WoodAberdeenAbergavennyAcocks GreenActon...
station2       CoventryCoventryCoventryCoventryCoventryCovent...
fare_nofare    FareFareFareFareFareFareFareFareFareFareFareFa...
output_day     WeekdayWeekdayWeekdayWeekdayWeekdayWeekdayWeek...
org_d                                                   2757.614
dest_d                                                  2757.614
am_org                                                 219.02353
ip_org                                                160.535463
pm_org                                                275.739149
am_dest                                               336.155356
ip_dest                                               190.762757
pm_dest                                               249.349907
dtype: object

In [30]:
#Station wise trips estimated by multiplying daily trips with station specific origin and destination factors
wk_nf_trips = fare_daily.loc[(fare_daily['output_day']=='Weekday')&(fare_daily['fare_nofare']=='No_Fare')].copy()
peaks = ['am','ip','pm']
org_dest = ['org','dest']
peak_trips = {}
for index, row in wk_nf_trips.iterrows():
    station1 = row['station1']
    station2 = row['station2']
    org_d = row['org_d']
    dest_d = row['dest_d']
    for o_d in org_dest:
        for peak in peaks:
            try:
                peak_trips[f'{peak}_{o_d}'] = factors[peak].loc[(factors[peak]['station'] == row['station1'])][f'{o_d}_factor'].values[0]*row[f'{o_d}_d']
            except IndexError:
                peak_trips[f'{peak}_{o_d}'] = factors[peak].loc[(factors[peak]['station'] == row['station2'])][f'{o_d}_factor'].values[0]*row[f'{o_d}_d']

            wk_nf_trips.loc[index, f'{peak}_{o_d}'] = peak_trips[f'{peak}_{o_d}']

wk_nf_trips.to_csv('Weekday No_Fare Peak-hour trips_v2.csv')
wk_nf_trips.head()

Unnamed: 0,station1,station2,fare_nofare,output_day,org_d,dest_d,am_org,ip_org,pm_org,am_dest,ip_dest,pm_dest
119,Acocks Green,Adderley Park,No_Fare,Weekday,0.02,0.02,0.001611,0.000884,0.002809,0.003984,0.001065,0.001361
131,Acocks Green,Aston,No_Fare,Weekday,0.248,0.248,0.019976,0.010963,0.034837,0.049404,0.013207,0.016876
163,Acocks Green,Bescot Stadium,No_Fare,Weekday,0.004,0.004,0.000322,0.000177,0.000562,0.000797,0.000213,0.000272
172,Acocks Green,Birmingham BR,No_Fare,Weekday,128.045,128.045,10.314037,5.660142,17.986675,25.507978,6.818964,8.713121
176,Acocks Green,Birmingham Intl,No_Fare,Weekday,0.057,0.057,0.004591,0.00252,0.008007,0.011355,0.003036,0.003879


## Station to Station to Zone-Zone Distribution

### Read Zone shapefiles
- Read zone shapefiles
- Map stations in Station Masterlist to Zones and get Zone numbers

In [11]:
# Load zone  shapefile
# Load zone centroid shapefile
zone_gdf = gpd.read_file(f'{raw_data}/07 Connectors/v20/zones_geometry_zone.SHP')
# zone_gdf.rename(columns={'BUS_DEMA~1':'bus_wk','RAIL_DEM~2':'rail_wk',},inplace=True)
crs = zone_gdf.crs
zone_gdf

Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,geometry
0,101,External,483442.3370,358139.5450,"MULTIPOLYGON (((401565.070 368872.379, 401380...."
1,201,External,571805.2540,263684.3400,"MULTIPOLYGON (((501963.011 305786.877, 502524...."
2,301,External,548177.9270,143853.3340,"MULTIPOLYGON (((488290.273 149918.883, 488410...."
3,401,External,434860.3710,494161.1640,"POLYGON ((380989.322 636877.374, 381197.296 63..."
4,505,External,352895.0290,461898.3870,"MULTIPOLYGON (((331957.783 391954.992, 331916...."
...,...,...,...,...,...
841,19015,FMA,439734.0000,283278.0000,"POLYGON ((442002.364 282450.696, 442020.319 28..."
842,19016,FMA,438788.0000,284631.0000,"POLYGON ((438301.042 285714.158, 438901.977 28..."
843,21861,FMA,429403.0000,275752.0000,"POLYGON ((433150.315 274749.928, 433196.349 27..."
844,21862,FMA,434745.0000,275074.0000,"POLYGON ((433708.371 275537.959, 433751.647 27..."


### Station Masterlist

In [12]:
stn_masterlist = gpd.read_file(f'{basepath}/03 Output/02 Shapefiles/station_masterlist.shp').to_crs(crs)
print(stn_masterlist.crs)
stn_masterlist.head()
# stn_masterlist.to_csv('stn_masterlist_check.csv')

PROJCS["British_National_Grid_TOWGS",GEOGCS["OSGB36",DATUM["Ordnance_Survey_of_Great_Britain_1936",SPHEROID["Airy 1830",6377563.396,299.3249646,AUTHORITY["EPSG","7001"]],TOWGS84[446.4,-125.2,542.1,0.15,0.247,0.842,-20.49],AUTHORITY["EPSG","6277"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",49],PARAMETER["central_meridian",-2],PARAMETER["scale_factor",0.999601272],PARAMETER["false_easting",400000],PARAMETER["false_northing",-100000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Unnamed: 0,index,station,internal_s,geometry
0,0,Acocks Green,1,POINT (412409.486 283519.823)
1,1,Atherstone Warks,1,POINT (430331.001 297957.000)
2,2,Bedworth,1,POINT (436213.001 287062.000)
3,3,Berkswell,1,POINT (424300.001 277670.000)
4,4,Bermuda Park,1,POINT (435879.014 289403.290)


### Mapped all stations to zones containing these stations

In [13]:
stn_zones = gpd.sjoin(stn_masterlist, zone_gdf[['NO','MODEL_AREA','geometry']], how="left", predicate="within")
stn_zones.rename(columns= {'NO':'zoneno','MODEL_AREA':'model_area'},inplace=True) 

stn_zones = stn_zones[['station','zoneno','model_area']]
fringe_stn_zone = {'Dundee':601,
                   'Edinburgh': 601,
                   'Penzance': 801,
                   'Portsmouth Hbr': 701,
                   'Weymouth': 801
                   }
stn_zones['zoneno'] = stn_zones['station'].map(fringe_stn_zone).fillna(stn_zones['zoneno'])
stn_zones['model_area'] = stn_zones['model_area'].fillna('External')

stn_zones.loc[len(stn_zones.index)] = ['Liverpool BR', 505, 'External'] 
stn_zones['zoneno'] = stn_zones['zoneno'].astype('int64')

stn_zones.reset_index(drop=True, inplace=True)
stn_zones


Unnamed: 0,station,zoneno,model_area
0,Acocks Green,1029,External
1,Atherstone Warks,1681,External
2,Bedworth,1809,FMA
3,Berkswell,2050,FMA
4,Bermuda Park,1782,FMA
...,...,...,...
553,Wythall,1236,External
554,Yardley Wood,1028,External
555,Yate,801,External
556,York,401,External


### Get distribution factors and park_ride_stn for each internal zone

In [14]:
model_cover_dist = pd.read_excel(f'{basepath}/03 Output/04 Matrix Input/05 Zone Split Factors/station_zone_dist_factors_v2.xlsx',
                                 sheet_name='station_od_trips')
model_cover_dist.head()

Unnamed: 0,station,visum_stop,within_fma,park_ride_stn,shortest con,act_od_trips_1,pr_trips_2,gravity_dist_3,total_4,Updated P&R
0,Acocks Green,Acocks Green,0,1029,1.516536,0.0,0.283582,0.716418,1,
1,Atherstone Warks,Atherstone,0,1681,0.748515,0.0,0.283582,0.716418,1,
2,Bedworth,Bedworth,1,1809,0.159309,0.0,0.283582,0.716418,1,1809.0
3,Berkswell,Berkswell,1,2050,0.64227,0.0,0.283582,0.716418,1,2050.0
4,Bermuda Park,Bermuda Park,1,1782,0.716255,0.0,0.283582,0.716418,1,1782.0


### Get demand split to all zones for each internal station
- Read coventry_station_trips.csv for Coventry access egress trips
- Read compiled_int_stn_trips_df.csv for other stations access egress trips
- Compile both dfs
- Create point geometry from od_lat and od_long
- map point geometry to zones polygon and get zone information


In [15]:
### Read coventry station access 
from shapely.geometry import Point
from pyproj import Proj, transform
cov_zone_dist = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/05 Zone Split Factors/coventry_station_trips.csv')
intstn_zone_dist = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/05 Zone Split Factors/compiled_int_stn_trips_df.csv')
stn_zone_dist = pd.concat([cov_zone_dist,intstn_zone_dist]).reset_index(drop=True)
in_proj = Proj('epsg:4326')
out_proj = Proj(crs)
stn_zone_dist['x'], stn_zone_dist['y'] = transform(in_proj, out_proj, stn_zone_dist['od_lat'].values, stn_zone_dist['od_long'].values)
geometry = [Point(xy) for xy in zip(stn_zone_dist['x'], stn_zone_dist['y'])]

stn_zone_gdf = gpd.GeoDataFrame(stn_zone_dist, geometry=geometry, crs=crs).reset_index(drop=True)
stn_zone_gdf.drop(['x','y','od_long','od_lat','Unnamed: 0'],axis=1, inplace=True)
stn_zones_interim = gpd.sjoin(stn_masterlist, zone_gdf[['NO','MODEL_AREA','geometry']], how="left", predicate="within")
stn_zones_interim.rename(columns= {'NO':'zoneno','MODEL_AREA':'model_area'},inplace=True) 

## Correct the distance column by using same crs
from geopy.distance import distance
stn_zone_gdf_check = stn_zone_gdf.merge(stn_zones_interim[['station','zoneno','model_area','geometry']],on='station', suffixes=('_1', '_2'))

def calculate_distance(row):
    return round(float(Point(row['geometry_1']).distance(Point(row['geometry_2']))),0)

stn_zone_gdf_check['distance'] = stn_zone_gdf_check.apply(calculate_distance, axis=1)
stn_zone_gdf = gpd.GeoDataFrame(stn_zone_gdf_check[['station','mode','distance']], geometry=stn_zone_gdf_check['geometry_1'],crs=crs)
stn_zone_gdf



  stn_zone_dist['x'], stn_zone_dist['y'] = transform(in_proj, out_proj, stn_zone_dist['od_lat'].values, stn_zone_dist['od_long'].values)


Unnamed: 0,station,mode,distance,geometry
0,Coventry,Walk,989.0,POINT (433282.044 279137.893)
1,Coventry,Walk,989.0,POINT (433282.044 279137.893)
2,Coventry,Car,2151.0,POINT (431600.990 276857.619)
3,Coventry,Car,2151.0,POINT (431600.990 276857.619)
4,Coventry,Car,1311.0,POINT (433101.179 276857.614)
...,...,...,...,...
843,Solihull,Car,3755.0,POINT (414498.838 275957.577)
844,Berkswell,Walk,560.0,POINT (424316.041 277109.946)
845,Berkswell,Walk,560.0,POINT (424316.041 277109.946)
846,Bedworth,Car,2489.0,POINT (438701.921 287058.784)


In [16]:
stn_zone_gdf_1 = gpd.sjoin(stn_zone_gdf, zone_gdf[['NO','MODEL_AREA','geometry']], how="left", predicate="within")
stn_zone_gdf_1.rename(columns= {'NO':'zoneno','MODEL_AREA':'model_area'},inplace=True)
stn_zone_gdf_1.drop(columns=['index_right'], axis=1, inplace=True)
stn_zone_gdf_1.reset_index(drop=True, inplace=True)
stn_zone_gdf_1

Unnamed: 0,station,mode,distance,geometry,zoneno,model_area
0,Coventry,Walk,989.0,POINT (433282.044 279137.893),14172,AoDM
1,Coventry,Walk,989.0,POINT (433282.044 279137.893),14172,AoDM
2,Coventry,Car,2151.0,POINT (431600.990 276857.619),14322,AoDM
3,Coventry,Car,2151.0,POINT (431600.990 276857.619),14322,AoDM
4,Coventry,Car,1311.0,POINT (433101.179 276857.614),1461,AoDM
...,...,...,...,...,...,...
843,Solihull,Car,3755.0,POINT (414498.838 275957.577),2062,External
844,Berkswell,Walk,560.0,POINT (424316.041 277109.946),2050,FMA
845,Berkswell,Walk,560.0,POINT (424316.041 277109.946),2050,FMA
846,Bedworth,Car,2489.0,POINT (438701.921 287058.784),1806,FMA


In [17]:
stn_zone_gdf_1[stn_zone_gdf_1['zoneno']==1401]

Unnamed: 0,station,mode,distance,geometry,zoneno,model_area
23,Coventry,Walk,971.0,POINT (434201.324 278557.808),1401,AoDM
24,Coventry,Walk,971.0,POINT (434201.324 278557.808),1401,AoDM
130,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
131,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
215,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
216,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
277,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
278,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
281,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM
282,Coventry,Walk,1017.0,POINT (434201.324 278657.820),1401,AoDM


In [18]:
# ### Check existing modes of access & egress
# stn_zone_gdf_1['mode'].value_counts()

### Create new column called asgm_mode
agg_mode = {'Car':'Car','Walk':'Walk','Bus':'Bus','Train':'Train','Bike':'Walk'}

stn_zone_gdf_1['agg_mode'] = stn_zone_gdf_1['mode'].map(agg_mode)
stn_zone_gdf_1

Unnamed: 0,station,mode,distance,geometry,zoneno,model_area,agg_mode
0,Coventry,Walk,989.0,POINT (433282.044 279137.893),14172,AoDM,Walk
1,Coventry,Walk,989.0,POINT (433282.044 279137.893),14172,AoDM,Walk
2,Coventry,Car,2151.0,POINT (431600.990 276857.619),14322,AoDM,Car
3,Coventry,Car,2151.0,POINT (431600.990 276857.619),14322,AoDM,Car
4,Coventry,Car,1311.0,POINT (433101.179 276857.614),1461,AoDM,Car
...,...,...,...,...,...,...,...
843,Solihull,Car,3755.0,POINT (414498.838 275957.577),2062,External,Car
844,Berkswell,Walk,560.0,POINT (424316.041 277109.946),2050,FMA,Walk
845,Berkswell,Walk,560.0,POINT (424316.041 277109.946),2050,FMA,Walk
846,Bedworth,Car,2489.0,POINT (438701.921 287058.784),1806,FMA,Car


### Get % distribution for Coventry station Walk, Bus and Train access egreess trips to respective Zones

In [13]:
cov_act_od_trips = stn_zone_gdf_1[((stn_zone_gdf_1['agg_mode']=='Walk') & (stn_zone_gdf_1['station']=='Coventry') & (stn_zone_gdf_1['distance'] < 1800)) |
                                  ((stn_zone_gdf_1['agg_mode']=='Bus') & (stn_zone_gdf_1['station']=='Coventry') & (stn_zone_gdf_1['distance'] < 5000)) |
                                   ((stn_zone_gdf_1['agg_mode']=='Train') & (stn_zone_gdf_1['station']=='Coventry') )]
cov_act_od_dist = cov_act_od_trips.groupby(['station','zoneno']).agg(zone_count = ('zoneno','count')).reset_index()
cov_act_od_dist['perc_split'] = round(cov_act_od_dist['zone_count']/cov_act_od_dist['zone_count'].sum(),6)
cov_act_od_dist.head()


Unnamed: 0,station,zoneno,zone_count,perc_split
0,Coventry,1303,6,0.016086
1,Coventry,1322,4,0.010724
2,Coventry,1332,2,0.005362
3,Coventry,1338,2,0.005362
4,Coventry,1343,1,0.002681


In [14]:
cov_act_od_dist

Unnamed: 0,station,zoneno,zone_count,perc_split
0,Coventry,1303,6,0.016086
1,Coventry,1322,4,0.010724
2,Coventry,1332,2,0.005362
3,Coventry,1338,2,0.005362
4,Coventry,1343,1,0.002681
...,...,...,...,...
65,Coventry,14343,2,0.005362
66,Coventry,14524,4,0.010724
67,Coventry,14542,2,0.005362
68,Coventry,14721,5,0.013405


### Distribute the Weekday peak hour trips to zone as per methodology
Iterate through each Station OD trip matrix and assign them to zone-zone matrix as per following:
- For trips between Internal station to External station or vice-versa, create a list of all combination of internal zones (3) to external zone(1) i.e. 3 combinations, allocate the peak hour trips stn trips to zones.
- For trips between Internal station to Internal station, create a list of all combination of internal zones (3) to internals  zone (3) i.e. 9 combinations, allocate the peak hour trips stn trips to zones.


In [15]:
wk_f_trips = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/04 Station Trips/Weekday Peak-hour trips.csv')
wk_f_trips.rename(columns= {'org_d':'day_org','dest_d':'day_dest'},inplace=True)
wk_f_trips


Unnamed: 0,station1,station2,output_day,day_org,day_dest,am_org,ip_org,pm_org,am_dest,ip_dest,pm_dest
0,Abbey Wood,Atherstone Warks,Weekday,0.014,0.014,0.000478,0.000870,0.001130,0.000969,0.001145,0.001101
1,Abbey Wood,Berkswell,Weekday,0.004,0.004,0.000338,0.000164,0.000597,0.000760,0.000215,0.000280
2,Abbey Wood,Bermuda Park,Weekday,0.002,0.002,0.000161,0.000064,0.000258,0.000333,0.000067,0.000200
3,Abbey Wood,Birmingham Intl,Weekday,0.380,0.380,0.024836,0.021213,0.044140,0.052254,0.026052,0.029346
4,Abbey Wood,Canley,Weekday,0.012,0.012,0.000813,0.000575,0.001470,0.001897,0.000781,0.000835
...,...,...,...,...,...,...,...,...,...,...,...
10230,Wood End,Water Orton,Weekday,0.004,0.004,0.000110,0.000110,0.001014,0.001027,0.000162,0.000270
10231,Worcs Parkway,Warwick,Weekday,0.046,0.046,0.003476,0.002297,0.005152,0.006230,0.002790,0.003751
10232,Worcs Parkway,Warwick Parkway,Weekday,0.014,0.014,0.000419,0.000790,0.001149,0.001556,0.001232,0.000897
10233,Worcs Parkway,Water Orton,Weekday,0.012,0.012,0.000329,0.000329,0.003041,0.003081,0.000486,0.000811


In [46]:
model_cover_dist = pd.read_excel(f'{basepath}/03 Output/04 Matrix Input/05 Zone Split Factors/station_zone_dist_factors_v2.xlsx',
                                 sheet_name='station_od_trips')
model_cover_dist.head()

Unnamed: 0,station,visum_stop,within_fma,park_ride_stn,shortest con,act_od_trips_1,pr_trips_2,gravity_dist_3,total_4,Updated P&R
0,Acocks Green,Acocks Green,0,1029,1.516536,0.0,0.283582,0.716418,1,
1,Atherstone Warks,Atherstone,0,1681,0.748515,0.0,0.283582,0.716418,1,
2,Bedworth,Bedworth,1,1809,0.159309,0.0,0.283582,0.716418,1,1809.0
3,Berkswell,Berkswell,1,2050,0.64227,0.0,0.283582,0.716418,1,2050.0
4,Bermuda Park,Bermuda Park,1,1782,0.716255,0.0,0.283582,0.716418,1,1782.0


In [47]:
fma_stations_dist = model_cover_dist[model_cover_dist['within_fma']==1].copy()
fma_stations_dist

Unnamed: 0,station,visum_stop,within_fma,park_ride_stn,shortest con,act_od_trips_1,pr_trips_2,gravity_dist_3,total_4,Updated P&R
2,Bedworth,Bedworth,1,1809,0.159309,0.0,0.283582,0.716418,1,1809.0
3,Berkswell,Berkswell,1,2050,0.64227,0.0,0.283582,0.716418,1,2050.0
4,Bermuda Park,Bermuda Park,1,1782,0.716255,0.0,0.283582,0.716418,1,1782.0
6,Canley,Canley,1,1430,0.39064,0.0,0.283582,0.716418,1,1430.0
8,Coleshill Pkway,Coleshill Parkway,1,1698,1.643621,0.0,0.283582,0.716418,1,1688.0
9,Coventry,Coventry,1,14142,0.236142,0.672956,0.327044,0.0,1,14142.0
10,Coventry Arena,Coventry Arena,1,12762,0.461025,0.0,0.283582,0.716418,1,12762.0
12,Hampton In Arden,Hampton-in-Arden,1,2023,0.085374,0.0,0.283582,0.716418,1,2023.0
13,Hatton,Hatton,1,2178,1.559351,0.0,0.283582,0.716418,1,2178.0
15,Kenilworth,Kenilworth,1,2175,0.234055,0.0,0.283582,0.716418,1,2175.0


In [48]:
cov_act_od_dist['act_perc'] = cov_act_od_dist['perc_split']*fma_stations_dist.loc[fma_stations_dist['station']=='Coventry']['act_od_trips_1'].values[0]
cov_od_dist = cov_act_od_dist[['zoneno','act_perc']].copy()
cov_od_dist.loc[len(cov_od_dist.index)] = [fma_stations_dist.loc[fma_stations_dist['station']=='Coventry']['park_ride_stn'].values[0], fma_stations_dist.loc[fma_stations_dist['station']=='Coventry']['pr_trips_2'].values[0]]
cov_od_dist['zoneno'] = cov_od_dist['zoneno'].astype('int64')
cov_od_dist
# cov_od_dist['act_perc'].sum() Check - Should be 1

Unnamed: 0,zoneno,act_perc
0,1303,0.010825
1,1322,0.007217
2,1332,0.003608
3,1338,0.003608
4,1343,0.001804
...,...,...
66,14524,0.007217
67,14542,0.003608
68,14721,0.009021
69,21863,0.003608


In [49]:
int_train_dist = fma_stations_dist[fma_stations_dist['station']!='Coventry'][['station','park_ride_stn','pr_trips_2']].copy()
int_train_dist.rename(columns={'park_ride_stn':'zoneno','pr_trips_2':'act_perc'},inplace=True)
int_train_dist.reset_index(drop=True, inplace=True)
int_train_dist

Unnamed: 0,station,zoneno,act_perc
0,Bedworth,1809,0.283582
1,Berkswell,2050,0.283582
2,Bermuda Park,1782,0.283582
3,Canley,1430,0.283582
4,Coleshill Pkway,1698,0.283582
5,Coventry Arena,12762,0.283582
6,Hampton In Arden,2023,0.283582
7,Hatton,2178,0.283582
8,Kenilworth,2175,0.283582
9,Nuneaton,1760,0.283582


In [50]:
# Load zone centroid shapefile
zone_centroids = gpd.read_file(f'{raw_data}/07 Connectors/v20/zones_geometry_zone.SHP')[['NO','MODEL_AREA','XCOORD','YCOORD','geometry']]
zone_centroids['geometry'] = [Point(x, y) for x, y in zip(zone_centroids['XCOORD'], zone_centroids['YCOORD'])]
crs = zone_centroids.crs
zone_centroids

Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,geometry
0,101,External,483442.3370,358139.5450,POINT (483442.337 358139.545)
1,201,External,571805.2540,263684.3400,POINT (571805.254 263684.340)
2,301,External,548177.9270,143853.3340,POINT (548177.927 143853.334)
3,401,External,434860.3710,494161.1640,POINT (434860.371 494161.164)
4,505,External,352895.0290,461898.3870,POINT (352895.029 461898.387)
...,...,...,...,...,...
841,19015,FMA,439734.0000,283278.0000,POINT (439734.000 283278.000)
842,19016,FMA,438788.0000,284631.0000,POINT (438788.000 284631.000)
843,21861,FMA,429403.0000,275752.0000,POINT (429403.000 275752.000)
844,21862,FMA,434745.0000,275074.0000,POINT (434745.000 275074.000)


In [51]:
# Load node shapefile
fma_stn_list = stn_masterlist[stn_masterlist['station'].isin(fma_stations_dist.loc[fma_stations_dist['station'] != 'Coventry', 'station'])]
fma_stn_list

Unnamed: 0,index,station,internal_s,geometry
2,2,Bedworth,1,POINT (436213.001 287062.000)
3,3,Berkswell,1,POINT (424300.001 277670.000)
4,4,Bermuda Park,1,POINT (435879.014 289403.290)
6,6,Canley,1,POINT (430850.001 278070.000)
8,8,Coleshill Pkway,1,POINT (420000.001 290980.000)
10,10,Coventry Arena,1,POINT (434423.741 283457.480)
12,12,Hampton In Arden,1,POINT (420616.001 281066.000)
13,13,Hatton,1,POINT (422560.001 266320.000)
15,15,Kenilworth,1,POINT (429343.684 271701.955)
22,22,Nuneaton,1,POINT (436361.001 292299.000)


In [52]:
pop_data = pd.read_csv(f'{basepath}/03 Output/04 Matrix Input/08 zone population/zonal_population_AoDM_FMA.csv')
pop_data.rename(columns={'zone':'NO'},inplace=True)
pop_data

Unnamed: 0.1,Unnamed: 0,NO,model_area,pop,overlap_per
0,0,1278,AoDM,2050.0,100.0
1,1,1281,AoDM,1749.0,100.0
2,2,1282,AoDM,1692.0,100.0
3,3,1283,AoDM,2080.0,100.0
4,4,1284,AoDM,1472.0,100.0
...,...,...,...,...,...
470,470,19015,FMA,244.0,14.0
471,471,19016,FMA,281.0,16.0
472,472,21861,AoDM,1188.0,22.0
473,473,21862,FMA,878.0,16.0


In [53]:
zone_df = zone_centroids.merge(pop_data[['NO','pop']],on='NO')
zone_df

Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,geometry,pop
0,1278,AoDM,435567.5780,283621.2880,POINT (435567.578 283621.288),2050.0
1,1281,AoDM,432823.5540,283635.2980,POINT (432823.554 283635.298),1749.0
2,1282,AoDM,433001.6819,283334.6300,POINT (433001.682 283334.630),1692.0
3,1283,AoDM,432666.9550,282852.6410,POINT (432666.955 282852.641),2080.0
4,1284,AoDM,435449.3437,282968.8168,POINT (435449.344 282968.817),1472.0
...,...,...,...,...,...,...
470,19015,FMA,439734.0000,283278.0000,POINT (439734.000 283278.000),244.0
471,19016,FMA,438788.0000,284631.0000,POINT (438788.000 284631.000),281.0
472,21861,FMA,429403.0000,275752.0000,POINT (429403.000 275752.000),1188.0
473,21862,FMA,434745.0000,275074.0000,POINT (434745.000 275074.000),878.0


In [54]:
walkable_zones = pd.DataFrame(columns=['station','zoneno','pop', 'distance','dist_perc'])


# Iterate through each node
for _, row in fma_stn_list.iterrows():
    
    # Iterate through all zones and add connectors to connect to all Zones within 400m
    for index, zone in zone_df.iterrows():
        distance = row['geometry'].distance(zone['geometry'])

        if distance <=1200:
            walkable_zones.loc[len(walkable_zones.index)] = [row['station'], zone['NO'], zone['pop'],distance,round(float(zone['pop']/(distance ** 2)),3)] 

walkable_zones_grouped = walkable_zones.groupby(['station']).agg(station_sum = ('dist_perc','sum')).reset_index()
walkable_zones_grouped
walkable_zones = walkable_zones.merge(walkable_zones_grouped, on='station')
walkable_zones['gravity_split'] = round(walkable_zones['dist_perc']/walkable_zones['station_sum'],6)
walkable_zones = walkable_zones.merge(fma_stations_dist[['station','gravity_dist_3']],on='station')
walkable_zones['act_grav_split'] = round(walkable_zones['gravity_split']*walkable_zones['gravity_dist_3'],6)
walkable_zones.rename(columns={'act_grav_split':'act_perc'},inplace=True)

In [55]:
walkable_zones

Unnamed: 0,station,zoneno,pop,distance,dist_perc,station_sum,gravity_split,gravity_dist_3,act_perc
0,Bedworth,1794,1481.0,820.121877,0.002,0.089,0.022472,0.716418,0.016099
1,Bedworth,1796,1073.0,1144.237610,0.001,0.089,0.011236,0.716418,0.008050
2,Bedworth,1798,1563.0,884.822536,0.002,0.089,0.022472,0.716418,0.016099
3,Bedworth,1799,1160.0,1164.924399,0.001,0.089,0.011236,0.716418,0.008050
4,Bedworth,1800,1323.0,841.789866,0.002,0.089,0.022472,0.716418,0.016099
...,...,...,...,...,...,...,...,...,...
75,Rugby,1859,1979.0,814.450111,0.003,0.026,0.115385,0.716418,0.082664
76,Rugby,1860,1515.0,889.353470,0.002,0.026,0.076923,0.716418,0.055109
77,Tile Hill,1388,0.0,920.700873,0.000,0.010,0.000000,0.198347,0.000000
78,Tile Hill,1443,1594.0,447.201907,0.008,0.010,0.800000,0.198347,0.158678


In [123]:
###Check
walkable_zones[walkable_zones['station']=='Hatton']

Unnamed: 0,station,zoneno,pop,distance,dist_perc,station_sum,gravity_split,gravity_dist_3,act_perc


In [44]:
int_train_dist

Unnamed: 0,station,zoneno,act_perc
0,Bedworth,1809,0.283582
1,Berkswell,2050,0.283582
2,Bermuda Park,1782,0.283582
3,Canley,1430,0.283582
4,Coleshill Pkway,1698,0.283582
5,Coventry Arena,12762,0.283582
6,Hampton In Arden,2023,0.283582
7,Hatton,2178,0.283582
8,Kenilworth,2175,0.283582
9,Nuneaton,1760,0.283582


In [56]:
int_zone_split = pd.concat([int_train_dist,walkable_zones[['station','zoneno','act_perc']]])
int_zone_split_final = int_zone_split.groupby(['station','zoneno']).agg(zonal_split = ('act_perc','sum')).reset_index()
int_zone_split_final
check_splits = int_zone_split_final.groupby(['station']).agg(total_split = ('zonal_split','sum')).reset_index()
check_splits
int_zone_split_adj = int_zone_split_final.merge(check_splits, on='station')
int_zone_split_adj['adj_split'] = round(int_zone_split_adj['zonal_split']/int_zone_split_adj['total_split'],6)
int_zone_split_adj

Unnamed: 0,station,zoneno,zonal_split,total_split,adj_split
0,Bedworth,1794,0.016099,1.000000,0.016099
1,Bedworth,1796,0.008050,1.000000,0.008050
2,Bedworth,1798,0.016099,1.000000,0.016099
3,Bedworth,1799,0.008050,1.000000,0.008050
4,Bedworth,1800,0.016099,1.000000,0.016099
...,...,...,...,...,...
77,Rugby,1859,0.082664,1.000001,0.082664
78,Rugby,1860,0.055109,1.000001,0.055109
79,Tile Hill,1388,0.000000,1.000000,0.000000
80,Tile Hill,1443,0.960331,1.000000,0.960331


### Final Matrix Development apter applying zone-zone factors

In [158]:
peaks = ['day','am','ip','pm']
mtx = []
for index, row in wk_f_trips.iterrows():

    ### Case 1 - Both are external zones
    if (row['station1'] not in fma_stations_dist['station'].values) and (row['station2'] not in fma_stations_dist['station'].values):
        zone_od = {}
        zone_od['org_zone'] = stn_zones.loc[stn_zones['station']==row['station1']]['zoneno'].values[0]
        zone_od['dest_zone'] = stn_zones.loc[stn_zones['station']==row['station2']]['zoneno'].values[0]
        for peak in peaks:
            for o_d in ['org','dest']:
                zone_od[f'{peak}_{o_d}'] = row[f'{peak}_{o_d}']
        mtx.append(zone_od)

    ## Case 2 - Org is internal zone and Dest is external zone
    elif (row['station1'] in fma_stations_dist['station'].values) and (row['station2'] not in fma_stations_dist['station'].values):

        ## Subcase 2.1 - Org is Coventry
        if row['station1'] == 'Coventry':
            for _, rowx in cov_od_dist.iterrows():
                zone_od = {}
                zone_od['org_zone'] = rowx['zoneno']
                zone_od['dest_zone'] = stn_zones.loc[stn_zones['station']==row['station2']]['zoneno'].values[0]
                for peak in peaks:
                    zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['act_perc']
                    zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['act_perc']
                mtx.append(zone_od)
        
        ## Subcase 2.2 - Org is other FMA zone except Coventry
        elif row['station1'] != 'Coventry':
            interim_2 = int_zone_split_adj[int_zone_split_adj['station']==row['station1']].copy()
            for _, rowx in interim_2.iterrows():
                zone_od = {}
                zone_od['org_zone'] = rowx['zoneno']
                zone_od['dest_zone'] = stn_zones.loc[stn_zones['station']==row['station2']]['zoneno'].values[0]
                for peak in peaks:
                    zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['adj_split']
                    zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['adj_split']
                mtx.append(zone_od)


    ###  Case 3 - Org is external zone and Dest is internal zone
    elif (row['station1'] not in fma_stations_dist['station'].values) and (row['station2'] in fma_stations_dist['station'].values):

        ## Subcase 3.1 - Dest is Coventry
        if row['station2'] == 'Coventry':
            for _, rowx in cov_od_dist.iterrows():
                zone_od = {}
                zone_od['org_zone'] = stn_zones.loc[stn_zones['station']==row['station2']]['zoneno'].values[0]
                zone_od['dest_zone'] = rowx['zoneno']
                for peak in peaks:
                    zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['act_perc']
                    zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['act_perc']
                mtx.append(zone_od)

        ## Subcase 2.2 - Dest is other FMA zone except Coventry
        elif row['station2'] != 'Coventry':
            interim_2 = int_zone_split_adj[int_zone_split_adj['station']==row['station2']].copy()
            for _, rowx in interim_2.iterrows():
                zone_od = {}
                zone_od['org_zone'] = stn_zones.loc[stn_zones['station']==row['station2']]['zoneno'].values[0]
                zone_od['dest_zone'] = rowx['zoneno']
                for peak in peaks:
                    zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['adj_split']
                    zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['adj_split']
                mtx.append(zone_od)


    ### Case 4 - Org is internal zone and Dest is internal zone
    elif (row['station1'] in fma_stations_dist['station'].values) and (row['station2'] in fma_stations_dist['station'].values):

        ### Subcase 4.1 - Org is coventry and Dest is other FMA zone except Coventry
        if (row['station1'] == 'Coventry') and (row['station2'] != 'Coventry'):
            for _, rowx in cov_od_dist.iterrows():
                interim_2 = int_zone_split_adj[int_zone_split_adj['station']==row['station2']].copy()
                for _, rowy in interim_2.iterrows():
                    zone_od = {}
                    zone_od['org_zone'] = rowx['zoneno']
                    zone_od['dest_zone'] = rowy['zoneno']
                    for peak in peaks:
                        zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['act_perc']*rowy['adj_split']
                        zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['act_perc']*rowy['adj_split']
                    mtx.append(zone_od)

        ### Subcase 4.2 - Org is other FMA zone except Coventry and Dest is Coventry
        elif (row['station1'] != 'Coventry') and (row['station2'] == 'Coventry'):
            interim_2 = int_zone_split_adj[int_zone_split_adj['station']==row['station1']].copy()
            for _, rowx in interim_2.iterrows():
                for _, rowy in cov_od_dist.iterrows():
                    zone_od = {}
                    zone_od['org_zone'] = rowx['zoneno']
                    zone_od['dest_zone'] = rowy['zoneno']
                    for peak in peaks:
                        zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['adj_split']*rowy['act_perc']
                        zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['adj_split']*rowy['act_perc']
                    mtx.append(zone_od)        

        ### Subcase 4.3 - Org is other FMA zone except Coventry and Dest is is other FMA zone except Coventry
        elif (row['station1'] != 'Coventry') and (row['station2'] != 'Coventry'):
            interim_2 = int_zone_split_adj[int_zone_split_adj['station']==row['station1']].copy()
            for _, rowx in interim_2.iterrows():
                interim_3 = int_zone_split_adj[int_zone_split_adj['station']==row['station2']].copy()
                for _, rowy in interim_3.iterrows():
                    zone_od = {}
                    zone_od['org_zone'] = rowx['zoneno']
                    zone_od['dest_zone'] = rowy['zoneno']
                    for peak in peaks:
                        zone_od[f'{peak}_org'] = row[f'{peak}_org']*rowx['adj_split']*rowy['zonal_split']
                        zone_od[f'{peak}_dest'] = row[f'{peak}_dest']*rowx['adj_split']*rowy['zonal_split']
                    mtx.append(zone_od)     

mtx_rail = pd.DataFrame(mtx)
mtx_rail



    

Unnamed: 0,org_zone,dest_zone,day_org,day_dest,am_org,am_dest,ip_org,ip_dest,pm_org,pm_dest
0,301.0,1681.0,0.014000,0.014000,0.000478,0.000969,0.000870,0.001145,0.001130,0.001101
1,2050.0,2049.0,0.001563,0.001563,0.000132,0.000297,0.000064,0.000084,0.000233,0.000109
2,2050.0,2050.0,0.002176,0.002176,0.000184,0.000414,0.000089,0.000117,0.000325,0.000152
3,2050.0,2051.0,0.000261,0.000261,0.000022,0.000049,0.000011,0.000014,0.000039,0.000018
4,1782.0,1780.0,0.000716,0.000716,0.000058,0.000119,0.000023,0.000024,0.000092,0.000072
...,...,...,...,...,...,...,...,...,...,...
67687,2085.0,1701.0,0.004000,0.004000,0.000110,0.001027,0.000110,0.000162,0.001014,0.000270
67688,1010.0,2220.0,0.046000,0.046000,0.003476,0.006230,0.002297,0.002790,0.005152,0.003751
67689,1010.0,2224.0,0.014000,0.014000,0.000419,0.001556,0.000790,0.001232,0.001149,0.000897
67690,1010.0,1701.0,0.012000,0.012000,0.000329,0.003081,0.000329,0.000486,0.003041,0.000811


In [159]:
mtx_rail_reduced = mtx_rail.groupby(['org_zone','dest_zone']).sum().reset_index()
mtx_rail_reduced

Unnamed: 0,org_zone,dest_zone,day_org,day_dest,am_org,am_dest,ip_org,ip_dest,pm_org,pm_dest
0,101.0,1021.0,0.792000,0.792000,0.059196,0.144928,0.034701,0.041505,0.110907,0.056474
1,101.0,1029.0,0.859000,0.859000,0.073015,0.161616,0.036507,0.042064,0.133145,0.066418
2,101.0,1030.0,1.297000,1.297000,0.112722,0.239799,0.054969,0.069507,0.176737,0.091054
3,101.0,1205.0,6.713000,6.713000,0.569231,1.348506,0.274801,0.314979,0.706632,0.423254
4,101.0,1483.0,0.290000,0.290000,0.016893,0.042085,0.015133,0.020866,0.031323,0.020512
...,...,...,...,...,...,...,...,...,...,...
15352,21863.0,8201.0,0.087132,0.087132,0.007386,0.009874,0.005228,0.006141,0.008111,0.008368
15353,21863.0,8301.0,0.011293,0.011293,0.000957,0.001280,0.000678,0.000796,0.001051,0.001085
15354,21863.0,8350.0,0.396090,0.302810,0.033577,0.034314,0.023766,0.021343,0.036871,0.029080
15355,21863.0,9102.0,0.013307,0.013307,0.001128,0.001508,0.000798,0.000938,0.001239,0.001278


In [160]:
print(mtx_rail_reduced['day_org'].sum())
print(mtx_rail_reduced['day_dest'].sum())

39709.95816364634
29255.207364158363


### Melting the matrix to vertical format

In [162]:
org_mtx = mtx_rail_reduced[['org_zone','dest_zone','day_org','am_org','ip_org','pm_org']].copy()
org_mtx.rename(columns={'day_org':'day_trips','am_org':'am_trips','ip_org':'ip_trips','pm_org':'pm_trips'}, inplace=True)
dest_matrix = mtx_rail_reduced[['org_zone','dest_zone','day_dest','am_dest','ip_dest','pm_dest']].copy()
dest_matrix.rename(columns={'org_zone':'int_o','dest_zone':'int_d','day_dest':'day_trips','am_dest':'am_trips','ip_dest':'ip_trips','pm_dest':'pm_trips'}, inplace=True)
dest_matrix.rename(columns={'int_o':'dest_zone','int_d':'org_zone'}, inplace=True)
mtx_rail_linear = pd.concat([org_mtx,dest_matrix]).reset_index(drop=True)
mtx_rail_linear

Unnamed: 0,org_zone,dest_zone,day_trips,am_trips,ip_trips,pm_trips
0,101.0,1021.0,0.792000,0.059196,0.034701,0.110907
1,101.0,1029.0,0.859000,0.073015,0.036507,0.133145
2,101.0,1030.0,1.297000,0.112722,0.054969,0.176737
3,101.0,1205.0,6.713000,0.569231,0.274801,0.706632
4,101.0,1483.0,0.290000,0.016893,0.015133,0.031323
...,...,...,...,...,...,...
30709,8201.0,21863.0,0.087132,0.009874,0.006141,0.008368
30710,8301.0,21863.0,0.011293,0.001280,0.000796,0.001085
30711,8350.0,21863.0,0.302810,0.034314,0.021343,0.029080
30712,9102.0,21863.0,0.013307,0.001508,0.000938,0.001278


In [165]:
mtx_rail_melted = pd.melt(mtx_rail_linear,  id_vars=['org_zone', 'dest_zone'], var_name='time_period', value_name='trips')
mtx_rail_melted['org_zone'] = mtx_rail_melted['org_zone'].astype('int64')
mtx_rail_melted['dest_zone'] = mtx_rail_melted['dest_zone'].astype('int64')
mtx_rail_melted['time_period'] = mtx_rail_melted['time_period'].str.replace('_trips', '')
mtx_rail_melted

Unnamed: 0,org_zone,dest_zone,time_period,trips
0,101,1021,day,0.792000
1,101,1029,day,0.859000
2,101,1030,day,1.297000
3,101,1205,day,6.713000
4,101,1483,day,0.290000
...,...,...,...,...
122851,8201,21863,pm,0.008368
122852,8301,21863,pm,0.001085
122853,8350,21863,pm,0.029080
122854,9102,21863,pm,0.001278


In [166]:
mtx_rail_melted.to_csv('weekday_prior_rail_matrices.csv')

## Validation checks against normall mapped matrix

In [81]:
# Merge DataFrames based on 'station1' and 'station2'
zone_f_trips = pd.merge(wk_f_trips, stn_zones, left_on='station1', right_on='station', how='left')
zone_f_trips = pd.merge(zone_f_trips, stn_zones, left_on='station2', right_on='station', how='left', suffixes=('_station1', '_station2'))

# Drop redundant 'station' columns
zone_f_trips = zone_f_trips.drop(['station_station1', 'station_station2'], axis=1)
zone_f = zone_f_trips[['station1','station2','day_org','day_dest','am_org','am_dest','ip_org','ip_dest','pm_org','pm_dest','zoneno_station1','zoneno_station2']].copy()
zone_f.rename(columns = {'zoneno_station1':'org_zone','zoneno_station2':'dest_zone'},inplace=True)

zone_f

Unnamed: 0,station1,station2,day_org,day_dest,am_org,am_dest,ip_org,ip_dest,pm_org,pm_dest,org_zone,dest_zone
0,Abbey Wood,Atherstone Warks,0.014,0.014,0.000478,0.000969,0.000870,0.001145,0.001130,0.001101,301,1681
1,Abbey Wood,Berkswell,0.004,0.004,0.000338,0.000760,0.000164,0.000215,0.000597,0.000280,301,2050
2,Abbey Wood,Bermuda Park,0.002,0.002,0.000161,0.000333,0.000064,0.000067,0.000258,0.000200,301,1782
3,Abbey Wood,Birmingham Intl,0.380,0.380,0.024836,0.052254,0.021213,0.026052,0.044140,0.029346,301,1985
4,Abbey Wood,Canley,0.012,0.012,0.000813,0.001897,0.000575,0.000781,0.001470,0.000835,301,13922
...,...,...,...,...,...,...,...,...,...,...,...,...
10230,Wood End,Water Orton,0.004,0.004,0.000110,0.001027,0.000110,0.000162,0.001014,0.000270,2085,1701
10231,Worcs Parkway,Warwick,0.046,0.046,0.003476,0.006230,0.002297,0.002790,0.005152,0.003751,1010,2220
10232,Worcs Parkway,Warwick Parkway,0.014,0.014,0.000419,0.001556,0.000790,0.001232,0.001149,0.000897,1010,2224
10233,Worcs Parkway,Water Orton,0.012,0.012,0.000329,0.003081,0.000329,0.000486,0.003041,0.000811,1010,1701


In [141]:
ext_int_mtx = zone_f[(~zone_f['station1'].isin(fma_stations_dist['station'].values)) & (zone_f['station2'].isin(fma_stations_dist['station'].values))].copy()

In [153]:
cov_othr_mtx_3 = zone_f[(zone_f['station1'].isin(fma_stations_dist['station'].values)) & (zone_f['station1']!='Coventry') & (zone_f['station2'].isin(fma_stations_dist['station'].values)) & (zone_f['station2']!='Coventry') ].copy()

In [161]:
print(zone_f['day_org'].sum())
print(zone_f['day_dest'].sum())

39709.969
29255.219
