In [1]:
"""
This script builds on P:\GBCBA\HandT\CQ\Projects\5227104-NorMITs Demand 2024-ADDY4067\40 Technical\02 TourModel\Develop Tour Model\Rail Coverage Analysis\01_processing\ProcessMatrix\JoinMatrixToGeography_v0.2.ipynb
It is designed to ultimately obtain a furnessed version of the rail output matrix based on the rail ticketing data proportions
Versioning to be handled by Git
"""

  """


'\nThis script builds on P:\\GBCBA\\HandT\\CQ\\ProjectsŒ7104-NorMITs Demand 2024-ADDY4067  Technical\x02 TourModel\\Develop Tour Model\\Rail Coverage Analysis\x01_processing\\ProcessMatrix\\JoinMatrixToGeography_v0.2.ipynb\nIt is designed to ultimately obtain a furnessed version of the rail output matrix based on the rail ticketing data proportions\nVersioning to be handled by Git\n'

## Imports

In [2]:
# Existing packages
import pandas as pd
import os
import numpy as np
from datetime import datetime

# TfN packages
from caf.distribute import furness

## Import data in files

In [3]:
# Set model version
model_ver = 'v3'

# Set directories
inputs_dir = r'I:\NTS\imports\tour_adjust_imports'
msoa_dir = r'I:\NTS\imports'
tour_model_dir = r'I:\NTS\outputs\tour\reports'

# Set file names
odm_file = 'ODM_for_rdm_2022-23.csv'
msoa_county_file = 'msoa11cd_correspondence.csv'
stn_geo_file = 'station_attributes_on_TfN_geography.csv'
sector_file = 'bespoke_sectors_v1.1.csv'
lrtu_file = 'lrt0101.csv'
model_file = 'matrix_county_output.csv'

# Import data
odm_in_df = pd.read_csv(os.path.join(inputs_dir, odm_file))
msoa_county_in_df = pd.read_csv(os.path.join(msoa_dir, msoa_county_file))
stn_geo_in_df = pd.read_csv(os.path.join(inputs_dir, stn_geo_file))
sector_in_df = pd.read_csv(os.path.join(inputs_dir, sector_file))
lrtu_in_df = pd.read_csv(os.path.join(inputs_dir, lrtu_file), skiprows=7)
lrtu_in_df.columns = lrtu_in_df.columns.str.split('[').str[0].str.strip() # Some processing required here to make column names tidier
model_in_df = pd.read_csv(os.path.join(tour_model_dir, model_ver, model_file))

  odm_in_df = pd.read_csv(os.path.join(inputs_dir, odm_file))


## Other inputs
Some manual inputs that set values later in the process

In [4]:
# Set light rail inputs
# Year for which to extract the Light Rail, Tramway and Underground data
lrtu_year_in = 2023
# Proportion of trips on the London Underground, London Trams and Docklands
# Light Railway that are considered to be "unique" (i.e. not double counted
# with another rail mode)
lrtu_london_scale_in = 0.25
# Proportion of trips on Light Rail, Tramway and Underground systems outside of
# London that are considered to be "unique" (i.e. not double counted with
# another rail mode)
lrtu_nonlondon_scale_in = 0.5

# For each Light Rail, Tramway or Underground system in GB,
# set the sector in which it is located.
# Done at sector level as some of these systems cross county borders
lrtu_systems_in = {
    'Docklands Light Railway': 'London',
    'London Trams': 'London',
    'Nottingham Express Transit': 'East Midlands North',
    'West Midlands Metro': 'West Midlands South',
    'Sheffield Supertram': 'South Yorkshire',
    'Tyne and Wear Metro': 'Tyne and Wear',
    'Manchester Metrolink': 'Greater Manchester',
    'Blackpool Tramway': 'Lancashire',
    'Edinburgh Trams': 'Scotland',
    'London Underground': 'London',
    'Glasgow Subway': 'Scotland'
}

# Set counties for stations that are located outside of the MSOA shapefile,
# so get missed off the correspondence. This is a table here in case the station
# shapefile is updated to add new stations

# Need to account for:
#  - Blackfriars (5112) - Bad join in the GIS as it's in the middle of the Thames
#  - Portsmouth Harbour (5540) - Bad join in the GIS as it's in the harbour
#  - Ryde Pier Head (5541) - Bad join as in the GIS as it's in the sea

# Counties to allocate stations to:
#  - Blackfriars -> Inner London (County 17)
#  - Portsmouth Harbour -> Hampshire (County 35)
#  - Ryde Pier Head -> Hampshire (County 35)

stn_county_infill_df = pd.DataFrame(
    columns=['National Location Code', 'county', 'county_nm'],
    data=[[5112, 17, 'Inner London'],
          [5540, 35, 'Hampshire'],
          [5541, 35, 'Hampshire']]
    )

## Functions to process rail ticketing/journey data

In [6]:
def process_lrtu_data(
    lrtu_df, lrtu_year, lrtu_systems, lrtu_london_scale, lrtu_nonlondon_scale):
    """
    Process light rail, tramway and underground data to get an annual journey
    count (for "unique", i.e. not double counted with another rail mode) by
    sector
    
    Parameters
    ----------
    lrtu_df: pandas df
        Light Rail, Tramway and Underground annual journey data by system as
        read in by this script
    lrtu_year: int
        Year for which to extract the Light Rail, Tramway and Underground data
        It is the year in which the finacial year ends
        It should match the year for which the national rail odm is downloaded
    lrtu_systems: dict
        Dictionary relating each Light Rail, Tramway or Underground system in
        GB to the sector in which it is located
    lrtu_london_scale: float
        Expected range 0.0 to 1.0
        Proportion of trips on the London Underground, London Trams and
        Docklands Light Railway that are considered to be "unique" (i.e. not
        double counted with another rail mode)
    lrtu_nonlondon_scale: float
        Expected range 0.0 to 1.0
        Proportion of trips on Light Rail, Tramway and Underground systems
        outside of London that are considered to be "unique" (i.e. not double
        counted with another rail mode)
    
    Returns
    ----------
    lrtu_df: pandas df
        For the selected year, the estimate of the number of "unique" (i.e. not
        double counted with another rail mode) journeys by Light Rail, Tramway
        and Underground for the sectors in which such systems are located.
        This is an annual total
    """
    
    # Basic logic checks on inputs
    yearnow = datetime.now().year
    if (not 2013 < lrtu_year <= yearnow) or (type(lrtu_year) is not int):
        print('WARNING: Unexpected input year for Light Rail, Tramway and Underground data')
        print(f'Expected an interger year between 2014 and {yearnow}')
        print(f'Instead, got {lrtu_year}')
    if not 0 < lrtu_london_scale <= 1:
        print('WARNING: London scaling factor expected to be greater than 0, less the or equal to 1')
        print(f'Instead got London scaling factor of {lrtu_london_scale}')
    if not 0 < lrtu_nonlondon_scale <= 1:
        print('WARNING: Outside London scaling factor expected to be greater than 0, less the or equal to 1')
        print(f'Instead got outside London scaling factor of {lrtu_nonlondon_scale}')
    
    # Process to account for odd formatting of source
    lrtu_df = lrtu_df.dropna(axis=1, how='all')
    lrtu_df = lrtu_df.dropna(axis=0, how='all')
    lrtu_df = lrtu_df.rename(columns={'Financial year ending March': 'Year'})
    lrtu_df['Year'] = lrtu_df['Year'].astype(int)

    # Select data we are interested in and reformat to a system-based index
    lrtu_df = lrtu_df.loc[lrtu_df['Year'] == lrtu_year]
    lrtu_df = lrtu_df.set_index(['Year'])
    lrtu_df = lrtu_df.transpose().reset_index()
    lrtu_df = lrtu_df.rename_axis(None, axis=1)
    lrtu_df = lrtu_df.rename(
        columns={'index': 'System', lrtu_year: 'Yearly Journeys'})

    # Convert yearly journeys to absolutes (and make sure they are numeric!)
    # Note this bit will fall over if you pick a year before all systems
    #   were returning data (i.e. some cells are '[w]')
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'].astype(str)
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'].str.replace(
        ',', '')
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'].astype(float) * 10 # Just clear float to minimise rounding error risk
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'].astype(int)
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'] * 100000 # Not 1 million as we've times by 10 about to get out of float

    # Apply sectors to data
    lrtu_df['Sector'] = lrtu_df['System'].map(lrtu_systems)
    lrtu_df = lrtu_df.dropna(axis=0) # Drop rows where system name is not found (expected to be some total rows like all of GB)
    if lrtu_df.shape[0] != len(lrtu_systems):
        print('WARNING: The systems you have specified sectors for and the systems in the input file do not match!')
    lrtu_df = lrtu_df.groupby(
        ['Sector'])['Yearly Journeys'].sum().reset_index()

    # Apply scaling factors to account for overlap with other rail modes
    # (e.g. national rail, other light rail systems)
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'] * np.where(
        lrtu_df['Sector'] == 'London', lrtu_london_scale, lrtu_nonlondon_scale)
    lrtu_df['Yearly Journeys'] = lrtu_df['Yearly Journeys'].astype(int)
    
    return lrtu_df

In [7]:
def rationalise_inputs(odm_df, msoa_county_df, stn_geo_df):
    """
    Cut input dfs down to just the columns of interest
    
    Parameters
    ----------
    odm_df: pandas df
        Origin-destination matrix for journeys on the national rail network
        between station pairs. Annual data for 1 year
    msoa_county_df: pandas df
        Lookup table to get from MSOA to County
    stn_geo_df: pandas df
        National rail stations with MSOA attached
    
    Returns
    ---------- 
    odm_df: pandas df
        Origin-destination matrix for journeys on the national rail network
        between station pairs. Annual data for 1 year. Columns cut down to just
        those required by other functions
    msoa_county_df: pandas df
        Lookup table to get from MSOA to County. Columns cut down to just those
        required by other functions
    stn_geo_df: pandas df
        National rail stations with MSOA attached. Columns cut down to just
        those required by other functions
    """
    
    # nlc (National Location Code) is a unique numerical code for each station
    odm_df = odm_df[['origin_nlc',
                     'origin_station_name',
                     'destination_nlc',
                     'destination_station_name',
                     'journeys']]
    msoa_county_df = msoa_county_df[['msoa11cd',
                                     'county',
                                     'county_nm']]
    stn_geo_df = stn_geo_df[['National Location Code', 'msoa11cd']]
    
    return odm_df, msoa_county_df, stn_geo_df

In [8]:
def process_station_geography(msoa_county_df, stn_geo_df, stn_infill_df):
    """
    Join each national rail station to the county in which they lie
    
    Parameters
    ----------
    msoa_county_df: pandas df
        Lookup table to get from MSOA to County. Columns cut down to just those
        required by this function
    stn_geo_df: pandas df
        National rail stations with MSOA attached. Columns cut down to just
        those required by this function
    stn_infill_df: pandas df
        Table assigning stations outside of MSOAs to their counties
    
    Returns
    ----------
    stn_geo_df: pandas df
        Table relating all active national rail stations to their county
    """
    
    # Assign counties to stations that are allocated MSOAs by the geospatial
    # processing
    stn_geo_df = stn_geo_df.merge(msoa_county_df, how='left', on='msoa11cd')
    stn_geo_df = stn_geo_df.drop(columns=['msoa11cd'], axis=1)
    
    # Add on the stations that exist outside of the MSOA shapefile
    # Drop rows containing nulls
    if stn_geo_df[stn_geo_df.isnull().any(axis=1)].shape == stn_county_infill_df.shape:
        # We are infilling something the same size as the NULL rows,
        # which we want to do
        # Drop the NULL rows, then append the replacements
        stn_geo_df = stn_geo_df.dropna(how='any', axis=0)
        stn_geo_df = pd.concat([stn_geo_df, stn_county_infill_df])
        stn_geo_df.reset_index(inplace=True, drop=True)
    else:
        print('WARNING: The NULL infilling table you are trying to append is not the same dimensions as the NULL rows in the table')
        print('Operation therefore not attempted and NULL rows are still in place')
    
    # Rename the National Location Code to make it a bit less unweildly
    stn_geo_df = stn_geo_df.rename(columns={'National Location Code': 'nlc'})
    
    return stn_geo_df

In [9]:
def make_sector_rail_odm(process_odm_df, stn_geo_df, sector_df):
    """
    Make the sectorised national rail odm
    
    Parameters
    ----------
    process_odm_df: pandas df
        Origin-destination matrix for journeys on the national rail network
        between station pairs. Annual data for 1 year. Columns cut down to just
        those required by this function
    stn_geo_df: pandas df
        Table relating all active national rail stations to their county
    sector_df: pandas df
        County to sector correspondence
    
    Returns
    ----------
    sector_odm_df: pandas df
        Column matrix of all sector origin-destination movemnets from the
        national rail ticketing data
    county_rows: int
        Row count of county level matrix
    sector_rows: int
        Row count of sector level matrix
    """
    
    # Join geography to ODM
    process_odm_df = process_odm_df.merge(
        stn_geo_df, how='left', left_on='origin_nlc', right_on='nlc')
    process_odm_df = process_odm_df.drop(columns=['nlc'], axis=1)
    process_odm_df = process_odm_df.rename(
        columns={'county': 'origin_county_code',
                 'county_nm': 'origin_county_name'})

    process_odm_df = process_odm_df.merge(
        stn_geo_df, how='left', left_on='destination_nlc', right_on='nlc')
    process_odm_df = process_odm_df.drop(columns=['nlc'], axis=1)
    process_odm_df = process_odm_df.rename(
        columns={'county': 'destination_county_code',
                 'county_nm': 'destination_county_name'})

    # Groupby on county level geographies, summing journeys and dropping the
    # station details
    process_odm_df = process_odm_df.groupby(
        ['origin_county_code',
         'origin_county_name',
         'destination_county_code',
         'destination_county_name']
    )['journeys'].sum().reset_index()

    # Now aggregate to Sector level using the Tour Model Output County to
    # sector correspondence
    sector_df = sector_df[['county', 'Sector_ID', 'Sector']]
    sector_odm_df = process_odm_df.merge(
        sector_df, how='left', left_on='origin_county_name', right_on='county')
    sector_odm_df = sector_odm_df.drop(columns=['county'], axis=1)
    sector_odm_df = sector_odm_df.rename(
        columns={'Sector_ID': 'origin_sector_id',
                 'Sector': 'origin_sector_name'})

    sector_odm_df = sector_odm_df.merge(
        sector_df,
        how='left',
        left_on='destination_county_name',
        right_on='county'
    )
    sector_odm_df = sector_odm_df.drop(columns=['county'], axis=1)
    sector_odm_df = sector_odm_df.rename(
        columns={'Sector_ID': 'destination_sector_id',
                 'Sector': 'destination_sector_name'})

    # Groupby on sector level geographies, summing journeys and dropping the
    # county details
    sector_odm_df = sector_odm_df.groupby(
        ['origin_sector_id',
         'origin_sector_name',
         'destination_sector_id',
         'destination_sector_name']
    )['journeys'].sum().reset_index()
    
    # Calculate row counts for checking
    county_rows = process_odm_df.shape[0]
    sector_rows = sector_odm_df.shape[0]
    
    return sector_odm_df, county_rows, sector_rows

In [10]:
def check_odm_processing(
    county_rows, sector_rows, msoa_county_df, sector_df, sector_odm_df, odm_df):
    """
    Check the odm processing worked correctly
    
    Parameters
    ----------
    county_rows: int
        Row count of county level matrix
    sector_rows: int
        Row count of sector level matrix
    msoa_county_df: pandas df
        Lookup table to get from MSOA to County
    sector_odm_df: pandas df
        Column matrix of all sector origin-destination movemnets from the
        national rail ticketing data
    odm_df: pandas df
        Origin-destination matrix for journeys on the national rail network
        between station pairs. Annual data for 1 year
    
    Returns
    ----------
    None
    """
    
    # Check if we've got all o/d movements at the county level
    # df should have number of counties x number of counties as the row count
    expected_rows = msoa_county_df['county'].nunique() ** 2
    if expected_rows == county_rows:
        print('County table dimensions are as expected')
    else:
        print(f'WARNING: Expected {str(expected_rows)} rows in the county table, got {str(county_rows)} rows')

    # Check if we've got all o/d movements at the sector level
    # df should have number of sectors x number of sectors as the row count
    expected_rows = sector_df['Sector_ID'].nunique() ** 2
    if expected_rows == sector_rows:
        print('Sector table dimensions are as expected')
    else:
        print(f'WARNING: Expected {str(expected_rows)} rows in the sector table, got {str(sector_rows)} rows')

    # Check we've not dropped any journeys
    input_journeys = odm_df['journeys'].sum()
    output_journeys = sector_odm_df['journeys'].sum()
    if input_journeys == output_journeys:
        print(f'The {str(input_journeys)} National Rail journeys input are all accounted for')
    else:
        print(f'WARNING: {str(input_journeys)} were input, but {str(output_journeys)} were output in the sector table!')

In [12]:
def add_lrtu_to_national_rail(
    sector_odm_hlr_df, lrtu_df):
    """
    Add Light Rail, Tramway and Underground trips to intrasector cells of
    the main o/d sector matrix
    
    Parameters
    ----------
    sector_odm_hlr_df: pandas df
        Column matrix of all sector origin-destination movemnets from the
        national rail ticketing data
    lrtu_df: pandas df
        For the selected year, the estimate of the number of "unique" (i.e. not
        double counted with another rail mode) journeys by Light Rail, Tramway
        and Underground for the sectors in which such systems are located.
        This is an annual total
    
    Returns
    ----------
    sector_odm_hlr_df: pandas df
        Column matrix of all sector origin-destination movemnets from the
        national rail ticketing data, with the light rail, tramway and
        underground data joined on the related intrasector movements
    square_s_odm_df: pandas df
        Square matrix of all sector origin-destination movemnets from the
        national rail ticketing data, with the light rail, tramway and
        underground data joined on the related intrasector movements
    square_s_weekly_odm_df: pandas df
        Square matrix of all sector origin-destination movemnets from the
        national rail ticketing data, with the light rail, tramway and
        underground data joined on the related intrasector movements, scaled to
        be weekly to match the Tour Model outputs
    """
    hr_total = sector_odm_hlr_df['journeys'].sum()
    lrtu_total = lrtu_df['Yearly Journeys'].sum()
    
    sector_odm_hlr_df['Sector'] = np.where(
        sector_odm_hlr_df['origin_sector_id'] == sector_odm_hlr_df['destination_sector_id'],
        sector_odm_hlr_df['origin_sector_name'], '-')
    sector_odm_hlr_df = sector_odm_hlr_df.merge(
        lrtu_df, how='left', on='Sector')
    sector_odm_hlr_df['Yearly Journeys'] = sector_odm_hlr_df['Yearly Journeys'].fillna(0).astype(int)
    sector_odm_hlr_df['journeys'] = (sector_odm_hlr_df['journeys'] +
                                     sector_odm_hlr_df['Yearly Journeys'])
    sector_odm_hlr_df = sector_odm_hlr_df.drop(
        ['Sector', 'Yearly Journeys'], axis=1)

    hrlrtu_total = sector_odm_hlr_df['journeys'].sum()
    if hr_total + lrtu_total != hrlrtu_total:
        print('WARNING: Unexpected mismatch between National Rail and other rail totals with their sum!')
    
    # Make square
    square_s_odm_df = sector_odm_hlr_df.pivot(
        index=['origin_sector_id', 'origin_sector_name'],
        columns=['destination_sector_id', 'destination_sector_name'],
        values='journeys'
    )
    
    # Make square matrix weekly (to match Tour Model Output)
    square_s_weekly_odm_df = square_s_odm_df/52
    
    return sector_odm_hlr_df, square_s_odm_df, square_s_weekly_odm_df

## Functions to process model output data

In [98]:
def make_model_rail_mat(model_input, sector_df):
    """
    Process the model outputs to get a square matrix that matches the format and
    dimensions of the rail ticketing data matrix. Also produce a matrix for
    scaling to subsets of the overall df later.

    Parameters
    ----------
    model_input: pandas df
        The county level o/d matrix in stack format exported by the tour model.
        This contains additional modes, as well as trip purposes and time
        periods
    sector_df: pandas df
        County to sector correspondence

    Outputs
    ----------
    rail_mat: pandas df
        Square rail matrix derived from the tour model. Set to the county level
    pdt_props_df: pandas df
        For each purpose, direction and time period combination, this df lists
        the proportion of rail trips relative to all rail trips
    """
    # Filter to rail only
    rail_mat = model_df[model_df['mode'] == 'Rail']
    sector_df = sector_df[['county', 'Sector', 'Sector_ID']]
    
    # Make purpose, directrion and time period proportions
    pdt_props_df = rail_mat.groupby(['purpose',
                                        'direction',
                                        'period']
                                      )['trips'].sum().reset_index()
    rail_trip_tot = pdt_props_df['trips'].sum()
    pdt_props_df['proportion'] = pdt_props_df['trips'] /  rail_trip_tot
    pdt_props_df = pdt_props_df.drop(columns=['trips'])
    
    # Make square, county level tour model output rail matrix
    rail_mat = rail_mat.rename(
        columns={'tmz_o': 'county_origin', 'tmz_d': 'county_destination'})
    for d in ['destination', 'origin']:
        rail_mat = rail_mat.merge(sector_df,
                                  left_on='_'.join(['county', d]),
                                  right_on='county', how='left')
        colname_s = '_'.join([d, 'sector_name'])
        colname_si = '_'.join([d, 'sector_id'])
        rail_mat = rail_mat.rename(columns={'Sector': colname_s,
                                            'Sector_ID': colname_si})
        rail_mat = rail_mat.drop(columns=['county'])
    rail_mat = rail_mat[['destination_sector_name',
                         'destination_sector_id',
                         'origin_sector_name',
                         'origin_sector_id',
                         'trips']]
    rail_mat = rail_mat.groupby(['destination_sector_name',
                                 'destination_sector_id',
                                 'origin_sector_name',
                                 'origin_sector_id']
                               )['trips'].sum().reset_index()
    rail_mat = rail_mat.pivot(
        index=['origin_sector_id', 'origin_sector_name'],
        columns=['destination_sector_id', 'destination_sector_name'],
        values='trips'
        )
    
    rail_mat = rail_mat.fillna(0) # Fill missing data with 0 (i.e. no trips)
    rail_mat = rail_mat.reindex(sorted(rail_mat.columns), axis=1)

    return rail_mat, pdt_props_df

In [87]:
furness_params = {
    'tolerance': 1e-9,
    'max_iterations': 5000,
    'warning': True
}

def furness_rail(ticket_mat, rail_mat, furness_params):
    """
    Furness the tour model output rail matrix to get the patterns from the
    ticketing data. Expect this to fill in the blanks (0s) in the rail matrix

    Parameters
    ----------
    ticket_mat: pandas df
        Target matrix for the furness process. Extracted from rail ticketing
        data
    rail_mat: pandas df
        Seed data for the furness process. These are the tour model output rail
        trips
    furness_params: dict
        Contains tolerance, maximum iterations and whether warnings are shown by
        the furness process

    Outputs
    ----------
    
    """

In [107]:
# Scale ticketing data to same total as tour model output
ticket_tot = sq_s_weekly_odm_df.sum().sum()
tour_tot = rail_mat.sum().sum()
ticket_mat = sq_s_weekly_odm_df * (tour_tot / ticket_tot)

# Initialise furness process
# Check this is the correct way around...
row_targets = ticket_mat.sum(axis=0).reset_index(drop=True).to_numpy()
col_targets = ticket_mat.sum(axis=1).reset_index(drop=True).to_numpy()
seed_vals = rail_mat.replace(0, 1e-10).to_numpy()

tol = furness_params['tolerance']
max_iters = furness_params['max_iterations']
warning = furness_params['warning']

f_mat, iteration, rmse = furness.doubly_constrained_furness(seed_vals,
                                                            row_targets,
                                                            col_targets,
                                                            tol,
                                                            max_iters,
                                                            warning)
print(iteration)
print(rmse)

f_mat_df = pd.DataFrame(data=f_mat,
                        index=ticket_mat.index,
                        columns=ticket_mat.columns)
display(f_mat_df)
# f_mat_df.to_csv(r'C:\Users\Jimny\Documents\GitHub\ART_NTS_Processing\python\post_tour_adj\test_mat_dump.csv')
# ticket_mat.to_csv(r'C:\Users\Jimny\Documents\GitHub\ART_NTS_Processing\python\post_tour_adj\tickettest_mat_dump.csv')
# rail_mat.to_csv(r'C:\Users\Jimny\Documents\GitHub\ART_NTS_Processing\python\post_tour_adj\tourtest_mat_dump.csv')
# print(f_mat)

485
9.936380984707746e-10


Unnamed: 0_level_0,destination_sector_id,1,2,3,4,5,6,7,8,9,10,...,17,18,19,20,21,22,23,24,25,26
Unnamed: 0_level_1,destination_sector_name,Cheshire,Cleveland,Cumbria,Durham,Greater Manchester,Humberside,Lancashire,Merseyside,North Yorkshire,Northumberland,...,West Midlands South,EEH,Peninsula Transport,TfSE,Transport East,Western Gateway,London,Scotland,North Wales,South Wales
origin_sector_id,origin_sector_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,Cheshire,59608.83,1.172291e-10,561.5745,1.860608e-10,54120.897641,1243.581,1493.008,34636.24,856.4552,1.146974e-10,...,3798.007,2025.213583,128.2775,2050.197,1.0582e-10,57.35608,13293.91,124.8841,3604.042,364.6276
2,Cleveland,1.249704e-10,17620.52,1.253608e-10,4876.363,864.594861,1.033272e-10,1.287379e-10,1.329801e-10,3345.406,1.136757e-10,...,1.297579e-10,56.026638,376.7643,420.756,805.3186,1.278509e-10,2226.661,1789.036,1.044893e-10,1.488322e-10
3,Cumbria,145.8827,1.608532e-10,29078.82,341.2674,4085.762226,1.430523e-10,9319.619,736.3016,1273.487,233.6312,...,361.58,879.806958,104.6179,512.7124,77.84853,55.498,3185.822,4332.855,1.446611e-10,295.6493
4,Durham,433.1161,5084.137,251.8576,4632.259,3125.104587,213.8178,147.4541,1.555562e-10,11581.27,1451.951,...,1.51787e-10,489.320163,1.506569e-10,1.345421e-10,1.226825e-10,1.495562e-10,8400.322,849.6638,1.222285e-10,1.740996e-10
5,Greater Manchester,55053.1,830.6998,3661.409,1520.726,697696.815867,918.47,49057.95,41179.21,8638.311,102.2107,...,5572.996,6425.223216,932.3622,2594.676,1560.716,2334.437,23397.85,3075.96,1515.55,2114.046
6,Humberside,137.8032,9.64965e-11,1.041175e-10,236.1057,1917.115841,30152.38,145.4249,466.5155,4096.343,153.0389,...,577.0554,415.303288,542.7501,2327.55,100.1905,47.75973,3884.07,488.2151,447.6387,1.236115e-10
7,Lancashire,2153.134,1.165541e-10,9235.27,411.3235,47664.246876,597.5483,117515.4,20034.72,481.8321,1.14037e-10,...,2063.214,1658.370395,89.8732,1521.245,924.4973,590.8426,7830.607,2835.521,1401.897,1.493053e-10
8,Merseyside,34982.84,1.110097e-10,1256.315,1.761896e-10,40994.874142,420.2779,18191.64,629304.8,2428.466,1.086123e-10,...,3846.081,1315.788751,1.230551e-10,3552.271,704.5399,675.8965,11754.84,796.7925,3598.83,1.422029e-10
9,North Yorkshire,945.981,4511.198,1309.805,13987.17,7434.99463,5090.152,502.76,688.3221,41154.72,130.2846,...,2588.428,2580.850903,1.584607e-10,1415.754,811.802,726.6034,10299.97,559.1561,1.285597e-10,1.831176e-10
10,Northumberland,1.434185e-10,1.333361e-10,427.4931,231.6619,80.781315,54.11433,1.477422e-10,1.526107e-10,202.6784,1647.706,...,605.6574,95.005447,1.478041e-10,177.4333,1.203594e-10,1.467242e-10,2202.904,4051.27,1.19914e-10,1.708029e-10


## Call rail ticketing/journey processing functions

In [13]:
lrtu_df_processed = process_lrtu_data(
    lrtu_in_df,
    lrtu_year_in,
    lrtu_systems_in,
    lrtu_london_scale_in,
    lrtu_nonlondon_scale_in
)
odm_df_rat, msoa_county_df_rat, stn_geo_df_rat = rationalise_inputs(
    odm_in_df,
    msoa_county_in_df,
    stn_geo_in_df
)
stn_geo_df_processed = process_station_geography(
    msoa_county_df_rat,
    stn_geo_df_rat,
    stn_county_infill_df
)
sector_odm_df_processed, c_rows, s_rows = make_sector_rail_odm(
    odm_df_rat,
    stn_geo_df_processed,
    sector_in_df
)
check_odm_processing(
    c_rows,
    s_rows,
    msoa_county_df_rat,
    sector_in_df,
    sector_odm_df_processed,
    odm_df_rat
)
s_odm_hlr_df, sq_s_odm_df, sq_s_weekly_odm_df = add_lrtu_to_national_rail(
    sector_odm_df_processed,
    lrtu_df_processed
)

County table dimensions are as expected
Sector table dimensions are as expected
The 1228517053 National Rail journeys input are all accounted for


In [62]:
display(rail_mat)

Unnamed: 0_level_0,destination_sector_id,1,2,3,4,18,14,15,5,6,7,...,24,26,11,20,21,12,16,17,13,22
Unnamed: 0_level_1,destination_sector_name,Cheshire,Cleveland,Cumbria,Durham,EEH,East Midlands North,East Midlands South,Greater Manchester,Humberside,Lancashire,...,Scotland,South Wales,South Yorkshire,TfSE,Transport East,Tyne and Wear,West Midlands North,West Midlands South,West Yorkshire,Western Gateway
origin_sector_id,origin_sector_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,Cheshire,47273.468049,0.0,443.976309,0.0,1538.997771,1410.160508,0.0,48738.157008,1192.816122,1149.395782,...,93.39183,242.810026,1644.82996,1766.657,0.0,197.607921,2902.458885,2900.921594,1453.237888,44.462076
2,Cleveland,0.0,15165.937497,0.0,2644.400144,42.958361,49.031098,0.0,785.6023,0.0,0.0,...,1349.916,0.0,0.0,365.8246,767.866675,6128.083309,0.0,0.0,1799.141319,0.0
3,Cumbria,84.317239,0.0,16754.634109,133.673631,487.259559,36.701741,0.0,2681.532204,0.0,5228.9149,...,2361.466,143.482772,237.944719,321.9854,53.615254,2415.379163,130.097998,201.275267,1109.684603,31.354033
4,Durham,296.276004,3740.821984,171.748415,2147.451067,320.734431,803.100637,0.0,2427.470614,176.900274,97.91511,...,548.0675,0.0,489.639009,0.0,0.0,11890.906136,58.305139,0.0,2878.95156,0.0
5,Greater Manchester,49701.41639,806.656613,3295.191922,930.414227,5558.216843,23269.711161,1177.95511,715238.624328,1002.869316,42992.934558,...,2618.56,1602.549616,9698.604344,2545.188,1678.943199,1936.012968,4412.654819,4845.616493,31504.386266,2060.025378
6,Humberside,132.766974,0.0,0.0,154.161368,383.403844,1571.665663,766.576827,2097.376515,35135.393297,136.009858,...,443.5441,0.0,6921.627246,2436.574,115.02239,163.137606,608.527996,535.4533,3899.46683,44.977585
7,Lancashire,1717.456444,0.0,7343.613876,222.349676,1267.525054,339.207763,337.040498,43172.252035,576.474503,90993.472396,...,2132.762,0.0,1158.756194,1318.45,878.710068,724.804425,166.13379,1585.011427,4617.837617,460.670034
8,Merseyside,29297.896646,0.0,1048.879381,0.0,1055.912379,842.102868,68.8843,38985.959024,425.706546,14789.515021,...,629.2474,0.0,1207.53373,3232.491,703.092198,269.621301,3468.505785,3102.22402,862.243738,553.305556
9,North Yorkshire,615.23645,3155.797949,849.203459,6164.927042,1608.358617,2673.006413,337.264423,5490.82479,4003.895978,317.410377,...,342.9158,0.0,7962.828975,1000.454,629.122227,5151.412655,0.0,1621.321605,37309.54244,461.913357
10,Northumberland,0.0,0.0,297.145643,109.468109,63.475116,50.873301,434.31063,63.959177,45.635137,0.0,...,2663.671,0.0,139.073551,134.4248,0.0,12418.697009,0.0,406.71953,273.294155,0.0
