In [15]:
import openmatrix as omx
import os
import numpy as np
import pandas as pd
import sys
import random
from scipy.sparse import coo_matrix
import logging as logger
import sqlite3

In [16]:
# Timestamped, level-tagged log lines make it possible to follow the progress
# of the long-running discretization loop further down.
logger.basicConfig(
    level=logger.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

In [17]:
# Project folder that holds the exported matrices and receives the output
# database. The TRIP_TABLE_DIR environment variable overrides it so the
# notebook can run on another machine; the default is the original location.
fldr = os.environ.get("TRIP_TABLE_DIR", "C:/D/Projects/Vancouver")

# SQLite database that receives the discretized trips.
outdb = sqlite3.connect(os.path.join(fldr, 'trip_table.sqlite'))

So that discretizing trips does not require an Emme license, all the matrices have been exported into a single OMX container.

In [18]:
# Path of the OMX container, and a read-only handle on it. `matrices` is bound
# only once (to the open file) instead of first holding the file name.
omx_file = os.path.join(fldr, "matrices.omx")
matrices = omx.open_file(omx_file, 'r')

In [19]:
# Uncomment to release the OMX file handle manually (e.g. after an interrupted run):
# matrices.close()

In [20]:
# We collect the mapping information for zone numbers.
# Per the OpenMatrix API, `mapping()` returns {zone number: array offset}.
# Sorting the zone numbers by that offset makes `tazs[i]` the zone stored at
# row/column i without relying on the dictionary's iteration order.
taz_offsets = matrices.mapping('taz')
tazs = np.array(sorted(taz_offsets, key=taz_offsets.get))
max_zone = tazs.max()

In [21]:
# Names of the matrices in the container; the loop below uses each one to
# index `matrices`.
all_mats = matrices.list_matrices()

The actual function to discretize a given matrix

In [22]:
def discretize_matrix(matrix, zones, total_diff=50, zonal_difference=5, tries=100,
                      matrix_class=None, seed=None):
    """Turn a fractional origin-destination matrix into one row per whole trip.

    Every non-zero cell contributes its integer part as certain trips, and one
    extra trip with probability equal to its fractional part. The random draw
    is repeated up to ``tries`` times and the best trial is kept:

    * until some trial brings the difference in total trips down to
      ``total_diff`` or less, the trial with the smallest total difference wins;
    * after that, among trials whose total difference is below ``total_diff``,
      the one with the smallest worst-case zonal difference (row or column
      totals) wins.

    The search stops as soon as the retained trial satisfies both tolerances.

    Parameters
    ----------
    matrix : array-like (2-D) or scipy sparse matrix
        Trip flows; only non-zero cells are considered.
    zones : array-like
        Zone identifier for each row/column position of ``matrix``.
    total_diff : number
        Tolerance on abs(discretized total - matrix total).
    zonal_difference : number
        The search stops once the largest per-zone difference is below this.
    tries : int
        Maximum number of random trials.
    matrix_class : object, optional
        Label written to the ``class`` column. When omitted, the notebook-level
        variable ``m`` is used, which is what earlier versions always did.
    seed : int, optional
        Seed for a private random generator, giving reproducible results.
        When omitted, NumPy's global random state is used.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``rows``, ``cols``, ``class`` and ``flow`` (always 1), one row
        per trip. ``None`` when the matrix has no non-zero cells or
        ``tries`` is not positive.
    """
    if matrix_class is None:
        # Backward compatibility: callers that do not pass a label rely on the
        # loop variable `m` defined at notebook level.
        matrix_class = m
    zones = np.asarray(zones)
    rng = np.random if seed is None else np.random.RandomState(seed)

    q = coo_matrix(matrix)
    orig_data = pd.DataFrame({'rows': zones[q.row],
                              'cols': zones[q.col],
                              'class': [matrix_class] * q.data.shape[0],
                              'flow': q.data})
    if orig_data.empty:
        logger.warning("    Matrix has no non-zero cells; nothing to discretize")
        return None

    # Reference totals per origin and per destination zone. Selecting `flow`
    # before summing avoids aggregating the label column.
    orig_prod = orig_data.groupby('rows')[['flow']].sum()
    orig_prod.columns = ['original']
    orig_dest = orig_data.groupby('cols')[['flow']].sum()
    orig_dest.columns = ['original']
    tot_trips = q.sum()

    # The split into whole and fractional parts does not change between
    # trials, so it is done once. A single modf() call keeps the two parts
    # aligned; cells that are exactly 1.0 are treated as one certain trip.
    frac_part, int_part = np.modf(orig_data.flow.values)
    has_whole = int_part >= 1
    whole_trips = orig_data[has_whole].assign(flow=int_part[has_whole])
    # Same dtype as the whole-trip flows so that concatenating never has to
    # reconcile column dtypes.
    one_trip = int_part.dtype.type(1)

    best_trial = None
    best_diff = 1000000000
    tot_diff = 100000000

    for _ in range(tries):
        # One extra trip per cell with probability equal to its fraction.
        draws = rng.rand(orig_data.shape[0])
        sampled = orig_data[draws < frac_part].assign(flow=one_trip)

        trips = pd.concat([whole_trips, sampled], ignore_index=True)
        # Expand to one row per trip. The builtin int replaces the np.int
        # alias that newer NumPy releases no longer provide.
        trips = trips.reindex(trips.index.repeat(trips.flow.astype(int)))
        trips['flow'] = 1

        # Largest absolute difference in row totals and in column totals.
        prod = trips.groupby('rows')[['flow']].sum()
        dest = trips.groupby('cols')[['flow']].sum()
        diffp = orig_prod.join(prod).fillna(0)
        diffa = orig_dest.join(dest).fillna(0)
        diffp = np.abs(diffp.original.values - diffp.flow.values).max()
        diffa = np.abs(diffa.original.values - diffa.flow.values).max()

        # max difference for a zone
        diff = max(diffp, diffa)

        # Difference in totals (each row is exactly one trip)
        tdf = abs(trips.shape[0] - tot_trips)

        if tot_diff > total_diff:
            # No trial within the total tolerance yet: chase the total.
            if tdf < tot_diff:
                tot_diff = tdf
                best_diff = diff
                best_trial = trips
        elif tdf < total_diff and diff < best_diff:
            # Total is under control: improve the worst zonal difference.
            best_diff = diff
            best_trial = trips

        # Stop as soon as the retained trial meets both tolerances, including
        # when the very first acceptable trial already does.
        if tot_diff <= total_diff and best_diff < zonal_difference:
            break

    logger.info("    Segmented with a maximum difference of {0:,.0f} trips on a single zone".format(best_diff))
    return best_trial

In [23]:
# Discretize every selected matrix and append its trips to the `trips` table.
# `demand` maps each processed matrix name to "<mf id>-<description>".
# NOTE: `to_sql(..., if_exists='append')` means that re-running this cell
# against the same database adds the trips a second time.
demand = {}
try:
    # The loop variable has to stay named `m`: discretize_matrix() reads it at
    # notebook level to label the `class` column when no label is passed.
    for m in all_mats:
        attrs = matrices[m].attrs
        mf = attrs.mf
        desc = attrs.description

        # The run log shows `mf` arriving as bytes (b'mf216'); normalise both
        # attributes to text so log lines and `demand` values stay readable.
        mf = mf.decode() if isinstance(mf, bytes) else str(mf)
        desc = desc.decode() if isinstance(desc, bytes) else str(desc)

        # We don't need to discretize most matrices: keep only ids of at most
        # five characters whose third character is 2 or 3, and skip trucks.
        # Comparing characters (not int()) also skips ids with no digit there.
        if len(mf) > 5:
            continue
        if mf[2:3] not in ('2', '3'):
            continue
        if 'truck' in desc:
            continue

        logger.info("{}    {}   {}".format(mf, m, desc))
        demand[m] = '{}-{}'.format(mf, desc)
        mat = np.array(matrices[m])

        if not mat.sum():
            continue

        df = discretize_matrix(mat, tazs)
        if df is None:
            # No usable trial was produced; do not crash on `None.to_sql`.
            logger.warning("    No trips produced for {}; skipping".format(m))
            continue
        df.to_sql('trips', outdb, if_exists='append')

        logger.info("    Original Matrix: {0:,.0f}".format(mat.sum()))
        logger.info("    Result Matrix: {0:,.0f}".format(df.shape[0]))
finally:
    # Release the OMX file even when a matrix fails part-way through.
    matrices.close()

2019-05-11 14:17:54 INFO     b'mf216'    Bk_pertrp_Am   Bike per-trips AM
2019-05-11 14:18:06 INFO         Segmented with a maximum difference of 11 trips on a single zone
2019-05-11 14:18:06 INFO         Original Matrix: 10,303
2019-05-11 14:18:06 INFO         Result Matrix: 10,277
2019-05-11 14:18:06 INFO     b'mf246'    Bk_pertrp_Md   Bike per-trips MD
2019-05-11 14:18:18 INFO         Segmented with a maximum difference of 10 trips on a single zone
2019-05-11 14:18:18 INFO         Original Matrix: 5,366
2019-05-11 14:18:18 INFO         Result Matrix: 5,347
2019-05-11 14:18:18 INFO     b'mf276'    Bk_pertrp_Pm   Bike per-trips PM
2019-05-11 14:18:29 INFO         Segmented with a maximum difference of 12 trips on a single zone
2019-05-11 14:18:30 INFO         Original Matrix: 9,868
2019-05-11 14:18:30 INFO         Result Matrix: 9,865
2019-05-11 14:18:30 INFO     b'mf306'    HOV_drvtrp_VOT_1_Am   HOV drv-trips VOT 1 AM
2019-05-11 14:18:57 INFO         Segmented with a maximum differen

2019-05-11 14:29:40 INFO         Segmented with a maximum difference of 20 trips on a single zone
2019-05-11 14:29:40 INFO         Original Matrix: 68,709
2019-05-11 14:29:40 INFO         Result Matrix: 68,677
2019-05-11 14:29:40 INFO     b'mf302'    SOV_drvtrp_VOT_3_Am   SOV drv-trips VOT 3 AM
2019-05-11 14:30:08 INFO         Segmented with a maximum difference of 28 trips on a single zone
2019-05-11 14:30:09 INFO         Original Matrix: 80,531
2019-05-11 14:30:09 INFO         Result Matrix: 80,492
2019-05-11 14:30:09 INFO     b'mf322'    SOV_drvtrp_VOT_3_Md   SOV drv-trips VOT 3 MD
2019-05-11 14:30:38 INFO         Segmented with a maximum difference of 26 trips on a single zone
2019-05-11 14:30:39 INFO         Original Matrix: 78,669
2019-05-11 14:30:39 INFO         Result Matrix: 78,713
2019-05-11 14:30:39 INFO     b'mf342'    SOV_drvtrp_VOT_3_Pm   SOV drv-trips VOT 3 PM
2019-05-11 14:31:07 INFO         Segmented with a maximum difference of 29 trips on a single zone
2019-05-11 14:

2019-05-11 14:39:29 INFO         Segmented with a maximum difference of 17 trips on a single zone
2019-05-11 14:39:30 INFO         Original Matrix: 16,367
2019-05-11 14:39:30 INFO         Result Matrix: 16,377
2019-05-11 14:39:30 INFO     b'mf355'    railPm   Rail Person Trips PM
2019-05-11 14:39:43 INFO         Segmented with a maximum difference of 24 trips on a single zone
2019-05-11 14:39:45 INFO         Original Matrix: 42,382
2019-05-11 14:39:45 INFO         Result Matrix: 42,339
