In [6]:
import numpy as np
import pandas as pd
import sys
import torch

from datetime import datetime
from pprint import pprint


if 'src' not in sys.path:
    sys.path.append('src')

%reload_ext autoreload
%autoreload 2

from explore.data import DetectorDataProvider, LookUpTable
from explore.graph import IntersectionGraph

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

##### Select data

Select main sections

In [3]:
# sections = (
#     ('3030', '3040'),
#     # ('3040', '3050'),
#     # ('3030', '3050'),
#     ('3022', '3040'),
#     # ('3040', '0037'),
#     # ('3050', '3110'),
#     # ('3050', '3060'),
#     ('3030', '2050'),
# )

Override to select all sections (except ones with missing/broken data)

In [9]:
lut = LookUpTable('Data')
sections = set()
for inter in lut.list_intersections():
    detectors = lut.get_detectors_on(inter)
    for sec in detectors[['Starting Intersection', 'Ending Intersection']].values:
        sections.add(tuple(sorted(sec)))
pprint(list(sections)[:10])

  self.lookup_table = self.lookup_table[


[('5050', '5051'),
 ('8604', 'Pascalstraße'),
 ('43', '9503'),
 ('43', 'Barthlgasstrasse'),
 ('4140', 'Audi Ring'),
 ('54', '6042'),
 ('8505', 'Am Sportpark'),
 ('3022', '48'),
 ('3120', 'Gutenberg strasse North'),
 ('4010', '5020')]


Exctract list of detectors for each section

In [5]:
lut = LookUpTable('Data')
def construct_int_det(sections):
    int_det = []
    for int_1, int_2 in sections:
        det_1_2, det_2_1 = lut.get_detectors_between(int_1, int_2)
        int_det.append((int_1, int_2, det_1_2))
        int_det.append((int_2, int_1, det_2_1))
    int_det = pd.DataFrame(int_det, columns=['Start', 'End', 'Detectors'])
    return int_det
    
int_det = construct_int_det(sections)
int_det

  self.lookup_table = self.lookup_table[


Unnamed: 0,Start,End,Detectors
0,3030,Löns Straße,[]
1,Löns Straße,3030,[8(DE1)]
2,33,8602,"[7(DC1), 8(DC2), 9(DC3)]"
3,8602,33,[]
4,3100,37,[]
...,...,...,...
463,7040,19,[]
464,4230,8605,[3(DB1)]
465,8605,4230,[3(DC)]
466,3110,3200,[1(DA1)]


In [6]:
ddp = DetectorDataProvider('Data')
print(ddp.list_intersections())

['0007', '0012', '0013', '0014', '0017', '0019', '0022', '0023', '0024', '0025', '0026', '0031', '0033', '0036', '0037', '0042', '0043', '0048', '0049', '0051', '0052', '0053', '0054', '1010', '1011', '1012', '1022', '1040', '1061', '1080', '2010', '2050', '3010', '3021', '3022', '3030', '3040', '3050', '3060', '3080', '3090', '3091', '3100', '3110', '3120', '3130', '3140', '3150', '3160', '3180', '3200', '4010', '4020', '4040', '4050', '4060', '4070', '4090', '4100', '4110', '4120', '4140', '4150', '4160', '4210', '4230', '4240', '4250', '5011', '5012', '5020', '5030', '5031', '5040', '5050', '5051', '5060', '5080', '5090', '6010', '6020', '6021', '6022', '6023', '6030', '6041', '6042', '6043', '6050', '6060', '6070', '6080', '6090', '6100', '7020', '7040', '8001', '8002', '8003', '8004', '8005', '8006', '8008', '8301', '8401', '8402', '8403', '8502', '8505', '8602', '8603', '8604', '8605', '8606', '9503']


Extract data from January to August and accumulate counts by section

In [7]:
START_DATE = datetime(2021, 1, 1)
END_DATE = datetime(2021, 12, 1)

def count_traffic(int_det, start_date=START_DATE, end_date=END_DATE):
    lens = set()
    def get_count(section_end, detectors):
        section_data = ddp.get_data_for_period(section_end, START_DATE, END_DATE)
        for col in section_data.columns:
            section_data[col] = pd.to_numeric(section_data[col], errors='coerce')
        try:
            return list(section_data[detectors].fillna(0).sum(axis=1, numeric_only=True).cumsum())
        except KeyError:
            print(section_end, section_data.columns, detectors)
    int_det['Counts'] = int_det.apply(lambda sec: get_count(sec['End'], sec['Detectors']), axis=1)

count_traffic(int_det)
int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')
int_det

TypeError: to_hdf() missing 1 required positional argument: 'key'

In [11]:
int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Start', 'End', 'Detectors', 'Counts'], dtype='object')]

  int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')


In [12]:
int_det = pd.read_hdf('int_det_excluded_missing.hdf', converters={'Counts': pd.eval})
int_det

Unnamed: 0,Start,End,Detectors,Counts
0,3030,Löns Straße,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,Löns Straße,3030,[8(DE1)],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,33,8602,"[7(DC1), 8(DC2), 9(DC3)]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,8602,33,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,3100,37,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...
463,7040,19,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
464,4230,8605,[3(DB1)],"[0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ..."
465,8605,4230,[3(DC)],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, ..."
466,3110,3200,[1(DA1)],"[0.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, ..."


##### Transform data into expected format

Write counts into a matrix
$$Q=[q_1,q_2,...,q_p]$$
for
$$q_i=\set{z(s_i,t_1),z(s_i,t_2),...z(s_i,t_d)}^T,$$
$z(s_i,t_j)$ is the traffic flow of the road section $s_i$ within the time interval $(t_0,t_j)$

In [16]:
def construct_q(int_det: pd.DataFrame, ignore: tuple = None):
    if ignore is None:
        ignore = []
    else:
        ignore.update({tuple(reversed(pair)) for pair in ignore})
    # int_det = int_det.astype({'Start': 'str', 'End': 'str'}).set_index(['Start', 'End'])
    mat_q = torch.stack([torch.from_numpy(np.array(a).astype(float)) \
        for a in int_det.loc[int_det.index.difference(ignore)]['Counts'].tolist()]).T
    return mat_q

mat_q = construct_q(int_det, {s for s in sections if s not in (('3100', '37'), ('37', '3100'), ('1011', '1010'), ('1010', '1011'))})
torch.save(mat_q, 'mat_q_excluded_missing.pt')
mat_q.shape

torch.Size([32064, 4])

In [60]:
mat_q.shape

torch.Size([32064, 233])

Construct a correlation coefficient matrix
$$
R(i, j)=\frac{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))(z(s_j,t_k)-\tilde{z}(s_j))}{\sqrt{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))^2}\sqrt{\sum_{k=1}^d(z(s_j,t_k)-\tilde{z}(s_j))^2}},
$$
where $$\tilde{z}(s_i)=\frac{1}{d}\sum_{k=1}^dz(s_i,t_k)$$

In [54]:
def construct_r(mat_q):
    d, p = mat_q.shape
    mat_q_normalized = mat_q - mat_q.mean(dim=0)
    mat_r = torch.zeros(p, p)
    for i in range(p):
        for j in range(i, p):
            i_col = mat_q_normalized[:, i]
            j_col = mat_q_normalized[:, j]
            i_norm = max(torch.norm(i_col), 1e-12)
            j_norm = max(torch.norm(j_col), 1e-12)
            if i_norm == 0 or j_norm == 0:
                if i_norm == j_norm:
                    mat_r[i][j] = 1
                else:
                    mat_r[i][j] = 0
            else:
                mat_r[i][j] = (i_col @ j_col) / i_norm / j_norm
            mat_r[j][i] = mat_r[i][j]
    return mat_r

mat_r = construct_r(mat_q)
torch.save(mat_r, 'mat_r_excluded_missing.pt')
mat_r

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.0000, 0.9995,  ..., 1.0000, 0.9719, 0.0000],
        [0.0000, 0.9995, 1.0000,  ..., 0.9995, 0.9779, 0.0000],
        ...,
        [0.0000, 1.0000, 0.9995,  ..., 1.0000, 0.9718, 0.0000],
        [0.0000, 0.9719, 0.9779,  ..., 0.9718, 1.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])

Load matrices R and Q

In [55]:
mat_q = torch.load('mat_q_excluded_missing.pt')
mat_r = torch.load('mat_r_excluded_missing.pt')

In [68]:
id = int_det.astype({'Start': 'str', 'End': 'str'}).set_index(['Start', 'End'])

In [79]:
id.loc[id.index.difference((('Steinheilstrasse', '6070'), ('7020', '6010')))]
construct_q(int_dat, ignore=[(('Steinheilstrasse', '6070'),)])

NameError: name 'int_dat' is not defined