In [1]:
import pandas as pd
import sys
import torch

from datetime import datetime
from pprint import pprint


if 'src' not in sys.path:
    sys.path.append('src')

%reload_ext autoreload
%autoreload 2

from explore.data import DetectorDataProvider, LookUpTable

### Read data from the data set

Get list of all sections

In [2]:
lut = LookUpTable('Data')
sections = set()
for inter in lut.list_intersections():
    detectors = lut.get_detectors_on(inter)
    for sec in detectors[['Starting Intersection', 'Ending Intersection']].values:
        sections.add(tuple(sorted(sec)))
pprint(list(sections)[:10])

  self.lookup_table = self.lookup_table[


[('3130', '3150'),
 ('3140', '3200'),
 ('3100', '36'),
 ('5012', '5020'),
 ('1080 & 2040', '2010'),
 ('4140', '4160'),
 ('24', '4040'),
 ('4120', 'Audi Tor 16'),
 ('1010', '8001'),
 ('3100', 'Lena Christ strasse')]


Extract list of detectors for each section

In [3]:
lut = LookUpTable('Data')
int_det = []
for int_1, int_2 in sections:
    det_1_2, det_2_1 = lut.get_detectors_between(int_1, int_2)
    int_det.append((int_1, int_2, det_1_2))
    int_det.append((int_2, int_1, det_2_1))
int_det = pd.DataFrame(int_det, columns=['Start', 'End', 'Detectors'])
int_det

  self.lookup_table = self.lookup_table[


Unnamed: 0,Start,End,Detectors
0,3130,3150,[]
1,3150,3130,"[4(DB1), 5(DB2)]"
2,3140,3200,[5(DC1)]
3,3200,3140,[1(DA1)]
4,3100,36,"[1(DA1), 2(DA2)]"
...,...,...,...
463,NB,6070,[3(DB1)]
464,13,4010,"[5(DC1), 6(DC2)]"
465,4010,13,"[1(DA1), 2(DA2)]"
466,7040,Erletsstrasse,[]


Extract data from January to August and accumulate counts by section

In [5]:
ddp = DetectorDataProvider('Data')

In [6]:
START_DATE = datetime(2021, 1, 1)
END_DATE = datetime(2021, 12, 1)

lens = set()
def get_count(section_end, detectors):
    section_data = ddp.get_data_for_period(section_end, START_DATE, END_DATE)
    for col in section_data.columns:
        section_data[col] = pd.to_numeric(section_data[col], errors='coerce')
    try:
        return list(section_data[detectors].fillna(0).sum(axis=1, numeric_only=True).cumsum())
    except KeyError:
        print(section_end, section_data.columns, detectors)
int_det['Counts'] = int_det.apply(lambda sec: get_count(sec['End'], sec['Detectors']), axis=1)

int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')
int_det

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Start', 'End', 'Detectors', 'Counts'], dtype='object')]

  int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')


Unnamed: 0,Start,End,Detectors,Counts
0,3130,3150,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,3150,3130,"[4(DB1), 5(DB2)]","[0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, ..."
2,3140,3200,[5(DC1)],"[1.0, 1.0, 3.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, ..."
3,3200,3140,[1(DA1)],"[0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, ..."
4,3100,36,"[1(DA1), 2(DA2)]","[2.0, 9.0, 12.0, 14.0, 18.0, 20.0, 21.0, 22.0,..."
...,...,...,...,...
463,NB,6070,[3(DB1)],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
464,13,4010,"[5(DC1), 6(DC2)]","[4.0, 4.0, 4.0, 5.0, 8.0, 8.0, 8.0, 8.0, 9.0, ..."
465,4010,13,"[1(DA1), 2(DA2)]","[2.0, 5.0, 6.0, 9.0, 10.0, 11.0, 14.0, 15.0, 1..."
466,7040,Erletsstrasse,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


### Write data into a matrix

Write counts into a matrix
$$Q=[q_1,q_2,...,q_p]$$
for
$$q_i=\set{z(s_i,t_1),z(s_i,t_2),...z(s_i,t_d)}^T,$$
$z(s_i,t_j)$ is the traffic flow of the road section $s_i$ within the time interval $(t_0,t_j)$

In [7]:
def construct_q(int_det: pd.DataFrame):
    mat_q = torch.tensor(int_det['Counts'].tolist()).T
    return mat_q

mat_q = construct_q(int_det)
torch.save(mat_q, 'mat_q_excluded_missing.pt')
mat_q.shape

torch.Size([32064, 468])