In [1]:
import pandas as pd
import sys
import torch

from datetime import datetime
from pprint import pprint


if 'src' not in sys.path:
    sys.path.append('src')

%reload_ext autoreload
%autoreload 2

from data.extract import DetectorDataProvider, LookUpTable

### Read data from the data set

Get list of all sections

In [2]:
lut = LookUpTable('Data')
sections = set()
for inter in lut.list_intersections():
    detectors = lut.get_detectors_on(inter)
    for sec in detectors[['Starting Intersection', 'Ending Intersection']].values:
        sections.add(tuple(sorted(sec)))
pprint(list(sections)[:10])

  self.lookup_table = self.lookup_table[


[('6020', '6021'),
 ('3100', '37'),
 ('7040', 'Erletsstrasse'),
 ('3022', '48'),
 ('3130', 'Autobahn Ingolstadt Nord 16a'),
 ('18', '7020'),
 ('4120', 'Audi Tor 15'),
 ('3160', '3180'),
 ('14', '4140'),
 ('23', '4100')]


Extract list of detectors for each section

In [3]:
lut = LookUpTable('Data')
int_det = []
for int_1, int_2 in sections:
    det_1_2, det_2_1 = lut.get_detectors_between(int_1, int_2)
    int_det.append((int_1, int_2, det_1_2))
    int_det.append((int_2, int_1, det_2_1))
int_det = pd.DataFrame(int_det, columns=['Start', 'End', 'Detectors'])
int_det

  self.lookup_table = self.lookup_table[


Unnamed: 0,Start,End,Detectors
0,6020,6021,"[6(DC1), 7(DC2)]"
1,6021,6020,"[12(DD1), 13(DD2), 14(DD3)]"
2,3100,37,[]
3,37,3100,[7(DD1)]
4,7040,Erletsstrasse,[]
...,...,...,...
467,Liegnitzer Strasse,6023,[6(DC1)]
468,1080,Schutterstrasse,[]
469,Schutterstrasse,1080,"[3(DB1), 4(DB2)]"
470,1010,6010,[]


Extract data from January to August and accumulate counts by section

In [4]:
ddp = DetectorDataProvider('Data')

In [5]:
START_DATE = datetime(2021, 1, 1)
END_DATE = datetime(2021, 12, 1)

lens = set()
def get_count(section_end, detectors):
    section_data = ddp.get_data_for_period(section_end, START_DATE, END_DATE)
    for col in section_data.columns:
        section_data[col] = pd.to_numeric(section_data[col], errors='coerce')
    try:
        return list(section_data[detectors].fillna(0).sum(axis=1, numeric_only=True).cumsum())
    except KeyError:
        print(section_end, section_data.columns, detectors)
int_det['Counts'] = int_det.apply(lambda sec: get_count(sec['End'], sec['Detectors']), axis=1)

int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')
int_det

96 DetCount_20210101.csv


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Start', 'End', 'Detectors', 'Counts'], dtype='object')]

  int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')


Unnamed: 0,Start,End,Detectors,Counts
0,6020,6021,"[6(DC1), 7(DC2)]","[0.0, 2.0, 3.0, 3.0, 10.0, 10.0, 11.0, 14.0, 1..."
1,6021,6020,"[12(DD1), 13(DD2), 14(DD3)]","[1.0, 1.0, 3.0, 8.0, 9.0, 10.0, 12.0, 14.0, 17..."
2,3100,37,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,37,3100,[7(DD1)],"[0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, ..."
4,7040,Erletsstrasse,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...
467,Liegnitzer Strasse,6023,[6(DC1)],"[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, ..."
468,1080,Schutterstrasse,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
469,Schutterstrasse,1080,"[3(DB1), 4(DB2)]","[0.0, 5.0, 5.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, ..."
470,1010,6010,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


### Write data into a matrix

Write counts into a matrix
$$Q=[q_1,q_2,...,q_p]$$
for
$$q_i=\set{z(s_i,t_1),z(s_i,t_2),...z(s_i,t_d)}^T,$$
$z(s_i,t_j)$ is the traffic flow of the road section $s_i$ within the time interval $(t_0,t_j)$

In [6]:
int_det['Counts'].apply(len)

0      32064
1      32064
2      32064
3      32064
4      32064
       ...  
467    32064
468    32064
469    32064
470    32064
471    32064
Name: Counts, Length: 472, dtype: int64

In [7]:
mat_q = torch.tensor(int_det['Counts'].tolist()).T
torch.save(mat_q, 'mat_q_excluded_missing.pt')
mat_q.shape

torch.Size([32064, 472])