In [10]:
import pandas as pd
import sys
import torch

from datetime import datetime
from pprint import pprint


if 'src' not in sys.path:
    sys.path.append('src')

%reload_ext autoreload
%autoreload 2

from explore.data import DetectorDataProvider, LookUpTable

### Read data from the data set

Get list of all sections

In [11]:
lut = LookUpTable('Data')
sections = set()
for inter in lut.list_intersections():
    detectors = lut.get_detectors_on(inter)
    for sec in detectors[['Starting Intersection', 'Ending Intersection']].values:
        sections.add(tuple(sorted(sec)))
pprint(list(sections)[:10])

[('12', '4020'),
 ('6041', '6042'),
 ('3160', 'SH'),
 ('6100', 'Scheelestrasse'),
 ('23', '4100'),
 ('6090', '8505'),
 ('8402', 'Am Heidgraben'),
 ('4090', 'Hindemithstraße'),
 ('1060', '2010'),
 ('3030', 'Friedrich-Ebert-Strasse')]


Extract list of detectors for each section

In [12]:
lut = LookUpTable('Data')
int_det = []
for int_1, int_2 in sections:
    det_1_2, det_2_1 = lut.get_detectors_between(int_1, int_2)
    int_det.append((int_1, int_2, det_1_2))
    int_det.append((int_2, int_1, det_2_1))
int_det = pd.DataFrame(int_det, columns=['Start', 'End', 'Detectors'])
int_det

Unnamed: 0,Start,End,Detectors
0,12,4020,"[6(DC1), 7(DC2)]"
1,4020,12,"[1(DA1), 2(DA2)]"
2,6041,6042,"[1(DA1), 2(DA2), 3(DA3)]"
3,6042,6041,"[7(DC1), 8(DC2)]"
4,3160,SH,[]
...,...,...,...
475,4010,2,[]
476,6050,Messerschmittstrasse,[]
477,Messerschmittstrasse,6050,"[7(DD2A), 8(DD2B), 9(DD1)]"
478,5050,Friedhofstrasse,[]


Extract data from January to August and accumulate counts by section

In [13]:
ddp = DetectorDataProvider('Data')

In [14]:
START_DATE = datetime(2021, 1, 1)
END_DATE = datetime(2021, 12, 1)

lens = set()
def get_count(section_end, detectors):
    section_data = ddp.get_data_for_period(section_end, START_DATE, END_DATE)
    for col in section_data.columns:
        section_data[col] = pd.to_numeric(section_data[col], errors='coerce')
    try:
        return list(section_data[detectors].fillna(0).sum(axis=1, numeric_only=True).cumsum())
    except KeyError:
        print(section_end, section_data.columns, detectors)
int_det['Counts'] = int_det.apply(lambda sec: get_count(sec['End'], sec['Detectors']), axis=1)

int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')
int_det

2040 Index([], dtype='object') ['8(DE1)', '9(DE2)']
2040 Index([], dtype='object') ['2(DB1)']
2040 Index([], dtype='object') ['7(DD1)']
2040 Index([], dtype='object') ['1(DA1)', '3(DF4)', '4(DF3)', '5(DF2)', '6(DF1)']


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Start', 'End', 'Detectors', 'Counts'], dtype='object')]

  int_det.to_hdf('int_det_excluded_missing.hdf', 'int_det')


Unnamed: 0,Start,End,Detectors,Counts
0,12,4020,"[6(DC1), 7(DC2)]","[0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 5.0, 5.0, 6.0, ..."
1,4020,12,"[1(DA1), 2(DA2)]","[2.0, 3.0, 4.0, 13.0, 14.0, 16.0, 21.0, 23.0, ..."
2,6041,6042,"[1(DA1), 2(DA2), 3(DA3)]","[0.0, 0.0, 0.0, 2.0, 7.0, 9.0, 9.0, 11.0, 13.0..."
3,6042,6041,"[7(DC1), 8(DC2)]","[1.0, 1.0, 3.0, 8.0, 11.0, 11.0, 13.0, 13.0, 1..."
4,3160,SH,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...
475,4010,2,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
476,6050,Messerschmittstrasse,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
477,Messerschmittstrasse,6050,"[7(DD2A), 8(DD2B), 9(DD1)]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
478,5050,Friedhofstrasse,[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


### Write data into a matrix

Write counts into a matrix
$$Q=[q_1,q_2,...,q_p]$$
for
$$q_i=\set{z(s_i,t_1),z(s_i,t_2),...z(s_i,t_d)}^T,$$
$z(s_i,t_j)$ is the traffic flow of the road section $s_i$ within the time interval $(t_0,t_j)$

In [15]:
mat_q = torch.tensor(int_det['Counts'].tolist()).T
torch.save(mat_q, 'mat_q_excluded_missing.pt')
mat_q.shape

RuntimeError: Could not infer dtype of NoneType