In [40]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras

In [42]:
# Read the detector metadata, store detid as text and key the table by
# (citycode, detid) so it can be joined on those two levels later
detectors = (
    pd.read_csv('dataset/detectors.csv')
    .astype({'detid': 'str'})
    .set_index(['citycode', 'detid'])
)

# Pull and clean up traffic data for a city
def convert_traffic_data(city):
    """Load one city's detector readings and aggregate them per hour.

    Reads ``dataset/cities/{city}.csv``, averages the readings of every
    detector per day and hour, replaces the date with its day of the week and
    joins the detector attributes from the module-level ``detectors`` frame
    (indexed by ``citycode`` and ``detid``).

    Parameters
    ----------
    city : str
        File stem of the CSV to load. The join itself uses the ``city``
        column inside the file, not this argument.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``city_data``: one row per detector, date and hour (``detid`` itself
        is dropped after the join, ``linkid`` is kept as a string).
        ``city_data_avg``: the mean of all remaining columns per
        ``city`` / ``day`` (day of week) / ``interval`` (hour), without
        ``linkid``.

    Raises
    ------
    ValueError
        If the ``fclass`` column contains a value that is not one of the
        known road classes (including a missing value).
    """
    # Load {city}.csv
    city_data = pd.read_csv(f'dataset/cities/{city}.csv')

    # Integer-divide the interval by 3600 and wrap at 24 to get the hour
    # (presumably the interval is in seconds -- confirm against the dataset).
    # Vectorised instead of a per-row lambda.
    city_data['interval'] = (city_data['interval'] // 3600) % 24

    # Average all readings that share the same detector, day and hour
    # NOTE(review): detid is cast to str only after this groupby; if a file
    # holds the same id as both int and str they would be grouped separately.
    city_data = city_data.groupby(['city', 'detid', 'day', 'interval']).mean().reset_index()

    # Convert day column to day of the week (Monday=0 ... Sunday=6)
    city_data['day'] = pd.to_datetime(city_data['day']).dt.dayofweek

    # Drop the error and speed columns
    city_data = city_data.drop(columns=['error', 'speed'])

    # Merge with detectors.csv; merge defaults to an inner join, so readings
    # without a matching (city, detid) entry in `detectors` are dropped
    city_data['detid'] = city_data['detid'].astype('str')
    city_data = city_data.merge(detectors, left_on=['city', 'detid'], right_index=True)

    # Drop the detector columns that are not used any further
    city_data = city_data.drop(columns=['road', 'long', 'lat', 'detid'])

    city_data['linkid'] = city_data['linkid'].astype('str')

    # Fill missing values BEFORE casting to int: casting a column that still
    # contains NaN fails, and errors='ignore' would then silently leave the
    # column uncast. errors='ignore' is kept so values that cannot be
    # converted at all still pass through unchanged instead of raising.
    city_data['limit'] = city_data['limit'].fillna(0).astype('int64', errors='ignore')
    city_data['lanes'] = city_data['lanes'].fillna(1).astype('int64', errors='ignore')

    # Convert the fclass column to its position in this ordered list
    fclass_ids = [ 'other', 'living_street', 'residential', 'service', 'tertiary_link', 'tertiary', 'secondary_link', 'secondary', 'primary_link', 'primary', 'trunk_link', 'trunk', 'motorway_link', 'motorway' ]
    fclass_rank = {name: rank for rank, name in enumerate(fclass_ids)}
    fclass_codes = city_data['fclass'].map(fclass_rank)

    # Values missing from the lookup come back as NaN; fail loudly and name them
    unknown = city_data.loc[fclass_codes.isna(), 'fclass'].unique()
    if len(unknown) > 0:
        raise ValueError(f'unknown fclass value(s) in {city}: {list(unknown)}')
    city_data['fclass'] = fclass_codes.astype('int64')

    # Multiply the flow column by the lane count and the road length
    city_data['flow'] = city_data['flow'] * city_data['lanes'] * city_data['length']

    # Average the columns over all links in a city
    city_data_avg = (
        city_data
        .drop(columns=['linkid'])
        .groupby(['city', 'day', 'interval'])
        .mean()
        .reset_index()
    )

    return city_data, city_data_avg

cities = [ 'augsburg', 'basel', 'bern', 'bolton', 'bordeaux', 'bremen', 'cagliari', 'constance', 'darmstadt', 'essen', 'graz', 'groningen', 'hamburg', 'kassel', 'london', 'luzern', 'manchester', 'marseille', 'munich', 'paris', 'rotterdam', 'santander', 'speyer', 'strasbourg', 'stuttgart', 'torino', 'toulouse', 'vilnius', 'wolfsburg', 'zurich' ]
# cities = ['madrid']

# Collect the per-city frames and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies everything loaded so far
# on every iteration and starts from an empty frame.
city_frames = []
city_avg_frames = []

for city in cities:
    print(f'Loading {city}...          ', end='\r')
    city_data, city_data_avg = convert_traffic_data(city)

    city_frames.append(city_data)
    city_avg_frames.append(city_data_avg)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# city was loaded. Row indices are kept as produced for each city.
data = pd.concat(city_frames) if city_frames else pd.DataFrame()
data_avg = pd.concat(city_avg_frames) if city_avg_frames else pd.DataFrame()

Loading stuttgart...           

  city_data = pd.read_csv(f'dataset/cities/{city}.csv')


Loading zurich...             

In [43]:
# Preview the first five rows of the per-detector table
data.head(n=5)

Unnamed: 0,city,day,interval,flow,occ,length,pos,fclass,limit,lanes,linkid
0,augsburg,5,0,1.568041,0.0025,0.112003,0.003423,5,30,1.0,737.0
1,augsburg,5,1,0.448012,0.0,0.112003,0.003423,5,30,1.0,737.0
2,augsburg,5,2,0.784021,0.0,0.112003,0.003423,5,30,1.0,737.0
3,augsburg,5,3,0.672018,0.0,0.112003,0.003423,5,30,1.0,737.0
4,augsburg,5,4,0.560015,0.0,0.112003,0.003423,5,30,1.0,737.0


In [44]:
# Preview the first five rows of the per-city averages
data_avg.head(n=5)

Unnamed: 0,city,day,interval,flow,occ,length,pos,fclass,limit,lanes
0,augsburg,0,0,7.257053,0.054564,0.20692,0.035397,5.656525,47.302231,1.0
1,augsburg,0,1,3.309013,0.049596,0.20692,0.035397,5.656525,47.302231,1.0
2,augsburg,0,2,2.403672,0.048928,0.20692,0.035397,5.656525,47.302231,1.0
3,augsburg,0,3,2.903688,0.049114,0.20692,0.035397,5.656525,47.302231,1.0
4,augsburg,0,4,6.994569,0.05367,0.20692,0.035397,5.656525,47.302231,1.0


In [47]:
# Show, for each city, the number of non-null values in every column
# (this equals the city's row count wherever no values are missing)
data_avg.groupby(by='city').count()

Unnamed: 0_level_0,day,interval,flow,occ,length,pos,fclass,limit,lanes
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
augsburg,168,168,168,168,168,168,168,168,168
basel,168,168,168,168,168,168,168,168,168
bern,168,168,168,168,168,168,168,168,168
bolton,125,125,125,125,125,125,125,125,125
bordeaux,168,168,168,168,168,168,168,168,168
bremen,168,168,168,168,168,168,168,168,168
cagliari,120,120,120,120,120,120,120,120,120
constance,168,168,168,168,168,168,168,168,168
darmstadt,120,120,120,120,120,120,120,120,120
essen,168,168,168,168,168,168,168,168,168
