# Analysis

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras

In [2]:
# Pull and clean up traffic data for a city
def convert_traffic_data(city):
    """Load one city's detector readings and add cyclical time features.

    Reads ``dataset/cities/{city}.csv``, combines its ``day`` and ``interval``
    columns into a single ``datetime`` column, and adds sine/cosine encodings
    of the position within the day, the week and the year.

    Args:
        city: City name; used to build the CSV path.

    Returns:
        DataFrame indexed by ``(city, detid, datetime)`` containing the
        remaining CSV columns plus ``day_sin``, ``day_cos``, ``week_sin``,
        ``week_cos``, ``year_sin`` and ``year_cos``. The ``interval``, ``day``,
        ``error`` and ``speed`` columns are removed.

    Raises:
        FileNotFoundError: If the city's CSV file does not exist.
        KeyError: If an expected column is missing from the CSV.
    """
    # Load {city}.csv
    city_data = pd.read_csv(f'dataset/cities/{city}.csv')

    # `interval` holds seconds since midnight. Adding it as a timedelta parses
    # the date only once, stays vectorised, accepts float-typed seconds, and
    # lets a value of 86400 roll over to the next day instead of producing an
    # unparseable "24:0:0" time string.
    date_time = (
        pd.to_datetime(city_data['day'], format='%Y-%m-%d')
        + pd.to_timedelta(city_data['interval'], unit='s')
    )
    city_data['datetime'] = date_time

    # Convert the datetime column to a timestamp (seconds since the epoch)
    timestamp = date_time.map(pd.Timestamp.timestamp)

    # Add the sin and cos of the day, week and year to the dataframe
    day = 24 * 60 * 60
    week = day * 7
    year = day * 365.2425  # mean Gregorian year length in days
    for name, period in (('day', day), ('week', week), ('year', year)):
        angle = timestamp * (2 * np.pi / period)
        city_data[f'{name}_sin'] = np.sin(angle)
        city_data[f'{name}_cos'] = np.cos(angle)

    # Drop the raw columns that are no longer needed and index the readings
    city_data = city_data.drop(columns=['interval', 'day', 'error', 'speed'])
    city_data = city_data.set_index(['city', 'detid', 'datetime'])

    return city_data

In [3]:
# Build the cleaned, time-encoded readings for Frankfurt and preview the first rows.
frankfurt_data = convert_traffic_data(city='frankfurt')
frankfurt_data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,flow,occ,day_sin,day_cos,week_sin,week_cos,year_sin,year_cos
city,detid,datetime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
frankfurt,BP2_D1,2016-12-21 00:00:00,24.0,0.004967,-3.377779e-12,1.0,-0.781831,0.62349,-0.177914,0.984046
frankfurt,BP2_D1,2016-12-21 00:05:00,24.0,0.0069,0.02181489,0.999762,-0.779884,0.625923,-0.177855,0.984057
frankfurt,BP2_D1,2016-12-21 00:10:00,12.0,0.003567,0.04361939,0.999048,-0.77793,0.628351,-0.177796,0.984067
frankfurt,BP2_D1,2016-12-21 00:15:00,12.0,0.002667,0.06540313,0.997859,-0.775968,0.630773,-0.177737,0.984078
frankfurt,BP2_D1,2016-12-21 00:20:00,12.0,0.002267,0.08715574,0.996195,-0.773998,0.633188,-0.177679,0.984089


In [4]:
# Expand detector data
def expand_detector_data(city_data):
    """Attach detector metadata to city readings and re-key them by road link.

    Reads ``dataset/detectors.csv``, joins it onto ``city_data`` using the
    ``city``/``detid`` index levels (matched against ``citycode``/``detid``),
    swaps the detector id for the integer ``linkid`` in the index, and encodes
    the ``fclass`` road class as its position in an ordered list of classes.

    Args:
        city_data: DataFrame whose index has the levels ``city``, ``detid``
            and ``datetime`` (the shape returned by ``convert_traffic_data``).

    Returns:
        DataFrame indexed by ``(city, linkid, datetime)`` with the detector
        columns added, ``road``/``long``/``lat``/``detid`` removed, and
        ``fclass`` stored as an int64 code.

    Raises:
        ValueError: If ``fclass`` contains a value that is not in the list of
            known road classes (missing values included).
    """
    # Load detectors.csv
    detectors = pd.read_csv('dataset/detectors.csv').set_index(['citycode', 'detid'])

    # Add the detector data to the city data
    city_data = city_data.join(detectors, on=['city', 'detid'])
    city_data['linkid'] = city_data['linkid'].astype('int64')

    # Drop the detector id column
    city_data = (
        city_data.reset_index()
        .drop(columns=['road', 'long', 'lat', 'detid'])
        .set_index(['city', 'linkid', 'datetime'])
    )

    # Convert the fclass column to a range.
    # Every entry needs its own trailing comma: adjacent string literals are
    # concatenated silently, which previously merged 'trunk_link' and 'trunk'
    # into one bogus entry and shifted the codes of the motorway classes.
    fclass_ids = [
        'other',
        'living_street',
        'residential',
        'service',
        'tertiary_link',
        'tertiary',
        'secondary_link',
        'secondary',
        'primary_link',
        'primary',
        'trunk_link',
        'trunk',
        'motorway_link',
        'motorway',
    ]
    fclass_codes = {name: code for code, name in enumerate(fclass_ids)}

    # A dict lookup avoids a linear list scan per row; unmapped values come
    # back as NaN and are reported together instead of failing on the first.
    encoded = city_data['fclass'].map(fclass_codes)
    unknown = encoded.isna().to_numpy()
    if unknown.any():
        bad = sorted({str(value) for value in city_data['fclass'].to_numpy()[unknown]})
        raise ValueError(f'Unknown fclass value(s): {bad}')
    city_data['fclass'] = encoded.astype('int64')

    return city_data

In [5]:
# Enrich the Frankfurt readings with detector metadata and preview the first rows.
# NOTE: this rebinds `frankfurt_data` to a frame whose index no longer has `detid`,
# so re-running this cell without first re-running the cell that built
# `frankfurt_data` will fail in the join.
frankfurt_data = expand_detector_data(city_data=frankfurt_data)
frankfurt_data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,flow,occ,day_sin,day_cos,week_sin,week_cos,year_sin,year_cos,length,pos,fclass,limit,lanes
city,linkid,datetime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
frankfurt,29,2016-12-21 00:00:00,24.0,0.004967,-3.377779e-12,1.0,-0.781831,0.62349,-0.177914,0.984046,0.049609,0.016967,7,50,1.0
frankfurt,29,2016-12-21 00:05:00,24.0,0.0069,0.02181489,0.999762,-0.779884,0.625923,-0.177855,0.984057,0.049609,0.016967,7,50,1.0
frankfurt,29,2016-12-21 00:10:00,12.0,0.003567,0.04361939,0.999048,-0.77793,0.628351,-0.177796,0.984067,0.049609,0.016967,7,50,1.0
frankfurt,29,2016-12-21 00:15:00,12.0,0.002667,0.06540313,0.997859,-0.775968,0.630773,-0.177737,0.984078,0.049609,0.016967,7,50,1.0
frankfurt,29,2016-12-21 00:20:00,12.0,0.002267,0.08715574,0.996195,-0.773998,0.633188,-0.177679,0.984089,0.049609,0.016967,7,50,1.0
