In [1]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Handle table-like data and matrices
import numpy as np
import pandas as pd
from pandas import DataFrame,Series
import csv
import pprint

# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns

# Others
import math
from datetime import datetime,timedelta
import time

# Configure visualisations
%matplotlib inline
mpl.style.use( 'ggplot' )
sns.set_style( 'white' )
pylab.rcParams[ 'figure.figsize' ] = 8 , 6

In [31]:
# Load the per-link trajectory table. `enter_time` is parsed to datetime64 on read.
link_trajectories = pd.read_csv(
    'datas/train/link_trajectories(table_5)_training.csv',
    parse_dates=['enter_time'],
)

# leave_time = enter_time + travel_time, where travel_time is in seconds.
# The conversion is vectorised instead of building one `timedelta` per row.
# Both time columns are already datetime64 at this point, so the former
# per-element `Timestamp.to_datetime()` passes (an API that has been removed
# from pandas) are not needed.
link_trajectories['leave_time'] = (
    link_trajectories['enter_time']
    + pd.to_timedelta(link_trajectories['travel_time'].astype(float), unit='s')
)

# `starting_time` is not used by the rest of the notebook.
del link_trajectories['starting_time']

In [32]:
# Preview the first five rows to sanity-check the parsed/derived columns.
link_trajectories.head(n=5)

Unnamed: 0,enter_time,intersection_id,link_id,tollgate_id,total_travel_time,travel_seq,travel_time,vehicle_id,leave_time
0,2016-07-19 00:14:24,B,105,3,70.85,105100111103122,9.56,1065642,2016-07-19 00:14:33.560
1,2016-07-19 00:14:34,B,100,3,70.85,105100111103122,6.75,1065642,2016-07-19 00:14:40.750
2,2016-07-19 00:14:41,B,111,3,70.85,105100111103122,13.0,1065642,2016-07-19 00:14:54.000
3,2016-07-19 00:14:54,B,103,3,70.85,105100111103122,7.47,1065642,2016-07-19 00:15:01.470
4,2016-07-19 00:15:02,B,122,3,70.85,105100111103122,32.85,1065642,2016-07-19 00:15:34.850


Quantities of interest: flow, speed, and lane occupancy.

In [33]:
def group(data):
    """Merge overlapping ``(start, end)`` intervals.

    The intervals are sorted first, then swept once; every run of intervals
    that strictly overlap is collapsed into a single ``(start, end)`` pair.

    Parameters
    ----------
    data : iterable of (start, end) pairs
        Intervals in any order. Endpoints only need to be mutually comparable.

    Yields
    ------
    (start, end)
        Merged intervals in ascending order. Nothing is yielded for empty
        input.
    """
    it = iter(sorted(data))
    first = next(it, None)
    if first is None:
        # A bare next() on an empty iterator would raise StopIteration inside
        # this generator, which Python 3.7+ turns into a RuntimeError.
        return

    a, b = first
    for c, d in it:
        # Strict comparison: intervals that merely touch, e.g. (1, 2) and
        # (2, 3), are NOT merged. Use `b >= c` to merge touching intervals.
        if b > c:
            b = max(b, d)
        else:
            yield a, b
            a, b = c, d
    yield a, b


def get_ts(t, origin=datetime(2016, 7, 19), slot=timedelta(minutes=20)):
    """Return the index of the fixed-width time slot that contains ``t``.

    Parameters
    ----------
    t : datetime.datetime or pandas.Timestamp
        Naive timestamp to bucket.
    origin : datetime.datetime, optional
        Start of slot 0. Defaults to 2016-07-19 00:00:00.
    slot : datetime.timedelta, optional
        Slot width. Defaults to 20 minutes.

    Returns
    -------
    int
        ``floor((t - origin) / slot)``. Timestamps before ``origin`` get
        negative indices rather than being folded into slot 0.
    """
    # pandas.Timestamp subclasses datetime, so the subtraction works directly
    # and the removed `Timestamp.to_datetime()` call is not needed.
    # Floor division keeps the arithmetic exact (no float rounding).
    return int((t - origin) // slot)

In [34]:
# A row is "safe" when it enters and leaves the link inside the same time slot
# (slot index as computed by get_ts).
safe = link_trajectories['enter_time'].map(get_ts).eq(
    link_trajectories['leave_time'].map(get_ts)
)

In [35]:
# Attach the same-slot flag to the frame as a boolean column.
link_trajectories.loc[:, 'safe'] = safe

In [36]:
# Rows whose enter and leave slots differ, as a list of per-row dicts.
unsafe_ds = link_trajectories.loc[~link_trajectories['safe']].to_dict(orient='records')

In [37]:
# Inspect the first slot-crossing record to see the layout of each dict.
unsafe_ds[0]

{'enter_time': Timestamp('2016-07-19 01:38:40'),
 'intersection_id': 'C',
 'leave_time': Timestamp('2016-07-19 01:40:02.870000'),
 'link_id': 113,
 'safe': False,
 'tollgate_id': 1,
 'total_travel_time': 214.87,
 'travel_seq': '115,102,109,104,112,111,103,116,101,121,106,113',
 'travel_time': 82.87,
 'vehicle_id': 1056529}

In [38]:
def get_mid_time(record):
    """Locate the slot boundary a record crosses and split its duration there.

    The boundary is the end of the 20-minute slot (counted from
    2016-07-19 00:00:00, the same constants ``get_ts`` uses) that contains
    ``record['enter_time']``. Only that first boundary is considered; a
    record spanning several slots is not split further here.

    Parameters
    ----------
    record : dict
        Must provide ``'enter_time'`` and ``'leave_time'`` as values
        ``pandas.Timestamp`` can wrap (Timestamp, datetime, numpy.datetime64).

    Returns
    -------
    mid_time : numpy.datetime64
        The slot boundary.
    before : numpy.timedelta64
        ``mid_time - enter_time``.
    after : numpy.timedelta64
        ``leave_time - mid_time``.
    """
    # Naive literal: numpy datetimes carry no timezone, and a trailing 'Z'
    # triggers a deprecation warning when parsed.
    origin = np.datetime64('2016-07-19T00:00:00')
    mid_time = origin + (get_ts(record['enter_time']) + 1) * np.timedelta64(20, 'm')

    # Keep the timestamps at their native resolution. Casting to whole seconds
    # would drop the fractional part of leave_time, so the two partial
    # durations would no longer sum to the original travel time.
    enter_time = pd.Timestamp(record['enter_time']).to_datetime64()
    leave_time = pd.Timestamp(record['leave_time']).to_datetime64()
    return mid_time, mid_time - enter_time, leave_time - mid_time

In [39]:
# Split every slot-crossing record into two records that each stay inside a
# single slot: [enter_time, boundary] and [boundary, leave_time].
new_safe_ds = []
for record in unsafe_ds:
    mid_time, ft, ss = get_mid_time(record)
    boundary = pd.Timestamp(mid_time)

    # Build fresh dicts instead of mutating `record`. Editing the entries of
    # `unsafe_ds` in place would make this cell give different results when
    # it is re-run.
    first_half = dict(record)
    first_half['leave_time'] = boundary
    # Store durations as float seconds, like the untouched rows, rather than
    # as numpy timedelta objects.
    first_half['travel_time'] = pd.Timedelta(ft).total_seconds()

    second_half = dict(record)
    second_half['enter_time'] = boundary
    second_half['travel_time'] = pd.Timedelta(ss).total_seconds()

    new_safe_ds.extend([first_half, second_half])

In [40]:
# Recombine the split records with the rows that never crossed a slot
# boundary, then drop the helper flag and the unused route string.
new_link_trajectories = pd.concat(
    [pd.DataFrame(new_safe_ds), link_trajectories.loc[link_trajectories['safe']]]
).drop(['safe', 'travel_seq'], axis=1)

In [None]:
# Show the combined table in chronological order of link entry.
# `DataFrame.sort(columns=...)` has been removed from pandas; `sort_values`
# is its replacement. The result is displayed only, not assigned.
new_link_trajectories.sort_values(by='enter_time')