In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from traffic_util import TrafficData
from collections import defaultdict

In [None]:
# Read the pickled dataset from disk into a pandas DataFrame.
# NOTE: unpickling executes arbitrary code -- only load files from a trusted source.
dataset_path = 'data/dataset_all.pickle'
df = pd.read_pickle(dataset_path)

In [None]:
# Wrap the raw DataFrame in the project's TrafficData helper (from traffic_util).
# Later cells read the frame back through `data.df` and use the helper's
# get_lane_center / get_veh_df_between / get_diff_series methods.
# NOTE(review): `data.df` presumably carries derived columns such as
# `lane_change` and `from_lane` -- confirm against traffic_util.
data = TrafficData(df)

In [None]:
# Report the dataset size: distinct vehicle ids and distinct frame ids.
# NOTE(review): vehicles are counted on `data.df` but frames on the raw `df`;
# kept as-is, confirm whether both were meant to use the same frame.
n_vehicles = len(data.df.veh_id.unique())
print("Number of vehicles %s" % (n_vehicles,))
n_frames = len(df.frame_id.unique())
print("Number of frames %s" % (n_frames,))

In [None]:
# Print the mean vehicle velocity of every lane, one line per lane_id
lanes = df.groupby('lane_id')
for lane_id, lane_df in lanes:
    lane_mean_velocity = lane_df.veh_vel.mean()
    print("Lane %s average velocity: %s" % (lane_id, lane_mean_velocity))

In [None]:
# Extract "clean" lane changes from the lane-change snapshots.
#
# For every row flagged as a lane change, a window of `frames_before` frames
# before and `frames_after` frames after the change is fetched for that vehicle.
# The lane change is "clean" when the vehicle stays in `from_lane` for every
# frame before the change and in the new lane for every frame from the change
# onwards. Clean windows are annotated with normalized columns and stored in
# `lane_change_dict`, keyed by (from_lane, lane_id).
lane_change_dict = defaultdict(list)

# Boolean mask instead of groupby('lane_change').get_group(True): get_group
# raises KeyError when the dataset contains no lane-change rows at all.
lc_df = data.df.loc[data.df.lane_change == True]
print("Number of lane changes: %s" % (len(lc_df)))
frames_before, frames_after = 60, 40
clean_count = 0
for index, row in lc_df.iterrows():  # Iterate over all veh snapshots containing a lanechange

    # Skip snapshots whose origin lane is missing. pd.isnull also copes with
    # None / object values, where np.isnan would raise TypeError.
    if pd.isnull(row.from_lane):
        continue

    from_lane = int(row.from_lane)  # TODO: fix the from_lane in the DataFrame
    lane_id = row.lane_id
    to_lane_center = data.get_lane_center(lane_id)
    from_lane_center = data.get_lane_center(from_lane)  # reused for the normalization below
    lane_distance = to_lane_center - from_lane_center
    veh_id = row.veh_id
    lc_frame = row.frame_id

    # Work on a private copy: new columns are added below, and the helper may
    # hand back a slice of data.df (SettingWithCopyWarning / shared-data mutation).
    veh_lc_df = data.get_veh_df_between(veh_id, lc_frame - frames_before, lc_frame + frames_after).copy()
    veh_lc_df_pre = veh_lc_df.loc[veh_lc_df.frame_id < lc_frame]
    veh_lc_df_post = veh_lc_df.loc[veh_lc_df.frame_id >= lc_frame]
    if (veh_lc_df_pre.lane_id == from_lane).all() and (veh_lc_df_post.lane_id == lane_id).all():
        # This is a clean lane change
        clean_count += 1

        # Frame offset relative to the lane-change frame (negative = before).
        veh_lc_df['lc_frame_index'] = veh_lc_df.frame_id - lc_frame

        # Lateral position rescaled so the origin lane center maps to 0 and the
        # target lane center maps to 1.
        veh_lc_df['local_x_normalized'] = (veh_lc_df.local_x - from_lane_center) \
            * (1. / lane_distance)

        # Velocity relative to the first frame of the window, scaled by the
        # window's peak velocity. Series.max() skips NaN, whereas the builtin
        # max() over a list gives order-dependent results when NaN is present.
        vel_start = veh_lc_df.veh_vel.head(1).squeeze()
        vel_max = veh_lc_df.veh_vel.max()
        veh_lc_df['veh_vel_normalized'] = (veh_lc_df.veh_vel - vel_start) / vel_max

        # Figure out gaps
        frame_start, frame_end = veh_lc_df.frame_id.min(), veh_lc_df.frame_id.max()
        lc_snapshot = veh_lc_df[veh_lc_df.frame_id == lc_frame].squeeze()
        veh_id_front = lc_snapshot.veh_front
        veh_id_back = lc_snapshot.veh_back
        # NOTE(review): a back vehicle is required here but only the front
        # vehicle is used for the gap series -- confirm this is intended.
        # NOTE(review): when either neighbour is missing, no 'gap_length'
        # column is added at all, so stored frames can differ in columns.
        if (veh_id_front > 0) and (veh_id_back > 0):
            # 'local_y' difference series between the front vehicle and this vehicle
            # (exact semantics live in TrafficData.get_diff_series).
            gap_series = data.get_diff_series(frame_start, frame_end, veh_id_front, lc_snapshot.veh_id, 'local_y')
            if len(veh_lc_df) == len(gap_series):
                veh_lc_df['gap_length'] = gap_series.tolist()
            else:
                # Lengths disagree, so the series cannot be aligned row by row:
                # report it and mark the gap as unknown.
                print("veh: %s lc_frame %s veh front: %s veh back: %s frame start: %s frame end: %s len(lc_df) %s len(series) %s"
                      % (lc_snapshot.veh_id, lc_frame, veh_id_front, veh_id_back, frame_start, frame_end, len(veh_lc_df), len(gap_series)))
                veh_lc_df['gap_length'] = np.nan

        lane_change_dict[(from_lane, lane_id)].append(veh_lc_df)

print("Number of clean lane changes %s" % (clean_count))

In [None]:
# Save the extracted lane changes to disk as a pickle.
# Pickle output is a byte stream, so the file is opened in binary mode ('wb'):
# text mode can corrupt the data through newline translation on some platforms
# and is rejected outright by pickle.dump on Python 3. Read it back with 'rb'.
with open('data/lane_change_dict.pickle', 'wb') as f:
    pickle.dump(lane_change_dict, f)