In [17]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import numpy as np
from darksky.api import DarkSky

from Algorithms.general_data_preprocessing import *
from Algorithms.etrim_data_manipulation import *
from Algorithms.shapefile_analysis import *

In [2]:
# Interstate route codes handed to the interstate filter helpers in step 2(a).
interstate_list = ['I0003', 'I0022', 'I0024', 'I0026', 'I0040',
                   'I0055', 'I0065', 'I0069', 'I0075', 'I0081',
                   'I0124', 'I0140', 'I0155', 'I0169', 'I0181',
                   'I0240', 'I0255', 'I0265', 'I0269', 'I0275',
                   'I0440', 'I0475', 'I0640', 'I0840']

# Dark Sky API key, read from the environment so the secret is never stored in
# (or committed with) the notebook. Export DARKSKY_API_KEY before starting the
# kernel. NOTE(review): the key that used to be hardcoded here should be
# treated as leaked and revoked.
darksky_key = os.environ.get('DARKSKY_API_KEY')
if not darksky_key:
    # Warn instead of raising so the steps that do not need the weather
    # lookup can still be run without a key.
    print('WARNING: environment variable DARKSKY_API_KEY is not set; '
          'the Dark Sky lookup in step 3(b) will not be able to authenticate.')

In [5]:
# step 1: obtain dataset
# etrim_data: crash records table read from CSV (the preview further down shows
# GPS coordinates, crash date/time, weather and light condition columns).
etrim_data = pd.read_csv("Data/etrim.csv")
# shapefile: GeoDataFrame read from the Tennessee 2017 shapefile.
# Both paths are relative, so the notebook must be run from the directory
# that contains the Data/ folder.
shapefile = gpd.read_file('Data/tennessee2017/Tennessee2017.shp')

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# step 2: general data preprocessing
# Each dataset goes through the same three stages, applied innermost call first:
#   (a) filter interstates, (b) clean bad records, (c) filter important features.
interstate_etrim = etrim_feature_filter(
    etrim_cleaner(
        etrim_interstate_filter(etrim_data, interstate_list)
    )
)
interstate_shapefile = shapefile_feature_filter(
    shapefile_cleaner(
        shapefile_interstate_filter(shapefile, interstate_list)
    )
)

In [8]:
# Preview the first rows of the preprocessed crash records.
interstate_etrim.head()

Unnamed: 0,GPS Coordinate Latitude,GPS Coordinate Longitude,Date of Crash,Time of Crash,Weather Cond,Light Conditions
0,36.1784,-84.09151,1/12/17 0:00,1010,Rain,Daylight
1,36.21026,-84.13017,1/30/17 0:00,2308,Clear,Dark-Not Lighted
2,35.84854,-88.07133,1/11/17 0:00,52,Cloudy,Dark-Not Lighted
3,35.12685,-85.0212,1/11/17 0:00,715,Rain,Dawn
4,35.13047,-85.00501,1/18/17 0:00,1535,Cloudy,Daylight


In [22]:
# Preview the first rows of the preprocessed shapefile records.
interstate_shapefile.head()

Unnamed: 0,Route_ID,Begin_Poin,End_Point,Route_Numb,Route_Name,Urban_Code,County_Cod,Truck_NN,Through_La,Speed_Limi,AADT,AADT_Singl,AADT_Combi,IRI,geometry
0,54I0075001,23.9,24.0,75,75,99999,107,1,4,70,38210,2293,11463,32,LINESTRING (-84.53457719599999 35.578495521000...
1,57I0040001,25.4,25.5,40,40,99999,113,1,4,0,36920,2584,10707,67,LINESTRING (-88.65077860999997 35.712850435000...
2,47I0040001,29.5,29.6,40,40,45640,93,1,6,0,71000,2130,23430,42,LINESTRING (-83.76576149199997 36.003828577000...
3,82I0081001,2.1,2.2,81,81,45235,163,1,4,0,28010,1401,9523,41,LINESTRING (-82.54077927299994 36.444400962000...
4,54I0075001,8.3,8.4,75,75,99999,107,1,4,0,38120,2287,11436,36,LINESTRING (-84.70818958099994 35.402718893000...


In [10]:
# step 3: etrim data manipulation
def _splice_columns(frame, last_kept_left, inserted, first_kept_right):
    """Return ``frame`` with ``inserted`` placed between two label-based column slices.

    Columns up to and including ``last_kept_left`` and columns from
    ``first_kept_right`` onward are kept; any columns in between are dropped.
    """
    left = frame.loc[:, :last_kept_left]
    right = frame.loc[:, first_kept_right:]
    return pd.concat([left, inserted, right], axis=1)


# step 3(a): process temporal features
temporal_features = process_temporal(interstate_etrim)
interstate_etrim = _splice_columns(
    interstate_etrim, 'GPS Coordinate Longitude', temporal_features, 'Weather Cond')
derived_temporal_features = derive_temporal(interstate_etrim)
interstate_etrim = _splice_columns(
    interstate_etrim, 'timestamp', derived_temporal_features, 'Weather Cond')

# step 3(b): aggregate dark sky data
# The weather columns take the place of 'Weather Cond'; everything from
# 'Light Conditions' onward is kept.
darksky = DarkSky(darksky_key)
weather_data = get_darksky_feature(interstate_etrim, darksky)
interstate_etrim = _splice_columns(
    interstate_etrim, 'timestamp', weather_data, 'Light Conditions')

In [11]:
# Preview the crash records after the temporal and weather columns were spliced in.
interstate_etrim.head()

Unnamed: 0,GPS Coordinate Latitude,GPS Coordinate Longitude,time,year,month,day,hour,minute,weekday,timestamp,temperature,cloud_cover,dew_point,humidity,precip_intensity,precip_probability,uv_index,visibility,wind_speed,Light Conditions
0,36.1784,-84.09151,2017-01-12T10:10:00,2017,1,12,10,10,3,1484237000.0,61.3,1.0,54.53,0.78,0.0,0.0,2.0,9.997,7.37,Daylight
1,36.21026,-84.13017,2017-01-30T23:08:00,2017,1,30,23,8,0,1485839000.0,39.35,0.0,27.33,0.62,0.0,0.0,0.0,9.997,2.44,Dark-Not Lighted
2,35.84854,-88.07133,2017-01-11T00:52:00,2017,1,11,0,52,2,1484118000.0,56.34,1.0,54.3,0.93,0.0,0.0,0.0,9.997,3.86,Dark-Not Lighted
3,35.12685,-85.0212,2017-01-11T07:15:00,2017,1,11,7,15,2,1484140000.0,52.17,1.0,50.19,0.93,0.0023,0.18,0.0,4.429,3.68,Dawn
4,35.13047,-85.00501,2017-01-18T15:35:00,2017,1,18,15,35,2,1484775000.0,62.05,0.11,46.11,0.56,0.0,0.0,0.0,8.858,1.3,Daylight


In [23]:
# finish step 3, write to file
# to_csv writes the DataFrame index as an unnamed first column by default.
interstate_etrim.to_csv("Data/interstate_etrim.csv")

In [24]:
# step 4: shapefile analysis
# step 4(a)(b): haversine distance, sinuosity, menger curvature
# The derived columns go after 'IRI'; geometry is re-attached as the last column.
shape_features = haversine_sinuosity_menger(interstate_shapefile)
interstate_shapefile = pd.concat(
    [
        interstate_shapefile.loc[:, :'IRI'],
        shape_features,
        interstate_shapefile['geometry'],
    ],
    axis=1,
)

# step 4(c): is_ramp
# The ramp flag goes after 'curvature', again followed by geometry.
is_ramp = ramp(interstate_shapefile)
interstate_shapefile = pd.concat(
    [
        interstate_shapefile.loc[:, :'curvature'],
        is_ramp,
        interstate_shapefile['geometry'],
    ],
    axis=1,
)

In [26]:
# step 4(d): number of intersection
count = count_intersections(interstate_shapefile)
interstate_shapefile = pd.concat([interstate_shapefile.loc[:, :'is_ramp'], count, interstate_shapefile['geometry']], axis=1)

# Take the count column name(s) from the helper's own output instead of
# hardcoding 'count': if the helper names its column differently (the preview
# of this frame shows 'intersection_count'), assigning to a hardcoded 'count'
# would silently create a new, mostly-NaN column instead of overriding the
# computed value.
# NOTE(review): assumes count_intersections returns a named Series or a
# DataFrame holding only the count column(s) - confirm against the helper.
count_columns = [count.name] if isinstance(count, pd.Series) else list(count.columns)

# Ramp rows get a fixed intersection count of 2, set in one vectorized
# assignment rather than a row-by-row iterrows loop.
interstate_shapefile.loc[interstate_shapefile['is_ramp'] == 1, count_columns] = 2

In [52]:
# Preview the shapefile records with the derived shape, ramp and intersection columns.
interstate_shapefile.head()

Unnamed: 0,Route_ID,Begin_Poin,End_Point,Route_Numb,Route_Name,Urban_Code,County_Cod,Truck_NN,Through_La,Speed_Limi,AADT,AADT_Singl,AADT_Combi,IRI,length,sinuosity,curvature,is_ramp,intersection_count,geometry
0,54I0075001,23.9,24.0,75,75,99999,107,1,4,70,38210,2293,11463,32,0.078048,1.0,0.006547,0,0.0,LINESTRING (-84.53457719599999 35.578495521000...
1,57I0040001,25.4,25.5,40,40,99999,113,1,4,0,36920,2584,10707,67,0.114347,1.0,0.001402,0,0.0,LINESTRING (-88.65077860999997 35.712850435000...
2,47I0040001,29.5,29.6,40,40,45640,93,1,6,0,71000,2130,23430,42,0.122089,1.0,0.002554,0,0.0,LINESTRING (-83.76576149199997 36.003828577000...
3,82I0081001,2.1,2.2,81,81,45235,163,1,4,0,28010,1401,9523,41,0.119948,1.000004,0.072003,0,3.0,LINESTRING (-82.54077927299994 36.444400962000...
4,54I0075001,8.3,8.4,75,75,99999,107,1,4,0,38120,2287,11436,36,0.071896,1.000001,0.021869,0,0.0,LINESTRING (-84.70818958099994 35.402718893000...


In [44]:
# Persist the enriched shapefile records to disk.
# NOTE(review): the ESRI Shapefile format limits field names to 10 characters,
# so longer column names are expected to be truncated on write - confirm the
# resulting field names before relying on them downstream.
interstate_shapefile.to_file('Data/interstate_shapefile/interstate_shapefile.shp')

In [60]:
# step 5: spatial join etrim and shapefile
# The join itself is not performed in this notebook: both joined layers are
# loaded from shapefiles that must already exist on disk.
# NOTE(review): presumably they were produced by an external GIS tool from the
# files written in steps 3 and 4 - confirm and document the exact procedure.
etrim_onto_shapefile = gpd.read_file('Data/etrim_onto_shapefile/etrim_onto_shapefile.shp')
shapefile_onto_etrim = gpd.read_file('Data/shapefile_onto_etrim/shapefile_onto_etrim.shp')

In [66]:
# Keep only the columns inside each label range; everything outside is dropped.
etrim_onto_shapefile = etrim_onto_shapefile.loc[:, 'Route_ID':'Count_']

# Shapefile (.dbf) field names are capped at 10 characters, so the full name
# 'intersection_count' cannot come back from gpd.read_file and slicing up to it
# raises KeyError. Resolve the end label from the names that can actually be
# present: 'count' (what the joined file currently contains) or the truncated
# 'intersecti'; the full name is kept as a last resort for other file formats.
count_column = next(
    (name for name in ('count', 'intersecti', 'intersection_count')
     if name in shapefile_onto_etrim.columns),
    None,
)
if count_column is None:
    raise KeyError(
        "no intersection-count column ('count', 'intersecti' or "
        "'intersection_count') found in shapefile_onto_etrim"
    )
shapefile_onto_etrim = shapefile_onto_etrim.loc[:, 'GPS_Coordi':count_column]

In [67]:
# Preview the shapefile records carrying the join count column ('Count_').
etrim_onto_shapefile.head()

Unnamed: 0,Route_ID,Begin_Poin,End_Point,Route_Numb,Route_Name,Urban_Code,County_Cod,Truck_NN,Through_La,Speed_Limi,AADT,AADT_Singl,AADT_Combi,IRI,length,sinuosity,curvature,is_ramp,count,Count_
0,54I0075001,23.9,24.0,75,75,99999,107,1,4,70,38210,2293,11463,32,0.078048,1.0,0.006547,0,0.0,1
1,57I0040001,25.4,25.5,40,40,99999,113,1,4,0,36920,2584,10707,67,0.114347,1.0,0.001402,0,0.0,2
2,47I0040001,29.5,29.6,40,40,45640,93,1,6,0,71000,2130,23430,42,0.122089,1.0,0.002554,0,0.0,2
3,82I0081001,2.1,2.2,81,81,45235,163,1,4,0,28010,1401,9523,41,0.119948,1.000004,0.072003,0,3.0,0
4,54I0075001,8.3,8.4,75,75,99999,107,1,4,0,38120,2287,11436,36,0.071896,1.000001,0.021869,0,0.0,1


In [68]:
# Preview the crash records carrying the attributes of the joined shapefile record.
shapefile_onto_etrim.head()

Unnamed: 0,GPS_Coordi,GPS_Coor_1,time_,year_,month_,day_,hour_,minute_,weekday,timestamp_,...,Speed_Limi,AADT,AADT_Singl,AADT_Combi,IRI,length,sinuosity,curvature,is_ramp,count
0,36.15314,-86.85298,2017-01-14T15:13:00,2017,1,14,15,13,5,1484428000.0,...,0,128940,2579,14183,121,0.059476,1.0,0.015223,0,4.0
1,35.03496,-85.16427,2017-01-06T23:00:00,2017,1,6,23,0,4,1483765000.0,...,55,90560,2717,19923,91,0.048815,1.0,0.017798,0,1.0
2,36.06971,-86.76997,2017-01-20T07:38:00,2017,1,20,7,38,4,1484919000.0,...,0,184700,3694,7388,37,0.019369,1.00001,0.171159,0,0.0
3,36.07049,-86.77003,2017-01-29T11:20:00,2017,1,29,11,20,6,1485710000.0,...,0,184700,3694,7388,66,0.024276,1.000021,0.294403,0,0.0
4,36.07093,-86.76993,2017-01-26T22:40:00,2017,1,26,22,40,3,1485492000.0,...,0,184700,3694,7388,66,0.024276,1.000021,0.294403,0,0.0


In [59]:
# finish step 5, write to file
# Both joined tables are exported as CSV; to_csv writes the DataFrame index as
# an unnamed first column by default.
etrim_onto_shapefile.to_csv('Data/etrim_onto_shapefile.csv')
shapefile_onto_etrim.to_csv('Data/shapefile_onto_etrim.csv')