# Traffic count data processing

## Traffic studies often start with traffic count data consisting of the vehicle type, the cardinal direction, and the time at which a vehicle or pedestrian moved through an intersection. This notebook walks you through some ways to use Python to clean and organize that data for further analysis.

## Python modules that need to be imported

In [2]:
# imports
import pandas as pd
import numpy as np
import datetime
from datetime import timedelta
import ast

## Move sample data into a pandas dataframe

#### Below is a picture of the sample data.

<img src="images/sample_data.png" width="180" height="70">

### The sample data consists of three columns
1. movement type (mvmt)
2. timestamp (time)
3. type of vehicle (veh_type)

#### Read the CSV file to a pandas dataframe

In [None]:
# Define a variable that holds the path to where you saved the csv file, written as a
# raw string (r'...') so the backslashes in a Windows path are not read as escape sequences.
# * using the raw format is probably not necessary for Mac users
file_path = r'.....\ETL-Traffic_Counts\sample-data\csv_sample.csv'

In [49]:
# Concrete path to the sample csv on the author's machine; replace it with your own location.
file_path = r'C:\Users\aregel\Documents\ETL-Traffic_Counts\sample-data\csv_sample.csv'

In [60]:
# Load the csv at file_path into a dataframe and preview its first 10 rows
df_0 = pd.read_csv(file_path)
df_0.head(10)

Unnamed: 0,mvmt,time,veh_type
0,EBL,10/13/2007 6:48,car
1,WBT,10/13/2007 6:38,car
2,EBR,10/13/2007 8:20,semi trucks
3,NBR,10/13/2007 10:16,car
4,EBT,10/13/2007 8:54,Pedestrian
5,EBT,10/13/2007 10:51,car
6,EBT,10/13/2007 7:50,truck
7,WBT,10/13/2007 7:46,car
8,SBT,10/13/2007 7:43,car
9,WBR,10/13/2007 9:57,car


#### Define some helper functions that take a dataframe of traffic count data, re-label the pedestrian movements, and re-label the semi truck vehicle type as just Truck

In [13]:
# map_mvmt_labels takes a dataframe and re-labels the pedestrian movement types so that
# they conform to the standard vehicle movement labels
def map_mvmt_labels(df):
    """Rewrite the ``mvmt`` column of ``df`` using the standard movement labels.

    The twelve vehicle movements map to themselves; the six pedestrian codes
    are translated to a vehicle movement label.

    Args:
        df: dataframe with a ``mvmt`` column. It is modified in place.

    Returns:
        The same dataframe object, with ``mvmt`` replaced.

    Raises:
        KeyError: if ``mvmt`` contains a label that is not in the map.
    """
    vehicle_mvmts = ['EBL', 'EBR', 'EBT', 'NBL', 'NBR', 'NBT',
                     'SBL', 'SBR', 'SBT', 'WBL', 'WBR', 'WBT']
    pedestrian_mvmts = {'PNER': 'WBT', 'PNWL': 'SBL', 'PSEL': 'NBL',
                        'PSER': 'NBT', 'PSWL': 'EBL', 'PSWR': 'EBT'}

    # vehicle labels are kept as they are, pedestrian labels are translated
    direction_map = {label: label for label in vehicle_mvmts}
    direction_map.update(pedestrian_mvmts)

    df.mvmt = [direction_map[label] for label in df.mvmt]
    return df

# map_veh_type_labels takes a dataframe and normalizes the vehicle type labels
# (semi trucks become Truck)
def map_veh_type_labels(df):
    """Rewrite the ``veh_type`` column of ``df`` with normalized labels.

    Args:
        df: dataframe with a ``veh_type`` column. It is modified in place.

    Returns:
        The same dataframe object, with ``veh_type`` replaced.

    Raises:
        KeyError: if ``veh_type`` contains a label that is not in the map.
    """
    # 'truck' and 'semi trucks' both end up as 'Truck'
    veh_map = {
        'car': 'Car',
        'truck': 'Truck',
        'semi trucks': 'Truck',
        'Pedestrian': 'Pedestrian',
    }
    df.veh_type = [veh_map[label] for label in df.veh_type]
    return df
    

#### Clean the data by applying the helper functions and converting the 'time' series to datetime objects

In [61]:
# apply all the helper functions and convert time to datetime
def clean_data(df):
    """Convert the ``time`` column to datetimes and normalize the labels.

    Args:
        df: dataframe with ``time``, ``mvmt`` and ``veh_type`` columns.
            It is modified in place.

    Returns:
        The same dataframe with ``time`` parsed to datetimes and the
        ``mvmt`` / ``veh_type`` columns re-labeled by map_mvmt_labels and
        map_veh_type_labels.
    """
    # The parsed values have to be assigned back to the column; calling
    # apply() without keeping its result leaves 'time' as plain strings.
    df['time'] = pd.to_datetime(df['time'])
    df = map_mvmt_labels(df)
    df = map_veh_type_labels(df)
    return df

In [None]:


# takes csv count data and returns parsed data table with multi-index
def df_veh_counts(file_path, date, min_time_interval):
    """Read a count csv and return counts binned by time interval.

    Args:
        file_path: path of the csv file to read.
        date: date string that is prepended to every value of the ``TIME``
            column before parsing it as a datetime.
        min_time_interval: bin width in minutes (e.g. ``'15'`` or ``15``).

    Returns:
        Dataframe indexed by the start of each time bin, with
        ('Mapped Button ID', 'Mapped VEH Type') multi-index columns holding
        the number of records per bin; empty combinations are 0.

    NOTE(review): map_labels is not defined in the visible part of this
    notebook; presumably it adds the 'Mapped Button ID' and 'Mapped VEH Type'
    columns used below -- confirm before relying on this function.
    """
    # read the file that was passed in, not a global named `file`
    raw = pd.read_csv(file_path)
    labeled = map_labels(raw)

    # every record counts as one observation
    labeled['COUNT'] = 1.0
    labeled['datetime'] = pd.to_datetime(date + ' ' + labeled.TIME)
    data = labeled[['Mapped Button ID', 'Mapped VEH Type', 'datetime', 'COUNT']]

    # aggfunc='sum' so that several records sharing the same timestamp,
    # movement and vehicle type are all counted; the default (mean) would
    # collapse them to a single 1.
    table = data.pivot_table(index='datetime',
                             columns=['Mapped Button ID', 'Mapped VEH Type'],
                             values='COUNT',
                             aggfunc='sum')
    table = table.fillna(value=0)

    # 'min' is the minute frequency alias; str() lets callers pass 15 or '15'
    sample_time = str(min_time_interval) + 'min'
    return table.resample(sample_time).sum().fillna(value=0)

# takes parsed multi-index dataframe(df), returns the car count summed over all movements
def total_cars(df):
    """Add up the 'Car' value at index 1 of every vehicle movement column.

    Args:
        df: dataframe whose columns are a (movement, vehicle type) multi-index
            containing a 'Car' column for each of the twelve movements.

    Returns:
        Sum over the twelve movements of ``df[mvmt]['Car'][1]``.

    NOTE(review): only the entry at index 1 of each 'Car' column is read, not
    the whole column -- confirm that this single row is what "total" is meant
    to be.
    """
    mvmt_type = ['EBL', 'EBR', 'EBT', 'NBL', 'NBR', 'NBT',
                 'SBL', 'SBR', 'SBT', 'WBL', 'WBR', 'WBT']
    return sum(df[mvmt]['Car'][1] for mvmt in mvmt_type)

# takes csv file(file), the date the count was done(date),
# the width of the time bins in minutes (min_time_interval), and the length of the
# peak period in hours (time_delta)
# returns the TOTAL (cars + trucks + peds) traffic for the peak hour
def total_peak_hr_traffic(file, date, min_time_interval, time_delta):
    """Return the sum of every count inside the peak period of a count file.

    The file is parsed with df_veh_counts, narrowed to the peak period with
    peak_hr_data, and then summed over all rows and columns.

    NOTE(review): a second function with this same name is defined further
    down in this file; when the file is executed top to bottom, that later
    definition is the one the name ends up bound to.
    """
    binned_counts = df_veh_counts(file, date, min_time_interval)
    peak_window = peak_hr_data(binned_counts, time_delta)
    column_totals = peak_window.sum()
    return column_totals.sum()

# takes parsed multi-index dataframe, returns the grand total of ALL counts
# (every column, including pedestrians/bike)
def total_sum(df):
    """Return the sum of every value in ``df`` (all rows, all columns)."""
    column_totals = df.sum()
    return column_totals.sum()

# takes the parsed data table and the peak period length in hours, returns the
# [start, end] of the period with the most traffic
def find_peak_hour(df, time_delta=1):
    """Find the time window of length ``time_delta`` with the largest total.

    A candidate window starts at every index value of ``df`` and covers the
    half-open interval ``[start, start + time_delta)``. The end is excluded so
    that a 1 hour window over 15 minute bins covers four bins, not five.

    Args:
        df: dataframe of counts indexed by datetime.
        time_delta: window length in hours; a number or a numeric string
            (e.g. the text returned by ``input()``).

    Returns:
        ``[start, end]`` of the busiest window, where ``end`` is
        ``start + time_delta`` and is itself not part of the window. When
        several windows share the largest total, the latest one is returned.

    Raises:
        ValueError: if ``df`` has no rows.
    """
    if len(df) == 0:
        raise ValueError("cannot find a peak hour in an empty dataframe")

    window = timedelta(hours=float(time_delta))
    timestamps = df.index

    best_total = None
    peak_hour = None
    for start_date in timestamps:
        end_date = start_date + window
        in_window = (timestamps >= start_date) & (timestamps < end_date)
        window_total = df.loc[in_window].sum().sum()
        # '>=' keeps the latest window when totals tie
        if best_total is None or window_total >= best_total:
            best_total = window_total
            peak_hour = [start_date, end_date]
    return peak_hour


# takes the parsed multi-index dataframe, returns a dataframe with ONLY PEAK HOUR data
def peak_hr_data(df, time_delta):
    """Return the rows of ``df`` that fall inside the peak window.

    Args:
        df: dataframe of counts indexed by datetime.
        time_delta: peak window length in hours (number or numeric string).

    Returns:
        The rows of ``df`` whose index lies in ``[start, end)`` of the window
        reported by find_peak_hour.
    """
    start, end = find_peak_hour(df, time_delta)
    # half-open selection, matching how find_peak_hour totals each window
    in_peak = (df.index >= start) & (df.index < end)
    return df.loc[in_peak]
# takes csv file(file_path), the date the count was done(date),
# the width of the time bins in minutes (min_time_interval), and the length of the
# peak period in hours (time_delta)
# returns the VEH (cars + trucks) count taken from the peak-hour table
def total_peak_hr_traffic(file_path, date, min_time_interval, time_delta):
    """Add up car and truck counts from the peak-hour table of a count file.

    This definition has the same name as an earlier function in this file and
    replaces it once the file has been executed.

    Args:
        file_path: path of the csv count file.
        date: date string of the count, passed on to df_veh_counts.
        min_time_interval: bin width in minutes, passed on to df_veh_counts.
        time_delta: peak period length in hours, passed on to peak_hr_data.

    Returns:
        Sum over the twelve vehicle movements of the 'Car' and 'Truck' values
        in the second row of the peak-hour table.

    NOTE(review): only the second row (position 1) of the peak-hour table is
    read, not the whole peak period -- confirm whether the column sum was
    intended.
    """
    mvmt_type = ['EBL', 'EBR', 'EBT', 'NBL', 'NBR', 'NBT',
                 'SBL', 'SBR', 'SBT', 'WBL', 'WBR', 'WBT']
    # use the arguments that were passed in rather than globals named
    # `file` and `timeD`
    data = df_veh_counts(file_path, date, min_time_interval)
    df = peak_hr_data(data, time_delta)

    total_veh = 0
    for mvmt in mvmt_type:
        # .iloc[1] is explicit positional access; a bare [1] on a
        # datetime-indexed Series relies on a deprecated positional fallback
        total_veh = total_veh + df[mvmt]['Car'].iloc[1] + df[mvmt]['Truck'].iloc[1]
    return total_veh
# takes df slice (peak hour ONLY), returns dict of key = Button ID, value = sub-totals
def sub_total_dict(peak_hr_df):
    """Map each of the twelve vehicle movements to the sum of its column(s).

    Args:
        peak_hr_df: dataframe with one column (or column group) per movement.

    Returns:
        dict keyed by movement label, in the fixed order listed below, whose
        values are ``peak_hr_df[label].sum()``.
    """
    mvmt_type = ['EBL', 'EBR', 'EBT', 'NBL', 'NBR', 'NBT',
                 'SBL', 'SBR', 'SBT', 'WBL', 'WBR', 'WBT']
    return {label: peak_hr_df[label].sum() for label in mvmt_type}
def current_numbers(file_path, date, min_time_interval, timeD, road_mvmt_lst):
    """Build a table of peak-hour vehicle counts per road movement.

    Args:
        file_path: path of the csv count file.
        date: date string of the count, passed on to df_veh_counts.
        min_time_interval: bin width in minutes, passed on to df_veh_counts.
        timeD: peak period length in hours, passed on to peak_hr_data.
        road_mvmt_lst: iterable of ``(road name, movement)`` pairs, e.g.
            ``[('US 40', 'EBL'), ('US 40', 'EBT')]``.

    Returns:
        Dataframe with one row per entry of ``road_mvmt_lst`` and the columns
        ``road_name``, ``mvmts`` and ``AM`` (cars + trucks for that movement).

    Raises:
        KeyError: if a movement in ``road_mvmt_lst`` is not a column of the
            peak-hour table.

    NOTE(review): as in total_peak_hr_traffic, only the second row (position 1)
    of the peak-hour table is read -- confirm whether the column sum was
    intended.
    """
    # peak_hr_data expects the parsed count table, so build that first
    counts = df_veh_counts(file_path, date, min_time_interval)
    data = peak_hr_data(counts, timeD)

    road_name = []
    mvmts = []
    AM = []
    for label in road_mvmt_lst:
        road, mvmt = label[0], label[1]
        road_name.append(road)
        mvmts.append(mvmt)
        # look the count up by this row's own movement so the value always
        # lines up with its road / movement label
        AM.append(data[mvmt]['Car'].iloc[1] + data[mvmt]['Truck'].iloc[1])

    numbers = pd.DataFrame(
        {'road_name': road_name,
         'mvmts': mvmts,
         'AM': AM
        })
    return numbers

##  **_This section asks you some questions_**

In [None]:
# Hard-coded path to a count workbook on the author's machine.
# NOTE(review): the path ends in .xlsx while df_veh_counts reads its input with
# pd.read_csv -- confirm which file format is actually expected.
file = r'C:\Users\aregel\Documents\McDowell_eng\Steamboat\workproduct\2015-10-13_M1195_Traffic_Counts_noNaN.xlsx'
# Example list of (movement, road name) pairs; as a bare expression it is only displayed.
# NOTE(review): current_numbers reads element 0 as the road name and element 1 as the
# movement, which is the opposite order to the pairs below.
[('EBL', 'US 40'),
 ('EBR', 'US 40'),
 ('EBT', 'US 40'),
 ('NBL', 'RiverRoad'),
 ('NBR', 'RiverRoad'),
 ('NBT', 'RiverRoad'),
 ('SBL', 'RiverRoad'),
 ('SBR', 'RiverRoad'),
 ('SBT', 'RiverRoad'),
 ('WBL', 'US 40'),
 ('WBR', 'US 40'),
 ('WBT', 'US 40')]

In [None]:
# Collect the run parameters interactively; every answer is returned as a string.
file_input = input("Input the path to the file you want to process")
# The example in the prompt must itself be a valid Python literal, because the
# answer is later parsed with ast.literal_eval (closing quote added after EBT).
road_mvmt = input("List the name of the road and the turn ID, e.g. ('US 40', 'EBL'), ('US 40','EBT')..")
# the date of the count is stored in `data`
data = input("What is the date the traffic count was taken?")
min_time_interval = input("What is the time interval you want to break the data into (e.g. '15')")
timeD = input('How long(in hours) is the "Peak Time"')


In [None]:
# Parse the typed pairs as a Python literal and convert them to a list
road_mvmt_lst = list(ast.literal_eval(road_mvmt))
# Keep the path as text: pandas.read_csv is documented to take a str, a path
# object or a file-like object, so the path is no longer encoded to bytes.
file = file_input.strip()
date = data

current_numbers(file, date, min_time_interval, timeD, road_mvmt_lst)

In [None]:
# Scratch check: shows the type of a raw string literal
type(r'Hi')


In [None]:
# Ask for the (road name, turn ID) pairs; the example in the prompt is a valid
# Python literal (closing quote added after EBT) so it can be copied as typed.
road_mvmt = input("List the name of the road and the turn ID, e.g. ('US 40', 'EBL'), ('US 40','EBT')..")

In [None]:
# Parse the typed text as a Python literal and convert the result to a list
list(ast.literal_eval(road_mvmt))