In [10]:
import matplotlib.pyplot as plt
from datetime import datetime
import pandas as pd
import numpy as np
import requests
import glob
import json
%matplotlib inline

In [3]:
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
    if to_radians:
        lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])

    a = np.sin((lat2-lat1)/2.0)**2 + \
        np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2

    return earth_radius * 2 * np.arcsin(np.sqrt(a)) * 1000

In [4]:
def extract_displacement_speed_acceleration(csv_filepath):
    df = pd.read_csv(csv_filepath)
    df["Timestamp"] = df["Timestamp"].str.replace(".000-04:00", "").apply(lambda t: datetime.strptime(t, "%Y-%m-%dT%H:%M:%S"))
    bus_ids = df["ID"].unique()
    result = pd.DataFrame(columns=df.columns)
    for bus_id in bus_ids:
        bus_df = df[df["ID"] == bus_id]
        bus_df.insert(2, "Time difference (s)", bus_df["Timestamp"].diff().apply(lambda t: t.total_seconds()), True)
        bus_df.insert(9, "Traveled (m)", haversine(bus_df.Latitude.shift(), bus_df.Longitude.shift(), bus_df.loc[:, "Latitude"], bus_df.loc[:, "Longitude"]), True) 
        bus_df.insert(10, "Speed (m/s)", bus_df.apply(lambda row: row["Traveled (m)"]/row["Time difference (s)"] if row["Time difference (s)"] > 0 else np.nan, axis=1), True)
        bus_df.insert(11, "Change in Speed (m/s)", bus_df["Speed (m/s)"].diff(), True)
        bus_df.insert(12, "Acceleration (m/s^2)", bus_df.apply(lambda row: row["Change in Speed (m/s)"]/row["Time difference (s)"] if row["Time difference (s)"] > 0 else np.nan, axis=1), True)
        result = pd.concat([result, bus_df])
    return result

## June 24 2020

In [26]:
jun_24_df = extract_displacement_speed_acceleration("../data/bustime_log_data/2020-06-24.csv")
jun_24_df.head()

Unnamed: 0,ID,Timestamp,Route,Next stop,Destination,Bearing,Longitude,Latitude,Time difference (s),Traveled (m),Speed (m/s),Change in Speed (m/s),Acceleration (m/s^2)
0,4964,2020-06-24 05:01:28,M14A-SBS,HUDSON ST/W 12 ST,SELECT BUS WEST SIDE via 14 ST,262.61395,-74.005284,40.740057,,,,,
1,4964,2020-06-24 05:01:59,M14A-SBS,HUDSON ST/W 12 ST,SELECT BUS WEST SIDE via 14 ST,260.69006,-74.005574,40.738222,31.0,205.500323,6.629043,,
2,4964,2020-06-24 05:03:01,M14A-SBS,,SELECT BUS WEST SIDE via 14 ST,261.3268,-74.005661,40.73766,62.0,62.919967,1.014838,-5.614204,-0.090552
3,4964,2020-06-24 05:03:31,M14A-SBS,,SELECT BUS WEST SIDE via 14 ST,261.3268,-74.005661,40.73766,30.0,0.0,0.0,-1.014838,-0.033828
4,4964,2020-06-24 05:04:02,M14A-SBS,,SELECT BUS WEST SIDE via 14 ST,261.3268,-74.005661,40.73766,31.0,0.0,0.0,0.0,0.0


In [9]:
jun_24_df.to_csv("../data/kinematics_data/2020-06-24_kinematics.csv")

In [24]:
for csv_filepath in glob.glob("../data/bustime_log_data/*.csv"):
    try:
        df = extract_displacement_speed_acceleration(csv_filepath)
    except:
        continue
    name = csv_filepath[csv_filepath.rfind("/") + 1:csv_filepath.find(".csv")]
    print(name)
    df.to_csv(f"../data/kinematics_data/{name}_kinematics.csv")

2020-06-27
2020-06-16
2020-07-01
2020-07-02
2020-06-29
2020-06-25
2020-06-24
2020-07-03
2020-06-30
2020-06-19
2020-06-17
2020-06-28
