## KC Metro Data Exploration

Load the KC Metro feed CSVs (`routes.txt`, `stops.txt`, `stop_times.txt`, `trips.txt`) into pandas DataFrames.

In [126]:
## Get a dataframe going.

import os
import pandas as pd

AGENCY_DIR = 'kc_metro'

# Registry of the feed tables used in this notebook. Each entry names its
# source CSV and holds the loaded DataFrame under "df" (None until the
# loop below fills it in).
data = {
    "routes": {
        "csv": "routes.txt",
        "df": None
    },
    "stops": {
        "csv": "stops.txt",
        "df": None
    },
    "stop_times": {
        "csv": "stop_times.txt",
        "df": None
    },
    "trips": {
        "csv": "trips.txt",
        "df": None
    },
}

# The data directory sits two levels above the current working directory and
# is the same for every table, so build the path once instead of per iteration.
data_dir = os.path.join(os.getcwd(), os.pardir, os.pardir, 'data')

for header, vals in data.items():
    data_csv = os.path.join(data_dir, AGENCY_DIR, vals['csv'])
    data_df = pd.read_csv(data_csv)
    # `vals` is the same dict object as `data[header]`, so this fills the
    # registry in place without a second lookup.
    vals['df'] = data_df


#### Find all stops for a given route (a `route_id` from `routes.txt`)

In [127]:
ROUTE_ID = 100275

# Collect the `trip_id` values in `trips.txt` that belong to the chosen
# route (Route 8) and preview the first few.
trips_df = data['trips']['df']
on_route = trips_df['route_id'] == ROUTE_ID
trips_filtered_by_route = trips_df.loc[on_route, 'trip_id']
trips_filtered_by_route.head()


12523    572987776
12524    572987826
12525    572987836
12526    572987856
12527    572987866
Name: trip_id, dtype: int64

In [128]:
# Use the first trip on the route as the representative stop pattern.
# Fail with a clear message instead of an IndexError when the route has no trips.
if trips_filtered_by_route.empty:
    raise ValueError(f"No trips found for route_id {ROUTE_ID}")
FIRST_TRIP_ID = trips_filtered_by_route.iloc[0]

# Filter `stop_times.txt` to get the stop ids visited by that trip.
# Rows are sorted by `stop_sequence` so the ids come out in travel order;
# the file's row order is not guaranteed to match it, and the ids are later
# used to draw the route as a line.
stop_times_df = data['stop_times']['df']
stop_ids = (
    stop_times_df[stop_times_df['trip_id'] == FIRST_TRIP_ID]
    .sort_values('stop_sequence')['stop_id']
)

# Number of stops on the trip
stop_ids.count()

np.int64(35)

In [129]:
# Look up each filtered stop id in stops.txt to pick up the stop's
# attributes, including its latitude/longitude.

stops_df = data['stops']['df']

# Inner join on the shared `stop_id` column (the only column the two sides
# have in common).
stops_filtered_by_route = stop_ids.to_frame().merge(stops_df, on='stop_id')

stops_filtered_by_route.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,8402,8402,Mount Baker Transit Center - Bay 2,,47.577591,-122.297127,1,,0,,America/Los_Angeles,1
1,8672,8672,Martin L King Jr Way S & S Bayview St,,47.581486,-122.29702,1,,0,,America/Los_Angeles,1
2,36752,36752,Martin L King Jr Way S & S Walker St,,47.583851,-122.298042,1,,0,,America/Los_Angeles,1
3,12497,12497,Martin L King Jr Way S & S Massachusetts St,,47.588676,-122.297852,1,,0,,America/Los_Angeles,1
4,43765,43765,Martin L King Jr Way S & S Irving St,,47.59063,-122.297348,1,,0,,America/Los_Angeles,1


In [130]:
## Make a map

import folium

# Initial view only; the map is re-fitted to the drawn route below so the
# line is visible whichever route is selected.
m = folium.Map(location=[47.617, -122.34], zoom_start=15)

## `PolyLine()` expects a list of (lat, lon) coordinate tuples.
coord_data = list(zip(stops_filtered_by_route['stop_lat'], stops_filtered_by_route['stop_lon']))

## Get the route name

route_df = data['routes']['df']
# Filter the routes table once and reuse the match for both label fields.
route_matches = route_df[route_df['route_id'] == ROUTE_ID]
if route_matches.empty:
    raise ValueError(f"route_id {ROUTE_ID} not found in routes.txt")
route_num = route_matches['route_short_name'].iloc[0]
route_name = route_matches['route_desc'].iloc[0]
# Drop missing fields and coerce the rest to str so the join cannot raise a
# TypeError on NaN or on a non-string value.
route_name_pretty = " | ".join(
    str(part) for part in (route_num, route_name) if pd.notna(part)
)

folium.PolyLine(coord_data, tooltip=route_name_pretty).add_to(m)

# Fit the view to the bounding box of the route: [south-west, north-east].
lats, lons = zip(*coord_data)
m.fit_bounds([
    [float(min(lats)), float(min(lons))],
    [float(max(lats)), float(max(lons))],
])

## Display the map
m