In [None]:
import datetime as dt
import sys, os
import dateutil.relativedelta as rd
import json
import pathlib as pl
from typing import List
import warnings

import pandas as pd
import numpy as np
import geopandas as gpd
import shapely
import shapely.geometry as sg
import shapely.ops as so

# Put the parent directory on the import path before importing gtfs_kit
# (presumably so the package can be imported from the repository checkout -- confirm)
DIR = pl.Path('..')
sys.path.append(str(DIR))

import gtfs_kit as gk


# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Enable IPython's autoreload extension in mode 2
%load_ext autoreload
%autoreload 2

# Data directory used by later cells
DATA = DIR / 'data'

In [None]:
# Load a test feed from the current user's Desktop folder, with distances in km.
# (Alternative source used previously: DATA / "cairns_gtfs.zip")
path = pl.Path.home() / "Desktop"
feed = gk.read_feed(path / "gtfs_feed_test.zip", dist_units="km")
feed.list_fields()

In [None]:
# Show the feed's calendar table
feed.calendar

In [None]:
import functools as ft

def compute_trip_activity(feed: "Feed", dates: list[str]) -> pd.DataFrame:
    """
    Mark trips as active or inactive on the given dates (YYYYMMDD date strings).
    Return a table with the columns

    - ``'trip_id'``
    - ``dates[0]``: 1 if the trip is active on ``dates[0]``;
      0 otherwise
    - ``dates[1]``: 1 if the trip is active on ``dates[1]``;
      0 otherwise
    - etc.
    - ``dates[-1]``: 1 if the trip is active on ``dates[-1]``;
      0 otherwise

    The given dates are first filtered through ``feed.subset_dates``, and only
    the dates that survive this filtering appear as columns.

    If ``dates`` is ``None`` or the empty list, or if no date survives the
    filtering, then return an empty DataFrame.
    """
    # Honour the documented ``None`` case before handing the dates to the feed
    if dates is None:
        return pd.DataFrame()

    dates = feed.subset_dates(dates)
    if not dates:
        return pd.DataFrame()

    # Start from the complete list of trip IDs, then add one frame per date
    # that holds the trips active on that date, flagged with a 1
    frames = [feed.trips[["trip_id"]]]
    frames.extend(
        gk.get_trips(feed, date)[["trip_id"]].assign(**{date: 1})
        for date in dates
    )

    # Successive outer merges (on the shared ``trip_id`` column) leave NaN where
    # a trip is inactive on a date; turn those into 0
    f = ft.reduce(
        lambda left, right: left.merge(right, how="outer"),
        frames,
    ).fillna({date: 0 for date in dates})

    # The NaNs forced the date columns to float; convert them back to integers
    f[dates] = f[dates].astype(int)
    return f



In [None]:
# Run the notebook's own compute_trip_activity on the dates of the feed's first week
compute_trip_activity(feed, feed.get_first_week())

In [None]:
# Same dates through the Feed method (presumably to compare with the notebook's version)
feed.compute_trip_activity(feed.get_first_week())

In [None]:
#path = DATA / 'cairns_gtfs.zip'
# Replace `feed` with another feed read from the user's Desktop folder, then summarize it
path = pl.Path.home() / "Desktop" / "gtfs.zip"
feed = gk.read_feed(path, dist_units='km')
feed.describe()

In [None]:
# Get the feed's shapes with as_gdf=True
feed.get_shapes(as_gdf=True)

In [None]:
# Geometrize only the first row of the shapes table
gk.geometrize_shapes(feed.shapes.iloc[:1])

In [None]:
# Compute trip stats, preview the first rows transposed, and save the whole table as CSV
ts = feed.compute_trip_stats()
display(ts.head().T)
# NOTE(review): the file name says "cairns", but the feed loaded above comes from
# Desktop/gtfs.zip -- confirm the name is still appropriate
ts.to_csv(DATA / "cairns_trip_stats.csv", index=False)

In [None]:
# Maybe a useful function to add
def compute_stop_time_stats(feed):
    """
    Return the feed's stop times, sorted by trip ID and stop sequence, with
    these extra columns describing the hop from each stop to the next stop of
    the same trip:

    - ``'duration_to_next_stop'``: difference of the times converted by
      ``gk.timestr_to_seconds``
    - ``'distance_to_next_stop'``: difference in ``'shape_dist_traveled'``,
      in the feed's distance units
    - ``'speed_to_next_stop'``: distance converted to kilometres (metric
      feeds) or miles (otherwise), divided by the duration in hours

    The last stop of every trip gets NaN in all three columns.
    Zero durations are not special-cased.

    If the stop times lack a ``'shape_dist_traveled'`` column, then it is
    obtained via ``feed.append_dist_to_stop_times()``.
    The feed itself is not modified.
    """
    if "shape_dist_traveled" in feed.stop_times.columns:
        st = feed.stop_times
    else:
        st = feed.append_dist_to_stop_times().stop_times

    # ``sort_values`` returns a new frame, so adding columns below leaves the
    # feed's own table untouched
    f = st.sort_values(["trip_id", "stop_sequence"])

    def has_mostly(col):
        # True if more than half of the column's values are present
        return f[col].count() > 0.5 * f.shape[0]

    def to_next(col):
        # Value at the next stop of the same trip minus the value at this stop
        return f.groupby("trip_id")[col].shift(-1) - f[col]

    # Choose strategy for calculating duration to next stop
    has_arrivals = has_mostly("arrival_time")
    if has_arrivals and has_mostly("departure_time"):
        # Use stop departure to next stop arrival as duration
        f["atime"] = f["arrival_time"].map(gk.timestr_to_seconds)
        f["dtime"] = f["departure_time"].map(gk.timestr_to_seconds)
        f["duration_to_next_stop"] = (
            f.groupby("trip_id")["atime"].shift(-1) - f["dtime"]
        )
    elif has_arrivals:
        # Use stop arrival to next stop arrival as duration
        f["atime"] = f["arrival_time"].map(gk.timestr_to_seconds)
        f["duration_to_next_stop"] = to_next("atime")
    else:
        # Nothing satisfactory, so just use departure times and get many NAs
        f["dtime"] = f["departure_time"].map(gk.timestr_to_seconds)
        f["duration_to_next_stop"] = to_next("dtime")

    # Convert distances to km or mi, depending on the feed's unit system
    target_units = "km" if gk.is_metric(feed.dist_units) else "mi"
    convert = gk.get_convert_dist(feed.dist_units, target_units)

    f["distance_to_next_stop"] = to_next("shape_dist_traveled")  # feed.dist_units
    f["speed_to_next_stop"] = f["distance_to_next_stop"].map(convert) / (
        f["duration_to_next_stop"] / 3600
    )  # km/h or mi/h

    # Only one of the helper columns exists in the single-time branches, so
    # drop whichever ones are present
    return f.drop(columns=["atime", "dtime"], errors="ignore")

# Preview the first 38 rows of the result
compute_stop_time_stats(feed).head(38)
    


In [None]:
# Take the first stop geometry and build a MultiPoint from two copies of it
g = feed.geometrize_stops()
p = g["geometry"].iat[0]
type(p)  # not displayed: a notebook cell only shows its last expression
q = sg.MultiPoint([p, p])
# List the component geometries of the MultiPoint
list(q.geoms)

In [None]:
feed.map_trips(trip_ids[:1], include_stops=True, include_arrows=True)

In [None]:
dates = feed.get_first_week()[6:]
activity = feed.compute_trip_activity(dates)
display(activity)

trip_ids = activity.loc[
    lambda x: x.filter(dates).sum(axis=1) > 0,
    "trip_id",
]
trip_ids

In [None]:
# Show the feed's trips table
feed.trips

In [None]:
# First date of the feed's first week, plus the extra date "20010101"
dates = feed.get_first_week()[:1] + ["20010101"]

# The data directory is named DATA in this notebook; DATA_DIR is never defined
path = DATA / "cairns_screen_lines.geojson"
screen_lines = gpd.read_file(path)
f = feed.compute_screen_line_counts(screen_lines, dates)
f

In [None]:
# The data directory is named DATA in this notebook; DATA_DIR is never defined
path = DATA / 'cairns_gtfs.zip'
gk.list_feed(path)



In [None]:
# Re-read the feed from `path`, run append_dist_to_stop_times on it, and summarize the result
feed = (
    gk.read_feed(path, dist_units='km')
    .append_dist_to_stop_times()
)
feed.describe()

In [None]:
# Geometrize the shapes with use_utm=True and reproject the screen lines to the same CRS
shapes = feed.geometrize_shapes(use_utm=True)
shapes.crs  # not displayed: a notebook cell only shows its last expression
screen_lines = screen_lines.to_crs(shapes.crs)
screen_lines

In [None]:
# Read a feed straight from a URL (needs network access) and summarize it
url = "http://transitfeeds.com/p/10-15-transit/936/latest/download"
gk.read_feed(url, dist_units="km").describe()

In [None]:
# Check that two result tables agree on crossing times and on direction/orientation.
# NOTE(review): g1 and g2 are not defined anywhere in this notebook as shown; this cell
# presumably relies on leftover kernel state -- confirm where they come from or remove it
(
    g1.crossing_time.equals(g2.crossing_time),
    g1.crossing_direction.equals(g2.orientation)
)

In [None]:
# Map the first four routes of the feed, without their stops
feed.map_routes(feed.routes.route_id.iloc[:4], include_stops=False)
