# Create station pairs with rebalance counts
The resulting data will be used to draw arcs with pydeck + deck.gl.

In [None]:
import pandas as pd
import pydeck as pdk
import dash_deck
import os

# Import our helpers module. Putting the parent directory ("..") on sys.path might be necessary for helpers to be importable on some systems.
import sys

sys.path.insert(0, "..")
import helpers

In [None]:
# Input/output locations, given as relative paths (note the trailing slashes:
# file names are appended to these strings directly).
DATA_DIR = "data/"
STATIONS_DIR = f"{DATA_DIR}stations/"
REBALANCE_DIR = f"{DATA_DIR}rebalance_parquet/"
STATIONS_PAIRS_DIR = f"{DATA_DIR}stations-pairs/"

# Year whose rebalance pairs are selected.
TRIP_YEAR = 2019

In [None]:
# Load the station table (first CSV column is the index), force stationid to
# int64 and discard the columns that are not needed for the station pairs.
stations = (
    pd.read_csv(STATIONS_DIR + "stations.csv", index_col=0)
    .astype({"stationid": "int64"})
    .drop(columns=["capacity", "neighbourhood", "boro", "zipcode", "elevation_ft"])
)
stations

In [None]:
# Load the rebalance pairs, keep only the rows for TRIP_YEAR, and reduce them
# to the 100 pairs with the highest rebal_count.
rebpairs = (
    pd.read_parquet(
        REBALANCE_DIR + "rebalance_pairs" + helpers.PARQUET_EXTENSION,
        engine="pyarrow",
    )
    .loc[lambda df: df["rebal_year"] == TRIP_YEAR]
    .drop(columns="rebal_year")
    # station ids are cast to integers before they are used as merge keys
    .astype({"stationid_from": "int", "stationid_to": "int"})
    .loc[:, ["stationid_from", "stationid_to", "rebal_count"]]
    .sort_values(by="rebal_count", ascending=False)
    .head(100)
)
rebpairs

In [None]:
# Attach the station columns of the origin station (stationid_from) to each
# pair. The left join keeps every pair even when no station row matches.
_from = (
    rebpairs.merge(
        stations,
        how="left",
        left_on="stationid_from",
        right_on="stationid",
    )
    # the right-hand key duplicates stationid_from, so it is dropped
    .drop(columns="stationid")
    .rename(
        columns={
            "latitude": "latitude_from",
            "longitude": "longitude_from",
            "stationname": "stationname_from",
        }
    )
)
_from

In [None]:
# Attach the station columns of the destination station (stationid_to) to each
# pair, again with a left join so that no pair is lost.
# NOTE(review): unlike the origin merge, the right-hand key column "stationid"
# is not dropped here, so it stays in the result - confirm this is intended.
_to = _from.merge(
    stations,
    how="left",
    left_on="stationid_to",
    right_on="stationid",
).rename(
    columns={
        "latitude": "latitude_to",
        "longitude": "longitude_to",
        "stationname": "stationname_to",
    }
)
# Move rebal_count to the first column position.
_to.insert(0, "rebal_count", _to.pop("rebal_count"))
_to

In [None]:
# Final result: station_pairs is another name for the _to frame (same object,
# not a copy), i.e. each pair with its origin and destination station columns.
station_pairs = _to
station_pairs

In [None]:
# Make sure the output directory exists. makedirs(exist_ok=True) replaces the
# separate exists()/mkdir() pair: it has no check-then-create race and also
# creates missing parent directories.
os.makedirs(STATIONS_PAIRS_DIR, exist_ok=True)

# Write the station pairs as CSV (the DataFrame index is written as well,
# since to_csv is called with its defaults).
station_pairs.to_csv(STATIONS_PAIRS_DIR + "pairs" + helpers.CSV_EXTENSION)