# Analysis of weather effects on Citibike trips

In [None]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as datetime

## Read data
* trips
* weather

In [None]:
# Root of the local data directory; the other locations are derived from it.
DATA_DIR = "../data/"
YEARLY_TRIPS_DIR = f"{DATA_DIR}tripdata_parquet/NY/"
WEATHER_DIR = f"{DATA_DIR}weather/"
PARQUET_EXTENSION = ".parquet"

# Columns requested when reading the trips parquet file.
# The order here is the column order of the resulting DataFrame.
TRIPS_COLUMNS = [
    # trip timing
    "tripduration",
    "starttime",
    "stoptime",
    # stations and bike
    "startstationid",
    "endstationid",
    "bikeid",
    # rider attributes
    "usertype",
    "birthyear",
    "gender",
]

In [None]:
%%time
# read trips parquet, reset index, and drop dask column
trips = pd.read_parquet(
    YEARLY_TRIPS_DIR + "2019" + PARQUET_EXTENSION,
    columns=TRIPS_COLUMNS,
    engine="pyarrow",
).reset_index()
trips.drop(trips.columns[0], axis=1, inplace=True)  # drop the dask index

# manually change dtype of columns for trips df
trips["tripduration"] = trips["tripduration"].astype("int32")
trips["startstationid"] = trips["startstationid"].astype("int16")
trips["endstationid"] = trips["endstationid"].astype("int16")
trips["bikeid"] = trips["bikeid"].astype("int32")
trips["birthyear"] = trips["birthyear"].astype("int16")
trips["gender"] = trips["gender"].astype("int8")
trips["usertype"] = trips["usertype"].astype("category")
trips["starttime"] = pd.to_datetime(trips["starttime"])
trips["stoptime"] = pd.to_datetime(trips["stoptime"])

trips

In [None]:
# Load the daily weather observations; the first CSV column becomes the index.
daily_weather = pd.read_csv(
    filepath_or_buffer=WEATHER_DIR + "GHCN-Daily-Cleaned.csv",
    index_col=0,
)

## Helper Functions

In [None]:
# https://stackoverflow.com/questions/36271302/changing-color-scale-in-seaborn-bar-plot
def colors_from_values(values: pd.Series, palette_name: str, ascending=True):
    """Map each value to a palette colour according to its rank.

    The values are ranked by sorting; the value at rank ``r`` receives the
    ``r``-th colour of a palette that has exactly ``len(values)`` colours.

    Args:
        values: Series whose values determine the colours.
        palette_name: Palette name passed to ``sns.color_palette``.
        ascending: If True the smallest value gets the first palette colour;
            if False the largest value does.

    Returns:
        NumPy array with one palette colour per value. Rows follow the
        Series' index labels in sorted order, which matches the Series' own
        row order only when its index is already sorted (for example a
        default RangeIndex).
    """
    # After reset_index() the new 0..n-1 index is each value's rank, and the
    # first column holds the original index labels.
    ranked = values.sort_values(ascending=ascending).reset_index()
    label_column = ranked.columns[0]
    # Re-sorting by the original labels leaves the ranks, in label order,
    # in the frame's index.
    rank_per_label = ranked.sort_values(by=label_column).index
    # One palette colour per value, picked by rank.
    colour_table = np.array(sns.color_palette(palette_name, len(ranked)))
    return colour_table.take(rank_per_label, axis=0)

## Weather EDA