In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from yellow_taxis.fetch import most_recent_dataset_date
from yellow_taxis.tasks.monthly_averages import AggregateMonthlyAveragesTask
from yellow_taxis.tasks.rolling_averages import (
    AggregateRollingAveragesTask,
    RollingAveragesTask,
)

# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to imported code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Visualize output of monthly averages

In [None]:
# Read the aggregated monthly averages from the parquet file the task points
# to, then preview the first rows.
monthly_averages_path = AggregateMonthlyAveragesTask().get_output_path()
monthly_df = pd.read_parquet(monthly_averages_path)
monthly_df.head()

In [None]:
def plot_trip_lenghs(durations: pd.Series, distances: pd.Series):
    """Draw trip durations and trip distances on one figure with two y-axes.

    Durations go on the left (blue) axis and distances on the right (red)
    axis; both share the same x-axis.

    Parameters
    ----------
    durations : pd.Series
        Trip durations. The values are divided by 60 before plotting and the
        axis is labelled in minutes (so presumably seconds -- confirm against
        the producing task).
    distances : pd.Series
        Trip distances, plotted as-is on an axis labelled in miles.

    Returns
    -------
    tuple
        ``(fig, time_ax, dist_ax)``: the figure, the left (duration) axes and
        the right (distance) axes.
    """
    duration_color = 'tab:blue'
    distance_color = 'tab:red'
    scale = 1.5
    fig, time_ax = plt.subplots(figsize=(5 * scale, 3 * scale), dpi=200)

    # Left axis: durations converted to minutes.
    time_ax.set_ylabel("trip duration / minutes", color=duration_color)
    durations_in_minutes = durations / 60
    durations_in_minutes.plot(ax=time_ax, color=duration_color, label="trip duration")
    time_ax.tick_params(axis='y', labelcolor=duration_color)

    # Right axis: a twin of the left axes that shares its x-axis.
    dist_ax = time_ax.twinx()
    dist_ax.set_ylabel("trip distance / Miles", color=distance_color)
    distances.plot(ax=dist_ax, color=distance_color, label="trip distance")
    dist_ax.tick_params(axis='y', labelcolor=distance_color)

    # Set the x label only after both series have been plotted.
    time_ax.set_xlabel("year")
    fig.set_layout_engine("constrained")
    return fig, time_ax, dist_ax

In [None]:
# Plot the monthly averages and save the chart.
# The helper returns (figure, duration axes, distance axes). Keep the handles
# so the title and the saved file are tied to this specific figure instead of
# pyplot's implicit "current figure/axes" state.
monthly_fig, monthly_time_ax, monthly_dist_ax = plot_trip_lenghs(
    monthly_df["trip_duration_mean"], monthly_df["trip_distance_mean"]
)
monthly_time_ax.set_title("Average monthly trip durations and distances")
monthly_fig.savefig("trip_lenghts_monthly_averages.webp")

## Visualize output of rolling averages

In [None]:
# Read the aggregated rolling averages from the parquet file the task points to.
rolling_averages_path = AggregateRollingAveragesTask().get_output_path()
rolling_df = pd.read_parquet(rolling_averages_path)

In [None]:
# Plot the rolling averages and save the chart.
# The helper returns (figure, duration axes, distance axes). Keep the handles
# so the title and the saved file are tied to this specific figure instead of
# pyplot's implicit "current figure/axes" state.
rolling_fig, rolling_time_ax, rolling_dist_ax = plot_trip_lenghs(
    rolling_df["trip_duration"], rolling_df["trip_distance"]
)
rolling_time_ax.set_title("45 day rolling trip durations and distances")
rolling_fig.savefig("trip_lenghts_rolling_averages.webp")

### Most recent month only

In [None]:
# Build the rolling-averages task for the year/month reported by
# most_recent_dataset_date() and load its parquet output.
most_recent_date = most_recent_dataset_date()
most_recent_rolling_avg_task = RollingAveragesTask(
    year=most_recent_date.year,
    month=most_recent_date.month,
)
last_month_rolling_path = most_recent_rolling_avg_task.get_output_path()
last_month_rolling_df = pd.read_parquet(last_month_rolling_path)

In [None]:
# Plot the rolling averages for the most recent month and save the chart.
# The helper returns (figure, duration axes, distance axes). Keep the handles
# so the title and the saved file are tied to this specific figure instead of
# pyplot's implicit "current figure/axes" state.
last_month_fig, last_month_time_ax, last_month_dist_ax = plot_trip_lenghs(
    last_month_rolling_df["trip_duration"],
    last_month_rolling_df["trip_distance"],
)
last_month_time_ax.set_title(
    "45 day rolling trip durations and distances\n(most recent month)"
)
last_month_fig.savefig("trip_lenghts_rolling_averages_last_month.webp")