In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (including the project's `src` package) are re-imported before each cell
# runs and edits are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import polars as pl 
from pathlib import Path
from datetime import datetime

from sklearn.metrics import mean_absolute_error

from src.plots import plot_ts
from src.paths import *
from src.config import TODAY_IS, TS_INDEX
from src.dwh import run_database_operation
from src.train import split_into_train_and_test
from src.pipeline import model

# Load data from the data warehouse (DWH)

In [3]:
# Fetch the pickup data for location 43 through the DWH helper, requesting
# the window that starts on 2022-01-01 and ends at TODAY_IS.
# NOTE(review): whether `to_date` is inclusive is decided inside
# `run_database_operation` -- confirm there.
df = run_database_operation(
    operation="fetch_pickup_data",
    pickup_locations=[43],
    from_date=datetime(2022, 1, 1),
    to_date=TODAY_IS,
)

# Partition the fetched frame; the split rule lives in `src.train`.
train, test = split_into_train_and_test(df)

In [4]:
# Sanity check of the split: dimensions of both partitions followed by the
# first rows of the training frame (shown as the cell's output).
(
    train.shape,
    test.shape,
    train.head(),
)

((426, 3),
 (90, 3),
 shape: (5, 3)
 ┌──────────────────────┬────────────────────┬────────────┐
 │ pickup_datetime_hour ┆ pickup_location_id ┆ num_pickup │
 │ ---                  ┆ ---                ┆ ---        │
 │ date                 ┆ i16                ┆ f64        │
 ╞══════════════════════╪════════════════════╪════════════╡
 │ 2022-01-01           ┆ 43                 ┆ 864.0      │
 │ 2022-01-02           ┆ 43                 ┆ 991.0      │
 │ 2022-01-03           ┆ 43                 ┆ 1246.0     │
 │ 2022-01-04           ┆ 43                 ┆ 1334.0     │
 │ 2022-01-05           ┆ 43                 ┆ 1306.0     │
 └──────────────────────┴────────────────────┴────────────┘)

## Baseline model

- Features: 1d, 7d, 14d, 28d
- Prediction: Average of features
- Forecast horizon: Next day

In [5]:
# Fit the pipeline imported from `src.pipeline` on the training partition only.
model.fit(train)
# In-sample predictions: `predictions` holds the output for the *training* data.
predictions = model.predict(train)
# Predictions for the held-out test partition.
test_predictions = model.predict(test)


In [6]:
# Attach the model output to the observed series. The inner join on TS_INDEX
# keeps only rows present in both frames, so any observation without a
# matching prediction is dropped before plotting and scoring.
train_with_predictions = train.join(predictions, on=TS_INDEX, how="inner")
test_with_predictions = test.join(test_predictions, on=TS_INDEX, how="inner")

# Visual check: observed pickups against the predicted values, per partition.
plot_ts(train_with_predictions, ["num_pickup", "prediction"])
plot_ts(test_with_predictions, ["num_pickup", "prediction"])

# Mean absolute error per partition (observed values first, predictions second).
train_mae = mean_absolute_error(
    train_with_predictions["num_pickup"],
    train_with_predictions["prediction"],
)
test_mae = mean_absolute_error(
    test_with_predictions["num_pickup"],
    test_with_predictions["prediction"],
)

print(f"Train MAE: {train_mae:.2f}, Test MAE: {test_mae:.2f}")




Train MAE: 187.63, Test MAE: 153.41
