In [None]:
import pandas as pd



In [None]:
# Read the four pipeline artifacts in one pass. The order of the paths below
# matches the order of the names being assigned.
(
    future_covariates_df,
    past_covariates_df,
    target_train_df,
    target_test_df,
) = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../data/pipeline_runs/future_covariates.parquet",
        "../data/pipeline_runs/past_covariates.parquet",
        "../data/pipeline_runs/train_targets_daily.parquet",
        "../data/pipeline_runs/test_targets_daily.parquet",
    )
)

In [None]:
# Plot every column of every loaded frame as its own time-series figure
# (one figure per column) for a quick visual inspection.
import matplotlib.pyplot as plt

dataframes = {
    'Future Covariates': future_covariates_df,
    'Past Covariates': past_covariates_df,
    'Target Train': target_train_df,
    'Target Test': target_test_df,
}

for df_name, df in dataframes.items():
    # Use InvoiceDate as the x-axis when it is present as a regular column.
    # set_index/sort_index return new frames, so the loaded frame is not modified.
    if 'InvoiceDate' in df.columns:
        df = df.set_index('InvoiceDate').sort_index()

    print(f"\n{df_name} - Shape: {df.shape}")

    for col in df.columns:
        # Explicit figure/axes objects instead of the implicit pyplot state,
        # so the figure can be closed deterministically below.
        fig, ax = plt.subplots(figsize=(12, 4))
        ax.plot(df.index, df[col], linewidth=1)
        ax.set_title(f"{df_name} - {col}")
        ax.set_xlabel("Date")
        ax.set_ylabel(col)
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()
        plt.show()
        # One figure per column adds up quickly; release it explicitly because
        # not every backend closes figures when show() returns.
        plt.close(fig)


In [None]:
# Print the column index of the test targets, then of the train targets.
for targets_df in (target_test_df, target_train_df):
    print(targets_df.columns)

In [None]:
from darts import TimeSeries

# Column selections for each role in the forecasting setup.
future_covariates_columns = ["is_holiday"]
past_covariates_columns = ["num_transactions", "num_unique_customers",
                           "num_unique_articles", "avg_basket_size",
                           "avg_unit_price"]
target_columns = ["Quantity"]
time_index_column = "InvoiceDate"


def _to_daily_series(frame: pd.DataFrame, value_cols: list) -> TimeSeries:
    """Build a daily darts ``TimeSeries`` from ``frame``.

    All four series in this notebook are built with the same settings; keeping
    them in one place prevents the individual calls from drifting apart.

    Parameters
    ----------
    frame
        Frame containing the ``time_index_column`` column and every column
        named in ``value_cols``.
    value_cols
        Names of the columns to use as series components.

    Returns
    -------
    TimeSeries
        Series at daily frequency (``freq='D'``), built with
        ``fill_missing_dates=True`` and ``fillna_value=0``.
    """
    return TimeSeries.from_dataframe(
        frame,
        time_col=time_index_column,
        value_cols=value_cols,
        fill_missing_dates=True,
        fillna_value=0,
        freq='D',
    )


future_covariates = _to_daily_series(future_covariates_df, future_covariates_columns)
past_covariates = _to_daily_series(past_covariates_df, past_covariates_columns)
target_train = _to_daily_series(target_train_df, target_columns)
target_test = _to_daily_series(target_test_df, target_columns)


In [None]:
# Plot the training target series, labelled for the legend.
target_train.plot(label="Train Target Series")

In [None]:
from darts.models import RandomForest
from darts.dataprocessing.transformers import Scaler

# Fixed seed so the fitted forest, and therefore the forecast, is the same on
# every Restart & Run All.
RANDOM_SEED = 42

# Forecast exactly as many steps as there are points in the test series.
forecast_horizon = len(target_test)

model = RandomForest(
    lags=7,
    lags_past_covariates=7,
    lags_future_covariates=(7, 7),
    add_encoders={
        # Cyclic calendar encodings added as extra future covariates,
        # with a Scaler applied to the generated encodings.
        "cyclic": {"future": ["day_of_week", "month"]},
        "transformer": Scaler()
    },
    random_state=RANDOM_SEED,
)

model.fit(
    series=target_train,
    past_covariates=past_covariates,
    future_covariates=future_covariates
)

# NOTE(review): a multi-step forecast with past covariates needs those
# covariates to extend into the forecast window. Whether past_covariates covers
# the test period is not visible here; if it does, confirm those values would
# really be known at prediction time (possible leakage).
forecast = model.predict(
    n=forecast_horizon,
    past_covariates=past_covariates,
    future_covariates=future_covariates
)

# Overlay the forecast and the held-out targets on the same axes.
forecast.plot(label="Forecasted Target Series")
target_test.plot(label="Test Target Series")
plt.title("Random forest forecast vs. test targets")
plt.legend()
plt.show()
