In [None]:
import os
import sys
import pandas as pd
import numpy as np
import plotly.express as px
import datetime as dt
import matplotlib.pyplot as plt
from dataclasses import dataclass

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

# Work from the parent directory so that the relative data path used by the
# notebook ("./data/...") resolves. The move is relative to the current
# directory, so executing this cell again climbs one more level.
os.chdir(os.pardir)

In [None]:
# Pipe-separated input file (read with sep="|" below). The path is relative,
# so it is resolved against the working directory set in the import cell.
FILE_PATH = "./data/adh.csv"

@dataclass
class Features:
    """Column names and modeling settings shared by the cells of this notebook.

    The notebook only reads these values as class attributes
    (e.g. ``Features.DATE``); no instance is created in the visible cells.
    """

    # Name of the date column; parsed to datetimes and later used as the index.
    DATE: str = "date"
    # Name of the column passed as `target` to the GluonTS dataset.
    TARGET: str = "#adh"
    # Name of the column that receives the mean forecast.
    TARGET_PRED: str = "#adh_pred"
    # Name of the year column derived from the date (used to color the line plot).
    YEAR: str = "annee"
    # Value passed as `freq` to DeepAREstimator.
    FREQ: str = "D"
    # Passed negated as `offset` to `split` when separating training data.
    OFFSET: int = 360
    # Forecast horizon: `prediction_length` of the estimator and of each test
    # window, and also the number of days used to build the future rows.
    PRED_LENGTH: int = 180
    # Value of `max_epochs` in the trainer arguments.
    N_EPOCHS: int = 30
    # Number of test windows requested from `generate_instances`.
    WINDOWS: int = 2

# Data Viz

In [None]:
# Load the history; the file is pipe-separated.
df_adh = pd.read_csv(FILE_PATH, sep="|")

# Parse the "YYYY-MM-DD" strings with one vectorized call instead of a
# per-row `strptime`. The explicit format keeps the parsing strict; missing
# values become NaT rather than raising inside a Python lambda.
df_adh[Features.DATE] = pd.to_datetime(df_adh[Features.DATE], format="%Y-%m-%d")

# Separate copy for the visualization, so the helper year column is never
# added to the frame used for modeling.
_df_adh = df_adh.copy()
_df_adh[Features.YEAR] = _df_adh[Features.DATE].dt.year

In [None]:
# Preview the first five rows of the loaded frame.
df_adh.iloc[:5]

In [None]:
# Time series of adhesions (memberships), colored by year, with a range
# slider under the x-axis.
fig = px.line(
    data_frame=_df_adh,
    x=Features.DATE,
    y=Features.TARGET,
    color=Features.YEAR,
    width=1000,
    height=700,
).update_xaxes(rangeslider_visible=True)
fig.show()

# Modeling with GluonTS

In [None]:
# Build the modeling frame (observed history followed by PRED_LENGTH empty
# future days) and wrap it in a GluonTS PandasDataset.
#
# The history is rebuilt from `_df_adh`, the copy taken when the data was
# loaded and not modified by the visible cells, instead of from `df_adh`,
# which this cell reassigns. Re-running the cell therefore no longer stacks a
# second block of future rows or shifts the test-window bounds.
history = _df_adh.drop(columns=[Features.YEAR], errors="ignore")

# Address the date column by name rather than by position 0.
last_observed_date = history[Features.DATE].iloc[-1]

# Bounds of the back-test period (the last PRED_LENGTH days of history), kept
# as ISO strings because later cells use them in label slices and `query`.
TEST_DATA_START = (
    last_observed_date - dt.timedelta(days=Features.PRED_LENGTH)
).strftime("%Y-%m-%d")
TEST_DATA_END = last_observed_date.strftime("%Y-%m-%d")

# The PRED_LENGTH calendar days following the history; only the date is set,
# so every other column is missing (NaN) on these rows after the concat.
df_futur = pd.DataFrame(
    {
        Features.DATE: pd.date_range(
            start=last_observed_date + dt.timedelta(days=1),
            periods=Features.PRED_LENGTH,
            freq="D",
        )
    }
)
df_adh = pd.concat([history, df_futur]).reset_index(drop=True)
df = df_adh.set_index(Features.DATE)
dataset = PandasDataset(df, target=Features.TARGET)
dataset

In [None]:
# Separate the training slice from the test generator. The offset is negative,
# which GluonTS `split` interprets as a number of observations counted from
# the end of the series.
training_data, test_gen = split(dataset, offset=-Features.OFFSET)

# Request WINDOWS test instances of PRED_LENGTH steps each.
test_data = test_gen.generate_instances(
    prediction_length=Features.PRED_LENGTH,
    windows=Features.WINDOWS,
)

In [None]:
# Configure the DeepAR estimator, then fit it on the training slice.
estimator = DeepAREstimator(
    freq=Features.FREQ,
    prediction_length=Features.PRED_LENGTH,
    trainer_kwargs={"max_epochs": Features.N_EPOCHS},
)
# The object returned by `train` is what the forecasting cell calls
# `.predict` on, so it keeps the name `model`.
model = estimator.train(training_data)

In [None]:
# Run the trained model on the test inputs and collect the forecasts in a
# list, so they can be indexed and iterated several times by later cells.
forecasts = [*model.predict(test_data.input)]

In [None]:
# Draw the series from TEST_DATA_START onwards in black, then overlay every
# forecast on the same figure.
plt.figure(figsize=(10, 7))
observed_tail = df.loc[TEST_DATA_START:]
plt.plot(observed_tail, color="black")
for window_forecast in forecasts:
    window_forecast.plot()
plt.legend(["True values"], loc="upper left", fontsize="xx-large")
plt.show()

In [None]:
# Inspect the raw sample array of the first forecast (the next cell averages
# these samples along axis 0).
first_forecast = forecasts[0]
first_forecast.samples

In [None]:
# Average the samples of each forecast along axis 0, then join the per-forecast
# means end to end into a single array.
window_means = [window_forecast.samples.mean(axis=0) for window_forecast in forecasts]
forecast_ = np.concatenate(window_means)

# Store the means on every row dated after TEST_DATA_START; the assignment
# requires the number of selected rows to match the length of `forecast_`.
is_forecast_row = df_adh[Features.DATE] > TEST_DATA_START
df_adh.loc[is_forecast_row, Features.TARGET_PRED] = forecast_

In [None]:
# Keep only the rows dated after TEST_DATA_START (the rows that received a
# prediction) and renumber them from zero.
df_adh_pred = (
    df_adh
    .query(f"{Features.DATE} > @TEST_DATA_START")
    .reset_index(drop=True)
)

In [None]:
# Prediction rows up to and including TEST_DATA_END, the last date of the
# loaded history.
backtest_rows = df_adh_pred.query(f"{Features.DATE} <= @TEST_DATA_END")
backtest_rows

In [None]:
# Column totals over the rows up to TEST_DATA_END, for every column from the
# second one onwards (presumably the target and its prediction; depends on
# the column order of the input file).
backtest_rows = df_adh_pred.query(f"{Features.DATE} <= @TEST_DATA_END")
backtest_rows.iloc[:, 1:].sum()

In [None]:
# Percentage gap between the totals of the second and third columns over the
# rows up to TEST_DATA_END, relative to the third column's total. The columns
# are addressed by position (presumably target and prediction; verify against
# the column order of the input file).
backtest_rows = df_adh_pred.query(f"{Features.DATE} <= @TEST_DATA_END")
second_col_total = backtest_rows.iloc[:, 1].sum()
third_col_total = backtest_rows.iloc[:, 2].sum()
100 * (second_col_total - third_col_total) / third_col_total

In [None]:
# Prediction rows dated after TEST_DATA_END, i.e. after the last date of the
# loaded history.
future_rows = df_adh_pred.query(f"{Features.DATE} > @TEST_DATA_END")
future_rows

In [None]:
# Column totals over the rows after TEST_DATA_END, for every column from the
# third one onwards (presumably just the prediction column; depends on the
# column order of the input file).
future_rows = df_adh_pred.query(f"{Features.DATE} > @TEST_DATA_END")
future_rows.iloc[:, 2:].sum()