In [1]:
import joblib
import pandas as pd

station_model_name = "station_518.pkl"
regressor = joblib.load( station_model_name ) 
 

In [2]:

def create_features(df, lags=[1, 2, 3], dropna=True):
    """
    Create lagged features for time series data.

    Parameters:
    - df (DataFrame): The input DataFrame containing the time series data.
    - lags (list): A list of integers representing the lag periods for creating features. Default is [1, 2, 3].
    - dropna (bool): Whether to drop rows with missing values after creating features. Default is True.

    Returns:
    - df_features (DataFrame): The DataFrame with lagged features created from the input data.

    """

    df_features = df.copy()
    for lag in lags:
        df_features[f"P_lag_time_{lag}"] = df_features["tp"].shift(periods=lag)

    for lag in lags:
        df_features[f"Q_lag_time_{lag}"] = df_features["obsdis"].shift(periods=lag)

    P_cols = [f"P_lag_time_{idx}" for idx in range(1, len(lags) + 1)]
    Q_cols = [f"Q_lag_time_{idx}" for idx in range(1, len(lags) + 1)]

    all_cols = ["tp", "obsdis"] + P_cols + Q_cols
    df_features = df_features[all_cols]

    if dropna:
        df_features.dropna(inplace=True)

    return df_features

In [50]:
df = pd.read_parquet("../../data/parquet.raw/station_518.0.parquet")

df = df[['tp', 'obsdis']]
df_test = df.dropna().tail(6)


# create two dummy gaps at the end of the dataframe
df_test.loc[df_test.index[-1], 'obsdis'] = None
df_test.loc[df_test.index[-2], 'obsdis'] = None
df_test

Unnamed: 0_level_0,tp,obsdis
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-06-22,8.432112,128.0
2012-06-23,0.562295,125.0
2012-06-24,0.000588,113.0
2012-06-25,0.314714,104.0
2012-06-26,2.312112,
2012-06-27,0.347611,


In [61]:
def recursive_prediction( regressor, df, num_steps):

    for step in range(num_steps):
        df_features = create_features(df)
        predicted_dis = regressor.predict(df_features.drop(['tp', 'obsdis'], axis=1)) # 114.65299988
        #print(  "predicted_dis:", predicted_dis, "step:", step-num_steps)
        df.loc[df.index[step-num_steps], 'obsdis'] = predicted_dis[step]

    return predicted_dis

recursive_prediction(regressor, df_test, 2)

array([114.65299988,  97.99599991, 111.18200012])