In [1]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [2]:
data = [{'quantity': '5', 'created_at': '2024-01-01', 'positive': 5, 'negative': 0}, {'quantity': '9', 'created_at': '2024-01-02', 'positive': 3, 'negative': 6}, {'quantity': '5', 'created_at': '2024-01-03', 'positive': 3, 'negative': 2}, {'quantity': '5', 'created_at': '2024-01-04', 'positive': 4, 'negative': 1}, {'created_at': '2024-01-05', 'quantity': 0}, {'quantity': '4', 'created_at': '2024-01-06', 'positive': 2, 'negative': 2}, {'quantity': '5', 'created_at': '2024-01-07', 'positive': 1, 'negative': 4}, {'quantity': '4', 'created_at': '2024-01-08', 'positive': 1, 'negative': 3}, {'quantity': '9', 'created_at': '2024-01-09', 'positive': 4, 'negative': 5}, {'quantity': '3', 'created_at': '2024-01-10', 'positive': 3, 'negative': 0}, {'quantity': '1', 'created_at': '2024-01-11', 'positive': 0, 'negative': 1}, {'quantity': '5', 'created_at': '2024-01-12', 'positive': 5, 'negative': 0}, {'quantity': '9', 'created_at': '2024-01-13', 'positive': 9, 'negative': 0}, {'quantity': '6', 'created_at': '2024-01-14', 'positive': 4, 'negative': 2}, {'quantity': '6', 'created_at': '2024-01-15', 'positive': 2, 'negative': 4}, {'quantity': '8', 'created_at': '2024-01-16', 'positive': 8, 'negative': 0}, {'quantity': '5', 'created_at': '2024-01-17', 'positive': 5, 'negative': 0}, {'quantity': '2', 'created_at': '2024-01-18', 'positive': 2, 'negative': 0}, {'quantity': '7', 'created_at': '2024-01-19', 'positive': 5, 'negative': 2}, {'quantity': '5', 'created_at': '2024-01-20', 'positive': 4, 'negative': 1}, {'quantity': '9', 'created_at': '2024-01-21', 'positive': 9, 'negative': 0}, {'quantity': '4', 'created_at': '2024-01-22', 'positive': 2, 'negative': 2}, {'quantity': '6', 'created_at': '2024-01-23', 'positive': 2, 'negative': 4}, {'quantity': '3', 'created_at': '2024-01-24', 'positive': 0, 'negative': 3}, {'quantity': '5', 'created_at': '2024-01-25', 'positive': 4, 'negative': 1}, {'created_at': '2024-01-26', 'quantity': 0}, {'quantity': '6', 'created_at': '2024-01-27', 'positive': 2, 'negative': 4}, {'quantity': '7', 'created_at': '2024-01-28', 'positive': 1, 'negative': 6}, {'quantity': '6', 'created_at': '2024-01-29', 'positive': 4, 'negative': 2}, {'quantity': '5', 'created_at': '2024-01-30', 'positive': 2, 'negative': 3}, {'quantity': '6', 'created_at': '2024-01-31', 'positive': 2, 'negative': 4}, {'quantity': '6', 'created_at': '2024-02-01', 'positive': 0, 'negative': 6}, {'quantity': '5', 'created_at': '2024-02-02', 'positive': 5, 'negative': 0}, {'created_at': '2024-02-03', 'quantity': 0}, {'quantity': '3', 'created_at': '2024-02-04', 'positive': 3, 'negative': 0}, {'quantity': '6', 'created_at': '2024-02-05', 'positive': 6, 'negative': 0}, {'quantity': '7', 'created_at': '2024-02-06', 'positive': 6, 'negative': 1}, {'quantity': '10', 'created_at': '2024-02-07', 'positive': 10, 'negative': 0}, {'quantity': '10', 'created_at': '2024-02-08', 'positive': 3, 'negative': 7}, {'quantity': '7', 'created_at': '2024-02-09', 'positive': 7, 'negative': 0}, {'quantity': '10', 'created_at': '2024-02-10', 'positive': 4, 'negative': 6}, {'quantity': '11', 'created_at': '2024-02-11', 'positive': 11, 'negative': 0}, {'quantity': '7', 'created_at': '2024-02-12', 'positive': 7, 'negative': 0}, {'created_at': '2024-02-13', 'quantity': 0}, {'quantity': '5', 'created_at': '2024-02-14', 'positive': 1, 'negative': 4}, {'quantity': '1', 'created_at': '2024-02-15', 'positive': 1, 'negative': 0}, {'quantity': '7', 'created_at': '2024-02-16', 'positive': 4, 'negative': 3}, {'quantity': '5', 'created_at': '2024-02-17', 'positive': 4, 'negative': 1}, {'quantity': '3', 'created_at': '2024-02-18', 'positive': 0, 'negative': 3}, {'quantity': '5', 'created_at': '2024-02-19', 'positive': 4, 'negative': 1}, {'quantity': '6', 'created_at': '2024-02-20', 'positive': 5, 'negative': 1}, {'quantity': '9', 'created_at': '2024-02-21', 'positive': 7, 'negative': 2}, {'quantity': '6', 'created_at': '2024-02-22', 'positive': 6, 'negative': 0}, {'quantity': '5', 'created_at': '2024-02-23', 'positive': 5, 'negative': 0}, {'quantity': '9', 'created_at': '2024-02-24', 'positive': 6, 'negative': 3}, {'created_at': '2024-02-25', 'quantity': 0}, {'quantity': '5', 'created_at': '2024-02-26', 'positive': 5, 'negative': 0}, {'quantity': '6', 'created_at': '2024-02-27', 'positive': 6, 'negative': 0}, {'quantity': '10', 'created_at': '2024-02-28', 'positive': 10, 'negative': 0}, {'quantity': '6', 'created_at': '2024-02-29', 'positive': 6, 'negative': 0}, {'quantity': '10', 'created_at': '2024-03-01', 'positive': 10, 'negative': 0}, {'quantity': '5', 'created_at': '2024-03-02', 'positive': 4, 'negative': 1}, {'quantity': '8', 'created_at': '2024-03-03', 'positive': 4, 'negative': 4}, {'quantity': '9', 'created_at': '2024-03-04', 'positive': 9, 'negative': 0}, {'quantity': '6', 'created_at': '2024-03-05', 'positive': 1, 'negative': 5}, {'quantity': '6', 'created_at': '2024-03-06', 'positive': 5, 'negative': 1}, {'quantity': '6', 'created_at': '2024-03-07', 'positive': 6, 'negative': 0}, {'quantity': '9', 'created_at': '2024-03-08', 'positive': 9, 'negative': 0}, {'quantity': '1', 'created_at': '2024-03-09', 'positive': 0, 'negative': 1}, {'quantity': '2', 'created_at': '2024-03-10', 'positive': 2, 'negative': 0}, {'quantity': '3', 'created_at': '2024-03-11', 'positive': 2, 'negative': 1}, {'quantity': '6', 'created_at': '2024-03-12', 'positive': 6, 'negative': 0}, {'quantity': '4', 'created_at': '2024-03-13', 'positive': 3, 'negative': 1}, {'quantity': '5', 'created_at': '2024-03-14', 'positive': 4, 'negative': 1}, {'created_at': '2024-03-15', 'quantity': 0}, {'quantity': '5', 'created_at': '2024-03-16', 'positive': 4, 'negative': 1}, {'quantity': '7', 'created_at': '2024-03-17', 'positive': 7, 'negative': 0}, {'quantity': '7', 'created_at': '2024-03-18', 'positive': 6, 'negative': 1}, {'quantity': '1', 'created_at': '2024-03-19', 'positive': 1, 'negative': 0}, {'quantity': '5', 'created_at': '2024-03-20', 'positive': 2, 'negative': 3}, {'quantity': '8', 'created_at': '2024-03-21', 'positive': 1, 'negative': 7}, {'created_at': '2024-03-22', 'quantity': 0}, {'quantity': '11', 'created_at': '2024-03-23', 'positive': 11, 'negative': 0}, {'quantity': '3', 'created_at': '2024-03-24', 'positive': 3, 'negative': 0}, {'quantity': '11', 'created_at': '2024-03-25', 'positive': 5, 'negative': 6}, {'quantity': '1', 'created_at': '2024-03-26', 'positive': 0, 'negative': 1}, {'created_at': '2024-03-27', 'quantity': 0}, {'quantity': '7', 'created_at': '2024-03-28', 'positive': 7, 'negative': 0}, {'quantity': '5', 'created_at': '2024-03-29', 'positive': 4, 'negative': 1}, {'quantity': '6', 'created_at': '2024-03-30', 'positive': 6, 'negative': 0}, {'quantity': '1', 'created_at': '2024-03-31', 'positive': 0, 'negative': 1}, {'quantity': '6', 'created_at': '2024-04-01', 'positive': 6, 'negative': 0}, {'quantity': '3', 'created_at': '2024-04-02', 'positive': 0, 'negative': 3}, {'quantity': '8', 'created_at': '2024-04-03', 'positive': 8, 'negative': 0}, {'quantity': '3', 'created_at': '2024-04-04', 'positive': 2, 'negative': 1}, {'quantity': '5', 'created_at': '2024-04-05', 'positive': 5, 'negative': 0}, {'quantity': '8', 'created_at': '2024-04-06', 'positive': 4, 'negative': 4}, {'quantity': '2', 'created_at': '2024-04-07', 'positive': 2, 'negative': 0}, {'created_at': '2024-04-08', 'quantity': 0}, {'quantity': '8', 'created_at': '2024-04-09', 'positive': 6, 'negative': 2}, {'quantity': '6', 'created_at': '2024-04-10', 'positive': 6, 'negative': 0}, {'quantity': '6', 'created_at': '2024-04-11', 'positive': 5, 'negative': 1}, {'quantity': '9', 'created_at': '2024-04-12', 'positive': 5, 'negative': 4}, {'quantity': '11', 'created_at': '2024-04-13', 'positive': 9, 'negative': 2}, {'quantity': '4', 'created_at': '2024-04-14', 'positive': 4, 'negative': 0}, {'quantity': '10', 'created_at': '2024-04-15', 'positive': 4, 'negative': 6}, {'quantity': '8', 'created_at': '2024-04-16', 'positive': 8, 'negative': 0}, {'quantity': '7', 'created_at': '2024-04-17', 'positive': 7, 'negative': 0}, {'quantity': '11', 'created_at': '2024-04-18', 'positive': 2, 'negative': 9}, {'quantity': '10', 'created_at': '2024-04-19', 'positive': 3, 'negative': 7}, {'quantity': '7', 'created_at': '2024-04-20', 'positive': 6, 'negative': 1}, {'quantity': '8', 'created_at': '2024-04-21', 'positive': 8, 'negative': 0}, {'quantity': '11', 'created_at': '2024-04-22', 'positive': 10, 'negative': 1}, {'quantity': '9', 'created_at': '2024-04-23', 'positive': 8, 'negative': 1}, {'quantity': '7', 'created_at': '2024-04-24', 'positive': 7, 'negative': 0}, {'quantity': '11', 'created_at': '2024-04-25', 'positive': 6, 'negative': 5}, {'quantity': '7', 'created_at': '2024-04-26', 'positive': 3, 'negative': 4}, {'quantity': '5', 'created_at': '2024-04-27', 'positive': 5, 'negative': 0}, {'quantity': '7', 'created_at': '2024-04-28', 'positive': 7, 'negative': 0}, {'quantity': '6', 'created_at': '2024-04-29', 'positive': 0, 'negative': 6}, {'quantity': '6', 'created_at': '2024-04-30', 'positive': 6, 'negative': 0}]

In [3]:
FEATURES_1 = ['positive', 'negative','month', 'day_of_month', 'day_of_year', 'week_of_year', 'day_of_week','year','is_wknd','is_month_start','is_month_end']
FEATURES_2 = ['month', 'day_of_month', 'day_of_year', 'week_of_year', 'day_of_week','year','is_wknd','is_month_start','is_month_end']
TARGET = 'quantity'

In [4]:
# print(data)

In [5]:
# check if data is a list and convert to pandas dataframe
if data and isinstance(data, list):
    print('Data is a list')
    try:
        # convert data to a pandas dataframe
        df = pd.DataFrame(data)
        # convert created_at to datetime value
        df['created_at'] = pd.to_datetime(df['created_at'])
        # set created_at as index
        df.index = df['created_at']
        # drop created_at column because it is now the index
        df.drop('created_at', axis=1, inplace=True)
        print('Dataframe created successfully')
    except KeyError:
        print('KeyError: created_at column not found')
else:
    print('data is not a list')
    
    
def create_date_features(dataframe: pd.DataFrame):
    df_copy = dataframe.copy()  # Create a copy of the DataFrame to ensure the original remains unchanged

    df_copy['month'] = df_copy.index.month    
    df_copy['day_of_month'] = df_copy.index.day
    df_copy['day_of_year'] = df_copy.index.dayofyear
    df_copy['week_of_year'] = df_copy.index.isocalendar().week
    df_copy['day_of_week'] = df_copy.index.dayofweek
    df_copy['year'] = df_copy.index.year
    df_copy["is_wknd"] = df_copy.index.weekday // 4
    df_copy['is_month_start'] = df_copy.index.is_month_start.astype(int)
    df_copy['is_month_end'] = df_copy.index.is_month_end.astype(int)
    
    return df_copy

Data is a list
Dataframe created successfully


In [6]:
df

Unnamed: 0_level_0,quantity,positive,negative
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01,5,5.0,0.0
2024-01-02,9,3.0,6.0
2024-01-03,5,3.0,2.0
2024-01-04,5,4.0,1.0
2024-01-05,0,,
...,...,...,...
2024-04-26,7,3.0,4.0
2024-04-27,5,5.0,0.0
2024-04-28,7,7.0,0.0
2024-04-29,6,0.0,6.0


In [7]:
min = df.index.min()
max = df.index.max()
print(f'min: {min}, max: {max}')

min: 2024-01-01 00:00:00, max: 2024-04-30 00:00:00


# DROP MISSING VALUES

In [11]:
df.dropna(inplace=True)
print(f'df zero value count: {df.isnull().sum()}')

df zero value count: quantity    0
positive    0
negative    0
dtype: int64


In [12]:
# create date features for the dataframe
df_w_features = create_date_features(df)

In [13]:
df_w_features

Unnamed: 0_level_0,quantity,positive,negative,month,day_of_month,day_of_year,week_of_year,day_of_week,year,is_wknd,is_month_start,is_month_end
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-01-01,5,5.0,0.0,1,1,1,1,0,2024,0,1,0
2024-01-02,9,3.0,6.0,1,2,2,1,1,2024,0,0,0
2024-01-03,5,3.0,2.0,1,3,3,1,2,2024,0,0,0
2024-01-04,5,4.0,1.0,1,4,4,1,3,2024,0,0,0
2024-01-06,4,2.0,2.0,1,6,6,1,5,2024,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-26,7,3.0,4.0,4,26,117,17,4,2024,1,0,0
2024-04-27,5,5.0,0.0,4,27,118,17,5,2024,1,0,0
2024-04-28,7,7.0,0.0,4,28,119,17,6,2024,1,0,0
2024-04-29,6,0.0,6.0,4,29,120,18,0,2024,0,0,0


def generate_features(df):
    df_copy = df.copy()
    # create lag features
    for i in range(1, 8):
        df_copy[f'quantity_lag_{i}'] = df_copy['quantity'].shift(i)
        df_copy[f'positive_lag_{i}'] = df_copy['positive'].shift(i)
        df_copy[f'negative_lag_{i}'] = df_copy['negative'].shift(i)
    return df_copy

# TIME SERIES CROSS VALIDATION

In [14]:
from sklearn.model_selection import TimeSeriesSplit
import numpy as np

# Example data (7 days worth of data)
data = np.random.rand(7)  # Replace this with your actual data

# Number of splits (e.g., 5-fold cross-validation)
n_splits = 5

# Create TimeSeriesSplit object
tscv = TimeSeriesSplit(n_splits=n_splits)

# Iterate over the splits and print the train and test indices
for train_index, test_index in tscv.split(data):
    print("Train indices:", train_index)
    print("Test indices:", test_index)
    print("---")


Train indices: [0 1]
Test indices: [2]
---
Train indices: [0 1 2]
Test indices: [3]
---
Train indices: [0 1 2 3]
Test indices: [4]
---
Train indices: [0 1 2 3 4]
Test indices: [5]
---
Train indices: [0 1 2 3 4 5]
Test indices: [6]
---


# TRAIN TEST SPLIT

In [15]:
# train test split
train = df_w_features[df_w_features.index < '2024-03-01']
test = df_w_features[df_w_features.index >= '2024-03-01']
print(f'train shape: {train.shape}, test shape: {test.shape}')
print(f'train min and max: {train.index.min()}, {train.index.max()}')
print(f'test min and max: {test.index.min()}, {test.index.max()}')

train shape: (55, 12), test shape: (57, 12)
train min and max: 2024-01-01 00:00:00, 2024-02-29 00:00:00
test min and max: 2024-03-01 00:00:00, 2024-04-30 00:00:00


In [16]:
# generate features for train and test
train = create_date_features(train)
test = create_date_features(test)

In [17]:
# create X_train, y_train
X_train = train[FEATURES_2]
y_train = train[TARGET]

# create X_test, y_test
X_test = test[FEATURES_2]
y_test = test[TARGET]

In [18]:
X_train

Unnamed: 0_level_0,month,day_of_month,day_of_year,week_of_year,day_of_week,year,is_wknd,is_month_start,is_month_end
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-01,1,1,1,1,0,2024,0,1,0
2024-01-02,1,2,2,1,1,2024,0,0,0
2024-01-03,1,3,3,1,2,2024,0,0,0
2024-01-04,1,4,4,1,3,2024,0,0,0
2024-01-06,1,6,6,1,5,2024,1,0,0
2024-01-07,1,7,7,1,6,2024,1,0,0
2024-01-08,1,8,8,2,0,2024,0,0,0
2024-01-09,1,9,9,2,1,2024,0,0,0
2024-01-10,1,10,10,2,2,2024,0,0,0
2024-01-11,1,11,11,2,3,2024,0,0,0


# MODEL TRAINING

In [22]:
# Define the parameter grid
param_grid = {
    'n_estimators': [500, 1000, 2000],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'min_child_weight': [1, 3, 5]
}

# Initialize the XGBRegressor
reg = xgb.XGBRegressor(
    base_score=0.5,
    booster='gbtree',
    # objective='reg:squarederror',
    objective='reg:squaredlogerror',
    early_stopping_rounds=50,
    eval_metric='mae',
    n_jobs=-1,
)

# Initialize the GridSearchCV
grid_search = GridSearchCV(estimator=reg, param_grid=param_grid, cv=3, n_jobs=-1)

# Fit the GridSearchCV to the data
grid_search.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=1000)

# Get the best parameters
best_params = grid_search.best_params_

# Print the best parameters
print(f'Best parameters: {best_params}')

[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[0]	validation_0-mae:5.49991	validation_1-mae:5.93851
[499]	validation_0-mae:5.45556	validation_1-mae:5.89416
[499]	validation_0-mae:5.45531	validation_1-mae:5.89390
[499]	validation_0-mae:5.45598	validation_1-mae:5.89458
[499]	validation_0-mae:5.45680	validation_1-mae:5.89539
[499]	validation_0-mae:5.45641	validation_1-mae:5.89500
[499]	validation_0-mae:5.45644	validation_1-mae:5.89503
[499]	validation

In [25]:
# # Train the model using the best parameters
# stopping_round = {'early_stopping_rounds': 50}
# 
# best_params = {
#     **best_params,
#     **stopping_round
# }

print(f'Best parameters: {best_params}')

reg = xgb.XGBRegressor(
    base_score=0.5,
    booster='gbtree',
    # objective='reg:squarederror',
    objective='reg:squaredlogerror',
    early_stopping_rounds=50,
    eval_metric='mae',
    n_jobs=-1,
    **best_params
)
reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=1000
)

Best parameters: {'learning_rate': 0.3, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 500, 'subsample': 1.0}
[0]	validation_0-mae:5.23588	validation_1-mae:5.67447
[135]	validation_0-mae:1.74695	validation_1-mae:2.36689


# MODEL EVALUATION

In [28]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from math import sqrt

# Generate predictions
=
y_pred_test = reg.predict(X_test)

# Calculate metrics for the training set
mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
rmse_train = sqrt(mse_train)

# Calculate metrics for the test set
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)
rmse_test = sqrt(mse_test)

# Print the metrics
print(f'Training set metrics: MSE={mse_train}, MAE={mae_train}, R2={r2_train}, MAPE={mape_train}, RMSE={rmse_train}')
print(f'Test set metrics: MSE={mse_test}, MAE={mae_test}, R2={r2_test}, MAPE={mape_test}, RMSE={rmse_test}')

Training set metrics: MSE=5.300703349471748, MAE=1.746950002150102, R2=-0.0338251213508729, MAPE=0.4405017251031975, RMSE=2.302325639320326
Test set metrics: MSE=8.486106081542111, MAE=2.366886849988971, R2=-0.09427522856526105, MAPE=0.6525664883252502, RMSE=2.9130921855550866


In [29]:
# test_w_prediction = test.copy()
# test_w_prediction['quantity_pred'] = reg.predict(X_test)
# 
# # Display the first few rows of the test_w_prediction DataFrame
# test_w_prediction.head()

In [53]:
# plot quanity and quantity_pred
# plt.figure(figsize=(15, 7))
# plt.plot(test_w_prediction.index, test_w_prediction['quantity'], label='quantity')
# plt.plot(test_w_prediction.index, test_w_prediction['quantity_pred'], label='quantity_pred')
# plt.legend()
# plt.show()

# MODEL EVALUATION

# GENERATE FUTURE DATES

In [113]:
def generate_future_target_dates(last_date, target_dates):
    # create a Datetimeindex from pass variables
    future_dates = pd.date_range(last_date, target_dates, freq='D')
    # Create a single column called date
    future_dates_df = pd.DataFrame(future_dates, columns=['date'])
    # Make the date the first column the index
    future_dates_df.set_index('date', inplace=True)
    # Add quantity column
    future_dates_df['quantity'] = 0
    # add isFuture column
    future_dates_df['isFuture'] = True
    # create date features
    future_dates_df = create_date_features(future_dates_df)
    return future_dates_df

In [114]:
test_df = generate_future_target_dates('2024-04-30', '2024-05-31')

In [115]:
test_df

Unnamed: 0_level_0,quantity,isFuture,month,day_of_month,day_of_year,week_of_year,day_of_week,year,is_wknd,is_month_start,is_month_end
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-04-30,0,True,4,30,121,18,1,2024,0,0,1
2024-05-01,0,True,5,1,122,18,2,2024,0,1,0
2024-05-02,0,True,5,2,123,18,3,2024,0,0,0
2024-05-03,0,True,5,3,124,18,4,2024,1,0,0
2024-05-04,0,True,5,4,125,18,5,2024,1,0,0
2024-05-05,0,True,5,5,126,18,6,2024,1,0,0
2024-05-06,0,True,5,6,127,19,0,2024,0,0,0
2024-05-07,0,True,5,7,128,19,1,2024,0,0,0
2024-05-08,0,True,5,8,129,19,2,2024,0,0,0
2024-05-09,0,True,5,9,130,19,3,2024,0,0,0
