In [None]:
import random
from datetime import datetime, timedelta

import numpy as np
import openpyxl
import pandas as pd
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.utils import set_random_seed
from prophet import Prophet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA

# Read the raw sales export from the working directory into a DataFrame.
df = pd.read_csv('Trueliquor_store_sales_data.csv')

# Convert 'Date' to datetime values so the `.dt` accessors used for the
# calendar features work on it.
df = df.assign(Date=pd.to_datetime(df['Date']))


In [None]:
# Feature engineering: calendar flags, lagged sales and trailing 7-row statistics.
# NOTE(review): the lag and rolling features are row-based, so they presume one
# row per day -- confirm against the dataset.

# Lags and rolling windows are only meaningful in chronological order. The
# stable sort leaves data that is already ordered by date untouched.
df = df.sort_values('Date', kind='mergesort')

# Calendar features derived from the date.
df['day_of_week'] = df['Date'].dt.dayofweek
df['month'] = df['Date'].dt.month
df['quarter'] = df['Date'].dt.quarter
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)  # 5 = Saturday, 6 = Sunday
# Only these two 2017 dates are flagged as holidays.
holiday_dates = pd.to_datetime(['2017-12-25', '2017-12-31'])
df['is_holiday'] = df['Date'].isin(holiday_dates).astype(int)

# Lag features: 'Total Amount' from 1 to 7 rows earlier.
for lag in range(1, 8):
    df[f'lag_{lag}'] = df['Total Amount'].shift(lag)

# Trailing 7-row statistics. They are computed on sales shifted by one row so
# the window covers only earlier rows; without the shift the window would
# include the current row's 'Total Amount', i.e. the value being predicted.
prior_sales = df['Total Amount'].shift(1)
df['rolling_mean_7'] = prior_sales.rolling(window=7).mean()
df['rolling_std_7'] = prior_sales.rolling(window=7).std()

# Columns handed to the models: date, target, then the engineered features.
model_columns = ['Date', 'Total Amount', 'day_of_week', 'month', 'quarter', 'is_weekend', 'is_holiday',
                 'lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6', 'lag_7',
                 'rolling_mean_7', 'rolling_std_7']

# Drop only rows that lack a value the models need (the warm-up rows created by
# the lags and rolling windows). NaNs in unrelated columns do not discard rows.
df = df.dropna(subset=model_columns)

# Take an independent copy so later writes to df_model never touch df and no
# SettingWithCopyWarning is raised. 'Date' is already datetime here (the `.dt`
# accessors above require it), so it is not converted a second time.
df_model = df[model_columns].copy()


In [None]:
from sklearn.model_selection import TimeSeriesSplit

# Chronological hold-out: the first 80% of rows train, the remainder validates.
train_size = int(0.8 * len(df_model))
train_df = df_model.iloc[:train_size].copy()
val_df = df_model.iloc[train_size:].copy()

# Ten time-ordered cross-validation folds, each stored as a
# (training frame, validation frame) pair of independent copies.
tscv = TimeSeriesSplit(n_splits=10)
train_val_sets = [
    (df_model.iloc[fit_rows].copy(), df_model.iloc[holdout_rows].copy())
    for fit_rows, holdout_rows in tscv.split(df_model)
]


In [None]:
def train_arima(train_df):
    """Fit an ARIMA model of order (5, 1, 0) to ``train_df['Total Amount']``.

    Returns the fitted results object produced by ``ARIMA(...).fit()``.
    """
    sales_series = train_df['Total Amount']
    return ARIMA(sales_series, order=(5, 1, 0)).fit()

def train_prophet(train_df):
    """Fit a default-configured Prophet model on ``train_df``.

    'Date' is renamed to 'ds' and 'Total Amount' to 'y' before fitting; every
    other column of ``train_df`` is passed along unchanged. Returns the model
    object after ``fit`` has been called on it.
    """
    prophet_names = {'Date': 'ds', 'Total Amount': 'y'}
    history = train_df.rename(columns=prophet_names)
    model = Prophet()
    model.fit(history)
    return model

def train_xgboost(train_df, val_df):
    """Fit a gradient-boosted regressor on the engineered features.

    Despite the name, the estimator is scikit-learn's
    ``GradientBoostingRegressor`` with default hyper-parameters. The target is
    'Total Amount'; the features are every column except 'Total Amount' and
    'Date'. ``val_df`` is accepted but not used.
    """
    target = train_df['Total Amount']
    features = train_df.drop(columns=['Total Amount', 'Date'])
    regressor = GradientBoostingRegressor()
    regressor.fit(features, target)
    return regressor

def train_lstm(train_df, val_df):
    """Train a two-layer LSTM regressor on the engineered features.

    Each row's feature vector (every column except 'Total Amount' and 'Date')
    is reshaped to ``(n_features, 1)``, so the network reads the features of
    one row as a sequence of single-value steps. ``val_df`` is accepted but
    not used. Returns the trained Keras model.

    NOTE(review): features and target are fed to the network unscaled --
    confirm this is intended.
    """
    feature_matrix = train_df.drop(columns=['Total Amount', 'Date']).values
    targets = train_df['Total Amount'].values
    n_samples, n_features = feature_matrix.shape
    sequences = feature_matrix.reshape((n_samples, n_features, 1))

    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(n_features, 1)),
        LSTM(50),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(sequences, targets, epochs=50, batch_size=32, verbose=0)
    return model


In [None]:
def forecast_arima(model, steps):
    """Return ``model.forecast(steps)`` for an already-fitted model."""
    return model.forecast(steps)

def forecast_prophet(model, steps):
    """Return the last ``steps`` 'yhat' predictions as a NumPy array.

    The model builds a future frame with ``steps`` extra periods, predicts on
    it, and only the final ``steps`` rows of 'yhat' are returned.

    A non-positive ``steps`` yields an empty array. Without this guard a
    ``[-0:]`` slice would return the whole fitted history instead of nothing.
    """
    if steps <= 0:
        return np.empty(0, dtype=float)
    future = model.make_future_dataframe(periods=steps)
    prediction = model.predict(future)
    # .iloc makes the "last `steps` rows" selection explicitly positional.
    return prediction['yhat'].iloc[-steps:].to_numpy()

def forecast_xgboost(model, val_df):
    """Predict with ``model`` on the feature columns of ``val_df``.

    The features are every column except 'Total Amount' and 'Date'. The result
    of ``model.predict`` is returned unchanged.
    """
    non_feature_columns = ['Total Amount', 'Date']
    feature_frame = val_df.drop(columns=non_feature_columns)
    return model.predict(feature_frame)

def forecast_lstm(model, scaler, val_df):
    """Predict with ``model`` on ``val_df`` and return a flat 1-D array.

    The feature columns (everything except 'Total Amount' and 'Date') are
    reshaped to ``(n_rows, n_features, 1)`` before prediction.

    NOTE(review): ``scaler`` is accepted but never used in this function.
    """
    features = val_df.drop(columns=['Total Amount', 'Date']).to_numpy()
    n_rows, n_features = features.shape
    sequences = features.reshape((n_rows, n_features, 1))
    return model.predict(sequences).flatten()


In [None]:
# Seed Python's `random`, NumPy and the Keras backend before any training so
# that stochastic steps (e.g. LSTM weight initialisation and batch shuffling)
# give the same result on every Restart & Run All.
SEED = 42
set_random_seed(SEED)

# Fit every base model on the training portion of the hold-out split.
arima_model = train_arima(train_df)
prophet_model = train_prophet(train_df)
xgb_model = train_xgboost(train_df, val_df)
lstm_model = train_lstm(train_df, val_df)

# Forecast the validation period with each model. ARIMA and Prophet forecast
# len(val_df) steps ahead; the feature-based models predict from val_df's rows.
arima_forecast = forecast_arima(arima_model, len(val_df))
prophet_forecast = forecast_prophet(prophet_model, len(val_df))
xgb_forecast = forecast_xgboost(xgb_model, val_df)
lstm_forecast = forecast_lstm(lstm_model, None, val_df)  # no scaler is used




In [None]:
def forecast_arima(model, steps):
    """Return ``model.forecast(steps)`` for an already-fitted model.

    NOTE(review): this repeats the ``forecast_arima`` definition from an
    earlier cell.
    """
    return model.forecast(steps)

def forecast_prophet(model, steps):
    """Return the last ``steps`` 'yhat' predictions as a NumPy array.

    The model builds a future frame with ``steps`` extra periods, predicts on
    it, and only the final ``steps`` rows of 'yhat' are returned.

    A non-positive ``steps`` yields an empty array. Without this guard a
    ``[-0:]`` slice would return the whole fitted history instead of nothing.

    NOTE(review): this name is also defined in an earlier cell.
    """
    if steps <= 0:
        return np.empty(0, dtype=float)
    future = model.make_future_dataframe(periods=steps)
    prediction = model.predict(future)
    # .iloc makes the "last `steps` rows" selection explicitly positional.
    return prediction['yhat'].iloc[-steps:].to_numpy()

def forecast_xgboost(model, val_df):
    """Predict with ``model`` on the feature columns of ``val_df``.

    The features are every column except 'Total Amount' and 'Date'. The result
    of ``model.predict`` is returned unchanged.

    NOTE(review): this repeats the ``forecast_xgboost`` definition from an
    earlier cell.
    """
    non_feature_columns = ['Total Amount', 'Date']
    feature_frame = val_df.drop(columns=non_feature_columns)
    return model.predict(feature_frame)

def forecast_lstm(model, scaler, val_df):
    """Predict with ``model`` on ``val_df`` and return a flat 1-D array.

    The feature columns (everything except 'Total Amount' and 'Date') are
    reshaped to ``(n_rows, n_features, 1)`` before prediction.

    NOTE(review): ``scaler`` is accepted but never used in this function, and
    this repeats the ``forecast_lstm`` definition from an earlier cell.
    """
    features = val_df.drop(columns=['Total Amount', 'Date']).to_numpy()
    n_rows, n_features = features.shape
    sequences = features.reshape((n_rows, n_features, 1))
    return model.predict(sequences).flatten()


In [None]:
# Evaluation Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluate_model(y_true, y_pred):
    """Compute regression error metrics for one forecast.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values, paired by position. Both are converted to
        flat float NumPy arrays first, so pandas index labels are ignored and
        an ``(n, 1)`` prediction cannot broadcast against an ``(n,)`` target.

    Returns
    -------
    tuple
        ``(mae, rmse, mape, mse, mbd, smape, r_squared)``. MAPE and sMAPE are
        percentages. MAPE is averaged over rows whose actual value is non-zero
        (NaN if there are none). An sMAPE term where actual and prediction are
        both zero counts as zero error.

    Raises
    ------
    ValueError
        From the scikit-learn metrics, e.g. when the two inputs differ in length.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    # The scikit-learn call goes first so that mismatched lengths raise before
    # any element-wise arithmetic below.
    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    r_squared = r2_score(actual, predicted)

    error = predicted - actual
    mbd = np.mean(error)

    # MAPE is undefined where the actual value is zero; skip those rows rather
    # than letting a division by zero turn the whole metric into inf.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs(error[nonzero] / actual[nonzero])) * 100
    else:
        mape = np.nan

    # sMAPE: a 0/0 term (actual and prediction both zero) is a perfect
    # prediction, so it contributes zero instead of NaN.
    denominator = np.abs(predicted) + np.abs(actual)
    defined = denominator != 0
    smape_terms = np.zeros_like(denominator)
    smape_terms[defined] = 2 * np.abs(error[defined]) / denominator[defined]
    smape = np.mean(smape_terms) * 100

    return mae, rmse, mape, mse, mbd, smape, r_squared

# Forecasts to score, keyed by the name shown in the printed report.
models = {
    'ARIMA': arima_forecast,
    'Prophet': prophet_forecast,
    'XGBoost': xgb_forecast,
    'LSTM': lstm_forecast
}

# Score by position, not by index label. A forecast may be a pandas Series
# whose index differs from val_df's; Series arithmetic would then align on
# labels and silently mis-pair rows or produce NaN. Plain arrays avoid that.
y_val = val_df['Total Amount'].to_numpy()

for model_name, forecast in models.items():
    eval_results = evaluate_model(y_val, np.asarray(forecast).ravel())
    print(f"{model_name} - MAE: {eval_results[0]}, RMSE: {eval_results[1]}, MAPE: {eval_results[2]}%, MSE: {eval_results[3]}, MBD: {eval_results[4]}, sMAPE: {eval_results[5]}%, R-squared: {eval_results[6]}")


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Stacking demo on PLACEHOLDER data.
# The four forecast lists and the validation frame below are hard-coded example
# numbers. Replace them with the real forecasts and actuals for CV split 5
# before reading anything into the printed metrics.
arima_forecast_split_5 = [21.5, 22.0, 20.8, 24.0, 21.9]  # Replace with actual values
prophet_forecast_split_5 = [22.3, 21.1, 23.7, 21.6, 22.2]  # Replace with actual values
xgb_forecast_split_5 = [20.9, 21.2, 20.7, 21.8, 21.0]  # Replace with actual values
lstm_forecast_split_5 = [20.5, 20.7, 21.0, 20.8, 21.3]  # Replace with actual values

# The placeholder (train, validation) pairs live under their own name so the
# `train_val_sets` built by the TimeSeriesSplit cell is not overwritten with
# fake data. The throwaway `_` is also no longer rebound.
example_val_sets = [(None, pd.DataFrame({'Total Amount': [23, 25, 27, 22, 24]}))] * 10
val_df_split_5 = example_val_sets[4][1]
y_true_split_5 = val_df_split_5['Total Amount']

# Meta-model training table: one column per base-model forecast plus target 'y'.
meta_train = pd.DataFrame({
    'ARIMA': arima_forecast_split_5,
    'Prophet': prophet_forecast_split_5,
    'XGBoost': xgb_forecast_split_5,
    'LSTM': lstm_forecast_split_5
})
meta_train['y'] = y_true_split_5.values

# Fit a linear blend of the base-model forecasts.
X_meta = meta_train[['ARIMA', 'Prophet', 'XGBoost', 'LSTM']]
y_meta = meta_train['y']
meta_model = LinearRegression()
meta_model.fit(X_meta, y_meta)

# NOTE(review): the meta-model predicts the same rows it was fitted on, and
# with five rows against four regressors plus an intercept it can fit them
# almost exactly. The metrics below are in-sample and say nothing about
# out-of-sample accuracy -- evaluate on a split the meta-model has not seen.
final_forecast_split_5 = meta_model.predict(X_meta)

# Reuse the shared metric helper instead of repeating its formulas inline.
mae, rmse, mape, mse, mbd, smape, r_squared = evaluate_model(y_true_split_5, final_forecast_split_5)

print('Stacked Model Results for Split 5:')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')
print(f'MAPE: {mape}%')
print(f'MSE: {mse}')
print(f'MBD: {mbd}')
print(f'sMAPE: {smape}%')
print(f'R-squared: {r_squared}')
