In [None]:
!pip install pandas --quiet
!pip install numpy --quiet
!pip install matplotlib --quiet
!pip install statsmodels --quiet
!pip install pmdarima --quiet
!pip install xgboost --quiet
!pip install scikit-learn --quiet
!pip install keras --quiet
!pip install fbprophet

In [None]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [None]:
# Load the historical train/test CSV exports into pandas DataFrames.
# The directory defaults to the location used originally, so existing runs are
# unchanged; set the VAL_DATA_DIR environment variable to read from elsewhere.
DATA_DIR = Path(os.environ.get('VAL_DATA_DIR', '/Users/ronangabriel/Downloads'))

train_data = pd.read_csv(DATA_DIR / 'VAL_Forecasting Data (Train).csv')
test_data = pd.read_csv(DATA_DIR / 'VAL_Forecasting Data (Test).csv')


In [None]:
# Parse the 'Date' column to datetime and make it the index of each DataFrame.
# set_index(..., inplace=True) removes the 'Date' column, so the guard below
# turns a second run of this cell into a no-op instead of a KeyError.
for frame in (train_data, test_data):
    if 'Date' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['Date'])
        frame.set_index('Date', inplace=True)


In [None]:
# Views: train and test series drawn on one set of axes
fig, ax = plt.subplots()
for frame, label in ((train_data, 'Train Data'), (test_data, 'Test Data')):
    ax.plot(frame.index, frame['Views'], label=label)
ax.set_xlabel('Date')
ax.set_ylabel('Views')
ax.set_title('Views over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Sessions: train and test series drawn on one set of axes
fig, ax = plt.subplots()
for frame, label in ((train_data, 'Train Data'), (test_data, 'Test Data')):
    ax.plot(frame.index, frame['Sessions'], label=label)
ax.set_xlabel('Date')
ax.set_ylabel('Sessions')
ax.set_title('Sessions over Time')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Views per session: train and test series drawn on one set of axes
fig, ax = plt.subplots()
for frame, label in ((train_data, 'Train Data'), (test_data, 'Test Data')):
    ax.plot(frame.index, frame['Views per session'], label=label)
ax.set_xlabel('Date')
ax.set_ylabel('Views per session')
ax.set_title('Views per Session over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Total users: train and test series drawn on one set of axes
fig, ax = plt.subplots()
for frame, label in ((train_data, 'Train Data'), (test_data, 'Test Data')):
    ax.plot(frame.index, frame['Total users'], label=label)
ax.set_xlabel('Date')
ax.set_ylabel('Total users')
ax.set_title('Total Users over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Engaged sessions/sessions: train and test series drawn on one set of axes
fig, ax = plt.subplots()
for frame, label in ((train_data, 'Train Data'), (test_data, 'Test Data')):
    ax.plot(frame.index, frame['Engaged sessions/sessions'], label=label)
ax.set_xlabel('Date')
ax.set_ylabel('Engaged sessions/sessions')
ax.set_title('Engaged Sessions per session over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Containers for the forecasts and evaluation metrics of every model.
# Keys have the form '<variable>_<model>', e.g. 'Views_ARIMA'.
forecasts = {}
mae_scores = {}
mape_scores = {}

# Dependent variables to forecast, and the model suffixes used in the keys
dependent_vars = ['Views', 'Sessions', 'Views per session', 'Total users', 'Engaged sessions/sessions']
model_names = ['ES', 'ARIMA', 'AutoARIMA', 'XGBoost', 'LSTM']


def _mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Both inputs are converted to plain float arrays, so values are compared
    position by position (the same convention ``mean_absolute_error`` uses)
    rather than being aligned on a pandas index, which would silently produce
    NaN whenever the forecast index and the test index differ.

    A zero in ``actual`` yields inf/nan in the result, as plain elementwise
    division does.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.mean(np.abs((actual - predicted) / actual)) * 100


n_train = len(train_data)
horizon = len(test_data)

# Loop through each dependent variable and build and evaluate the models
for var in dependent_vars:
    # Exponential Smoothing (additive trend and additive seasonality)
    model_es = ExponentialSmoothing(train_data[var], trend='add', seasonal='add', freq='D')
    model_es_fit = model_es.fit()
    forecasts[var + '_ES'] = model_es_fit.forecast(horizon)

    # ARIMA with a fixed (1, 0, 0) order
    model_arima = ARIMA(train_data[var], order=(1, 0, 0), freq='D')
    model_arima_fit = model_arima.fit()
    forecasts[var + '_ARIMA'] = model_arima_fit.predict(start=n_train, end=n_train + horizon - 1)

    # Auto ARIMA: auto_arima returns an already-fitted model, so no second
    # fit on the same data is needed. The alias keeps the original name bound.
    model_autoarima = auto_arima(train_data[var], seasonal=False, suppress_warnings=True)
    model_autoarima_fit = model_autoarima
    forecasts[var + '_AutoARIMA'] = model_autoarima_fit.predict(n_periods=horizon)

    # XGBoost regressed on the integer time step only
    model_xgb = XGBRegressor()
    model_xgb.fit(np.arange(n_train).reshape(-1, 1), train_data[var])
    forecasts[var + '_XGBoost'] = model_xgb.predict(np.arange(n_train, n_train + horizon).reshape(-1, 1))

    # LSTM: learn the one-step-ahead map y[t-1] -> y[t] on the training series
    # and roll it forward from the last training value. The test values are
    # never fed to the model, so the forecast does not leak the quantity it is
    # scored against.
    train_values = np.asarray(train_data[var], dtype=float)
    model_lstm = Sequential()
    model_lstm.add(LSTM(100, activation='relu', input_shape=(1, 1)))
    model_lstm.add(Dense(1))
    model_lstm.compile(optimizer='adam', loss='mse')
    model_lstm.fit(train_values[:-1].reshape(-1, 1, 1), train_values[1:].reshape(-1, 1), epochs=100, verbose=0)
    lstm_preds = []
    previous = train_values[-1]
    for step in range(horizon):
        # each prediction becomes the input of the next step
        previous = float(model_lstm.predict(np.array([[[previous]]]), verbose=0)[0, 0])
        lstm_preds.append(previous)
    forecasts[var + '_LSTM'] = np.array(lstm_preds)

    # Calculate evaluation metrics, comparing positionally against the test values
    actual = test_data[var].to_numpy()
    for name in model_names:
        key = var + '_' + name
        predicted = np.asarray(forecasts[key])
        mae_scores[key] = mean_absolute_error(actual, predicted)
        mape_scores[key] = _mape(actual, predicted)


In [None]:
# Print the evaluation metrics for each model, grouped by dependent variable.
# Score keys have the form '<variable>_<model>'; the variable part is recovered
# by splitting on the LAST underscore so that a variable name which itself
# contains '_' is still matched correctly.
for var in dependent_vars:
    print('---', var, '---')
    for heading, scores in (('MAE scores:', mae_scores), ('\nMAPE scores:', mape_scores)):
        print(heading)
        for model, score in scores.items():
            if model.rsplit('_', 1)[0] == var:
                print(model, ':', score)
    print('\n')