#### Firstly,
1. Import the data
2. Set the date column as the index of our data set
3. Convert the date index to datetime before doing anything else

In [None]:
from utilities import *

# Read the market workbook and hand it to SalesAnalysis.
raw_data = pd.read_excel("data/Datos_Market_copy.xlsx")
sa = SalesAnalysis(raw_data)

# Select with `sa.brand35`, then use the "date" column as the index and
# convert that index to datetime.
data = sa.data[sa.brand35].set_index("date")
data.index = pd.to_datetime(data.index)

Split the data into train and test sets

In [None]:
# Ask SalesAnalysis for the two partitions, passing 0.8 as the train size.
split_frames = sa.divide_data_for_train_and_test(train_size=0.8, data=data)
train_data, test_data = split_frames

# Preview the first ten rows of the training partition.
print(train_data.head(10))

#### Check if the data has been split correctly

In [None]:
# Segment inspected in this check, and the column that is plotted.
SUPERMARKET = "supermarket-A"
VARIANT = "flavoured"
TARGET_COLUMN = "volume.sales"


def filter_segment(frame, supermarket, variant):
    """Return the rows of ``frame`` for one supermarket/variant pair.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with "supermarket" and "variant" columns.
    supermarket : str
        Value the "supermarket" column must equal.
    variant : str
        Value the "variant" column must equal.

    Returns
    -------
    pandas.DataFrame
        The matching rows; the index of ``frame`` is kept as is.
    """
    mask = (frame["supermarket"] == supermarket) & (frame["variant"] == variant)
    return frame[mask]


# Same filter applied to both partitions, so they cannot drift apart.
filtered_train_data = filter_segment(train_data, SUPERMARKET, VARIANT)
filtered_test_data = filter_segment(test_data, SUPERMARKET, VARIANT)

# Plot both partitions on one axis, against their index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(filtered_train_data[TARGET_COLUMN], label="Train data", color="blue")
ax.plot(filtered_test_data[TARGET_COLUMN], label="Test data", color="green")
ax.set_title(f"Train/test split: {SUPERMARKET}, {VARIANT}")
# Label the x axis with the index name when it has one.
ax.set_xlabel(filtered_train_data.index.name or "index")
ax.set_ylabel(TARGET_COLUMN)
ax.legend()
plt.show()

In [None]:
# Labelled preview of the first ten rows of the filtered training subset.
print("Filtered Train Data:", filtered_train_data.head(10), sep="\n")

#TODO: Not sure yet whether auto_arima can be used without removing the value and unit columns. Try it later!

In [None]:
import warnings

# 1) Optional: silence only urllib3's "only supports OpenSSL" notice.
# The filter matches on the message text and is installed before the imports
# below. Importing urllib3 just to obtain the NotOpenSSLWarning class would
# emit the warning before a category-based filter could take effect, and that
# class was only added in urllib3 2.x, so the import fails on older versions.
warnings.filterwarnings("ignore", message=r"urllib3 v2.*only supports OpenSSL")

# 2) Libraries
import numpy as np
from pmdarima.arima import auto_arima

# FIXME: check whether the series is stationary before fixing d (and D).
auto_arima_model = auto_arima(
    filtered_train_data["volume.sales"],
    start_p=0,
    d=1,  # Non-seasonal differencing set explicitly instead of None
    start_q=0,
    max_p=5,
    max_q=5,
    start_P=0,
    D=1,  # Seasonal differencing set explicitly
    start_Q=0,
    max_P=1,
    max_Q=1,
    m=12,  # Monthly data
    seasonal=True,
    trace=True,  # Print every candidate model that is fitted
    error_action="warn",  # Warn (instead of ignoring) when a candidate fails to fit
    suppress_warnings=True,
    stepwise=True,
    # NOTE: per the pmdarima docs, random_state and n_fits only apply to the
    # random search (random=True); they do not affect this stepwise search.
    random_state=42,
    n_fits=50,
    information_criterion="aic",
)

# Report the selected orders and the AIC of the selected model.
print("Best model parameters:", auto_arima_model.order)
print("Best seasonal parameters:", auto_arima_model.seasonal_order)
print("AIC:", auto_arima_model.aic())

Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=297.456, Time=0.01 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=300.602, Time=0.04 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=300.563, Time=0.21 sec
 ARIMA(0,1,0)(1,1,0)[12]             : AIC=299.069, Time=0.26 sec
 ARIMA(0,1,0)(0,1,1)[12]             : AIC=298.687, Time=0.02 sec
 ARIMA(0,1,0)(1,1,1)[12]             : AIC=300.488, Time=0.04 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=299.141, Time=0.01 sec
 ARIMA(0,1,1)(0,1,0)[12]             : AIC=299.291, Time=0.01 sec
 ARIMA(1,1,1)(0,1,0)[12]             : AIC=299.184, Time=0.03 sec
 ARIMA(0,1,0)(0,1,0)[12] intercept   : AIC=299.440, Time=0.00 sec

Best model:  ARIMA(0,1,0)(0,1,0)[12]          
Total fit time: 0.623 seconds
Best model parameters: (0, 1, 0)
Best seasonal parameters: (0, 1, 0, 12)
AIC: 297.45647128978203
