# Analysis DATA

In [None]:
from utilities import *

# Load the raw market spreadsheet; the path is relative to the notebook's working directory.
# `pd` and `SalesAnalysis` are presumably provided by the star import from `utilities` above.
raw_data = pd.read_excel("data/Datos_Market_copy.xlsx")

# Wrap the raw frame in the project's SalesAnalysis helper; every later cell goes through `sa`.
sa = SalesAnalysis(raw_data)

# Data exposed by the helper (presumably a prepared version of raw_data — confirm in utilities).
data = sa.data

In [None]:
# Split the data into a train and a test set (train_size=0.8).
train_data, test_data = sa.divide_data_for_train_and_test(data=data, train_size=0.8)

# Export both splits to Excel files under data/ through the sa.excel helper.
sa.excel(train_data, path="data/train_data.xlsx")
sa.excel(test_data, path="data/test_data.xlsx")

#### Linear Regression Model

In [None]:
# Fit the regression model on the training rows selected by sa.brand35, using the helper's
# backward-elimination routine (presumably drops non-significant regressors — confirm in utilities).
model_brand35 = sa.modelization_with_backward_elimination(train_data[sa.brand35])
# Last expression of the cell: the fitted model's summary is shown as the cell output.
model_brand35.summary()

In [None]:
#### RESIDUALS
# Plot the regression residuals together with their ACF/PACF, up to 40 lags.
# These plots presumably guide the choice of the ARIMA p (PACF) and q (ACF) orders below.
sa.plot_resid_ACF_PACF(model_brand35.resid, lags=40)

#### ARIMA

In [None]:
#### ARIMA
# Order convention used for every candidate below: (p, d, q)
#   p -> read from the PACF
#   d -> differencing (diff)
#   q -> read from the ACF
possible_models = {
    "1": (0, 0, 5),
    "2": (5, 0, 0),
    "3": (5, 0, 5),
}

# Fit the candidate currently chosen as the working model on the regression residuals.
model_arima_brand35_selected = sa.ARIMA(
    residues=model_brand35.resid,
    model_chosen=possible_models["1"],
    diff_need_for_residues=False,
)

##### Find the best model (ARIMA)

In [None]:
# Rank the candidate orders by AIC and BIC; for both criteria, lower is better.
# Each candidate is refitted on the regression residuals before its scores are printed.
for i, order in possible_models.items():
    model_arima_brand35 = sa.ARIMA(
        residues=model_brand35.resid,
        model_chosen=order,
        diff_need_for_residues=False,
    )
    print("[lower is better]")
    print("------------------ AIC and BIC ----------------------")
    print(
        f"Model {i} ARIMA: {order} ------->",
        f"AIC: {model_arima_brand35.aic}",
        f"BIC: {model_arima_brand35.bic}",
    )
    print("----------------------------------------------------")

In [None]:
# Refit every candidate order and run the white-noise test on its residuals.
for modelo, order in possible_models.items():
    model_brand35_arima = sa.ARIMA(
        residues=model_brand35.resid,
        model_chosen=order,
        diff_need_for_residues=False,
    )

    print("-----------------Residues Analysis (White Noise) -----------------")
    sa.residual_white_noise_test(model_brand35_arima.resid)
    print("------------------------------------------------------------------")

In [None]:
##### FORECASTING
# Forecast one step per row of the held-out test set with the selected ARIMA model.
# NOTE(review): model_arima_brand35_selected was fitted on model_brand35.resid (regression
# residuals), so this forecast is presumably on the residual scale, while real_values below
# are taken straight from test_data — confirm the two are comparable before trusting the metrics.
forecasting = model_arima_brand35_selected.forecast(steps=len(test_data))

# Get the real values from the test split.
real_values = test_data[sa.brand35].values

# Optional visual comparison of the real values with the forecast (currently disabled):
# plt.figure(figsize=(12, 6))
# plt.plot(real_values, label="Real Values", color="blue")
# plt.plot(forecasting, label="Forecasting", color="red")
# plt.legend()
# plt.show()

# Calculate RMSE and MAE between the real values and the forecast.
rmse = sa.rmse(real_values, forecasting)
mae = sa.mae(real_values, forecasting)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")

#### ARIMAX


In [None]:
# Fit an ARIMAX model on the two sales series through the sa.ARIMAX helper.
# Presumably "volume.sales" is the target and "unit.sales" the exogenous regressor —
# confirm against the helper's signature in utilities.
#
# Fix: the order was looked up with key "6", but possible_models only defines the keys
# "1", "2" and "3", so the cell raised KeyError. Key "1" is used here because it is the
# order selected for the ARIMA model earlier in the notebook.
# NOTE(review): confirm that this is the intended ARIMAX order.
model_arimax = sa.ARIMAX(
    data["volume.sales"], data["unit.sales"], model_chosen=possible_models["1"]
)

# Check the ARIMAX residuals for remaining correlation (last expression: shown as cell output).
sa.test_correlation_residues(model_arimax.resid)