In [12]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score


In [13]:
df = pd.read_csv('Book3.csv')

print("Column names:")
for col in df.columns:
    print(col)


Column names:
Time
Turnover ;  Total (State) ;  Food retailing ;
Turnover ;  Total (State) ;  Household goods retailing ;
Turnover ;  Total (State) ;  Clothing, footwear and personal accessory retailing ;
Turnover ;  Total (State) ;  Department stores ;
Turnover ;  Total (State) ;  Other retailing ;
Turnover ;  Total (State) ;  Cafes, restaurants and takeaway food services ;
Turnover ;  Total (State) ;  Total (Industry) ;
Change in Food Retail
Change in goods retail
change in acces retail
change in Dept Store
change in other retail
change in food seervices
change in total industry


In [24]:
df = pd.read_csv("Book3.csv", parse_dates=["Time"], index_col="Time")

# Add this to eliminate the warning
df.index = pd.to_datetime(df.index)
df = df.asfreq('MS')


  df = pd.read_csv("Book3.csv", parse_dates=["Time"], index_col="Time")


In [25]:
target_columns = [
    "Turnover ;  Total (State) ;  Food retailing ;",
    "Turnover ;  Total (State) ;  Household goods retailing ;",
    "Turnover ;  Total (State) ;  Clothing, footwear and personal accessory retailing ;",
    "Turnover ;  Total (State) ;  Department stores ;",
    "Turnover ;  Total (State) ;  Other retailing ;",
    "Turnover ;  Total (State) ;  Cafes, restaurants and takeaway food services ;"
]

In [27]:
for col in target_columns:
    df[col] = pd.to_numeric(df[col], errors="coerce")


In [28]:
from statsmodels.tsa.stattools import adfuller

result = adfuller(df[target_columns[0]].dropna())
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')


ADF Statistic: 3.86139994946027
p-value: 1.0


In [29]:
results = []


In [32]:
for col in target_columns:
    series = df[col].dropna()

    try:
        model = SARIMAX(series, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12),
                        enforce_stationarity=False, enforce_invertibility=False)
        result = model.fit(disp=False)

        y_true = series[-12:]
        y_pred = result.predict(start=len(series) - 12, end=len(series) - 1)

        mae = mean_absolute_error(y_true, y_pred)
        mape = mean_absolute_percentage_error(y_true, y_pred) * 100
        rmse = mean_squared_error(y_true, y_pred) ** 0.5
        r2 = r2_score(y_true, y_pred)

        results.append({
            "Category": col.strip(),
            "MAE": round(mae, 2),
            "MAPE (%)": round(mape, 2),
            "RMSE": round(rmse, 2),
            "R²": round(r2, 4)
        })

    except Exception as e:
        results.append({
            "Category": col.strip(),
            "Error": str(e)
        })


In [33]:
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,Category,Error,MAE,MAPE (%),RMSE,R²
0,Turnover ; Total (State) ; Food retailing ;,got an unexpected keyword argument 'squared',,,,
1,Turnover ; Total (State) ; Household goods r...,got an unexpected keyword argument 'squared',,,,
2,"Turnover ; Total (State) ; Clothing, footwea...",got an unexpected keyword argument 'squared',,,,
3,Turnover ; Total (State) ; Department stores ;,got an unexpected keyword argument 'squared',,,,
4,Turnover ; Total (State) ; Other retailing ;,got an unexpected keyword argument 'squared',,,,
5,"Turnover ; Total (State) ; Cafes, restaurant...",got an unexpected keyword argument 'squared',,,,
6,Turnover ; Total (State) ; Food retailing ;,,166.43,1.17,210.04,0.9428
7,Turnover ; Total (State) ; Household goods r...,,138.87,2.38,173.95,0.9423
8,"Turnover ; Total (State) ; Clothing, footwea...",,63.67,2.33,87.28,0.9734
9,Turnover ; Total (State) ; Department stores ;,,43.73,2.49,55.24,0.9846


* PROPHET MODEL

In [34]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score


  from .autonotebook import tqdm as notebook_tqdm


In [35]:
df = pd.read_csv("Book3.csv", parse_dates=["Time"], index_col="Time")

df.index = pd.to_datetime(df.index)
df = df.asfreq('MS')


  df = pd.read_csv("Book3.csv", parse_dates=["Time"], index_col="Time")


In [36]:
target_columns = [
    "Turnover ;  Total (State) ;  Food retailing ;",
    "Turnover ;  Total (State) ;  Household goods retailing ;",
    "Turnover ;  Total (State) ;  Clothing, footwear and personal accessory retailing ;",
    "Turnover ;  Total (State) ;  Department stores ;",
    "Turnover ;  Total (State) ;  Other retailing ;",
    "Turnover ;  Total (State) ;  Cafes, restaurants and takeaway food services ;"
]


In [38]:
results = []

for col in target_columns:
    data = df[[col]].dropna().reset_index()
    data.columns = ['ds', 'y']

    try:
        model = Prophet()
        model.fit(data)

        future = model.make_future_dataframe(periods=12, freq='MS')
        forecast = model.predict(future)

        y_true = data['y'][-12:].values
        y_pred = forecast['yhat'][-24:-12].values

        mae = mean_absolute_error(y_true, y_pred)
        mape = mean_absolute_percentage_error(y_true, y_pred) * 100
        rmse = mean_squared_error(y_true, y_pred) ** 0.5
        r2 = r2_score(y_true, y_pred)

        results.append({
            "Category": col.strip(),
            "MAE": round(mae, 2),
            "MAPE (%)": round(mape, 2),
            "RMSE": round(rmse, 2),
            "R²": round(r2, 4)
        })

    except Exception as e:
        results.append({
            "Category": col.strip(),
            "Error": str(e)
        })


12:23:51 - cmdstanpy - INFO - Chain [1] start processing
12:23:51 - cmdstanpy - INFO - Chain [1] done processing
12:23:51 - cmdstanpy - INFO - Chain [1] start processing
12:23:51 - cmdstanpy - INFO - Chain [1] done processing
12:23:51 - cmdstanpy - INFO - Chain [1] start processing
12:23:51 - cmdstanpy - INFO - Chain [1] done processing
12:23:51 - cmdstanpy - INFO - Chain [1] start processing
12:23:51 - cmdstanpy - INFO - Chain [1] done processing
12:23:52 - cmdstanpy - INFO - Chain [1] start processing
12:23:52 - cmdstanpy - INFO - Chain [1] done processing
12:23:52 - cmdstanpy - INFO - Chain [1] start processing
12:23:52 - cmdstanpy - INFO - Chain [1] done processing


In [39]:
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,Category,MAE,MAPE (%),RMSE,R²
0,Turnover ; Total (State) ; Food retailing ;,332.69,2.24,463.93,0.721
1,Turnover ; Total (State) ; Household goods r...,416.68,7.2,451.27,0.6117
2,"Turnover ; Total (State) ; Clothing, footwea...",170.82,4.86,304.31,0.6769
3,Turnover ; Total (State) ; Department stores ;,83.51,3.64,147.14,0.8906
4,Turnover ; Total (State) ; Other retailing ;,231.66,3.81,328.64,0.7197
5,"Turnover ; Total (State) ; Cafes, restaurant...",351.49,6.31,383.13,-0.8771
