# Forecasting

Get fast, easy model settings (SARIMA) from notebook 3 and predict revenue for all province

In [1]:
# Import
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)

from pandas.errors import SettingWithCopyWarning
warnings.filterwarnings(action='ignore', category=SettingWithCopyWarning)

import pandas as pd
import numpy as np
import pmdarima as pm

In [2]:
# Settings
DATA_CLEANED_VARIABLE_PATH = "../data/cleaned_data_variable.csv"
DATA_CLEANED_PATH = "../data/cleaned_data.csv"
THAI_COVID_RAW_DATA_PATH = "../raw_data/data_thai_covid_case.xlsx"
DATA_VARIABLE_OUTPUT_PATH = "../data/forecast_data_variable.csv"
DATA_OUTPUT_PATH = "../data/forecast_data.csv"
FORECASTING_MONTH = 12

In [3]:
data_tourism = pd.read_csv(DATA_CLEANED_PATH)
data_tourism["date"] = pd.to_datetime(data_tourism["date"])
# Our problem is about sales, so in this notebook, we focus on revenue_all
data_tourism_revenue_all  = data_tourism[["date", "province", "region", "revenue_all"]]
## divide by 1e6 
data_tourism_revenue_all["revenue_all"]  = data_tourism_revenue_all["revenue_all"].astype(np.float64) / 1e6
thai_revenue_all = data_tourism_revenue_all.groupby(["date", "province", "region"]).sum("revenue_all").reset_index()

In [4]:
thai_revenue_all

Unnamed: 0,date,province,region,revenue_all
0,2019-01-01,Amnat Charoen,east_northeast,34.510000
1,2019-01-01,Ang Thong,central,132.070000
2,2019-01-01,Bangkok Metropolis,central,81926.490000
3,2019-01-01,Bueng Kan,east_northeast,137.810000
4,2019-01-01,Buri Ram,east_northeast,364.160000
...,...,...,...,...
4461,2023-10-01,Udon Thani,east_northeast,873.069999
4462,2023-10-01,Uthai Thani,north,144.190000
4463,2023-10-01,Uttaradit,north,147.760000
4464,2023-10-01,Yala,south,338.720000


In [5]:
map_province_to_region = {}
for index, row in thai_revenue_all.iterrows():
    map_province_to_region[row["province"]] = row["region"]

## SARIMA

In [6]:
result = []
result_variable = []

for province, region in map_province_to_region.items():
    print(f"Forecasting {province} ....")
    thai_revenue_all_province = thai_revenue_all[thai_revenue_all["province"] == province].sort_values(by="date").reset_index(drop=True)
    train_data = thai_revenue_all_province.set_index("date")

    SARIMAModel = pm.auto_arima(
                            train_data["revenue_all"],
                            start_p=0,
                            start_q=0, 
                            test= "adf",  # use adftest to find optimal 'd'
                            max_p=3,
                            max_q=3,
                            m=12,
                            start_P=0,
                            seasonal=True,
                            D=1,
                            alpha=0.05,
                            trace=False,
                            suppress_warnings=True,
                            stepwise=True)

    forecast, forecast_int = SARIMAModel.predict(n_periods=FORECASTING_MONTH, 
                                      return_conf_int=True,
                                      alpha=0.05)
        
    for idx, data_sample in enumerate(forecast.iteritems()):
        lower_bound, upper_bound = forecast_int[idx]
        change_ratio = np.abs((upper_bound - data_sample[1]) / data_sample[1])  
        result.append([data_sample[0], province, region, data_sample[1] * 1e6, lower_bound * 1e6, upper_bound * 1e6, change_ratio])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast", data_sample[1] * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_lower", lower_bound * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_upper", upper_bound * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_change_ratio", change_ratio])

Forecasting Amnat Charoen ....
Forecasting Ang Thong ....
Forecasting Bangkok Metropolis ....
Forecasting Bueng Kan ....
Forecasting Buri Ram ....
Forecasting Chachoengsao ....
Forecasting Chai Nat ....
Forecasting Chaiyaphum ....
Forecasting Chanthaburi ....
Forecasting Chiang Mai ....
Forecasting Chiang Rai ....
Forecasting Chon Buri ....
Forecasting Chumphon ....
Forecasting Kalasin ....
Forecasting Kamphaeng Phet ....
Forecasting Kanchanaburi ....
Forecasting Khon Kaen ....
Forecasting Krabi ....
Forecasting Lampang ....
Forecasting Lamphun ....
Forecasting Loei ....
Forecasting Lop Buri ....
Forecasting Mae Hong Son ....
Forecasting Maha Sarakham ....
Forecasting Mukdahan ....
Forecasting Nakhon Nayok ....
Forecasting Nakhon Pathom ....
Forecasting Nakhon Phanom ....
Forecasting Nakhon Ratchasima ....
Forecasting Nakhon Sawan ....
Forecasting Nakhon Si Thammarat ....
Forecasting Nan ....
Forecasting Narathiwat ....
Forecasting Nong Bua Lam Phu ....
Forecasting Nong Khai ....
Forec

In [7]:
df_result_variable = pd.DataFrame(result_variable, columns=["date", "province", "region", "variable", "value"])
df_result_variable["model"] = "SARIMA"
df_result_variable

Unnamed: 0,date,province,region,variable,value,model
0,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast,2.428000e+07,SARIMA
1,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_lower,5.517082e+06,SARIMA
2,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_upper,4.304292e+07,SARIMA
3,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_change_ratio,7.727726e-01,SARIMA
4,2023-12-01,Amnat Charoen,east_northeast,revenue_all_forecast,2.694000e+07,SARIMA
...,...,...,...,...,...,...
3691,2024-09-01,Yasothon,east_northeast,revenue_all_forecast_change_ratio,1.706299e+00,SARIMA
3692,2024-10-01,Yasothon,east_northeast,revenue_all_forecast,5.934310e+07,SARIMA
3693,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_lower,-4.659899e+07,SARIMA
3694,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_upper,1.652852e+08,SARIMA


In [8]:
df_result = pd.DataFrame(result, columns=["date", "province", "region", "revenue_all_forecast", "revenue_all_forecast_lower",
                                          "revenue_all_forecast_upper", "revenue_all_forecast_change_ratio"])
df_result["model"] = "SARIMA"
df_result

Unnamed: 0,date,province,region,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2023-11-01,Amnat Charoen,east_northeast,2.428000e+07,5.517082e+06,4.304292e+07,0.772773,SARIMA
1,2023-12-01,Amnat Charoen,east_northeast,2.694000e+07,4.052273e+05,5.347477e+07,0.984958,SARIMA
2,2024-01-01,Amnat Charoen,east_northeast,2.824000e+07,-4.258327e+06,6.073833e+07,1.150791,SARIMA
3,2024-02-01,Amnat Charoen,east_northeast,2.598000e+07,-1.154584e+07,6.350584e+07,1.444412,SARIMA
4,2024-03-01,Amnat Charoen,east_northeast,2.342000e+07,-1.853516e+07,6.537516e+07,1.791424,SARIMA
...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,6.336960e+07,-2.471556e+07,1.514548e+08,1.390022,SARIMA
920,2024-07-01,Yasothon,east_northeast,6.180383e+07,-3.106442e+07,1.546721e+08,1.502629,SARIMA
921,2024-08-01,Yasothon,east_northeast,6.119584e+07,-3.617571e+07,1.585674e+08,1.591147,SARIMA
922,2024-09-01,Yasothon,east_northeast,5.961885e+07,-4.210876e+07,1.613465e+08,1.706299,SARIMA


In [9]:
## Output
data_cleaned_variable_df = pd.read_csv(DATA_CLEANED_VARIABLE_PATH)

In [10]:
data_cleaned_variable_df

Unnamed: 0,date,province,region,variable,value
0,2019-01-01,Bangkok Metropolis,central,occupancy_rate,9.337000e-01
1,2019-01-01,Lop Buri,central,occupancy_rate,6.132000e-01
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,occupancy_rate,7.337000e-01
3,2019-01-01,Saraburi,central,occupancy_rate,6.733000e-01
4,2019-01-01,Chai Nat,central,occupancy_rate,7.931000e-01
...,...,...,...,...,...
35723,2023-10-01,Roi Et,east_northeast,revenue_foreign,2.410000e+06
35724,2023-10-01,Si Sa Ket,south,revenue_foreign,8.600000e+05
35725,2023-10-01,Surin,east_northeast,revenue_foreign,4.440000e+06
35726,2023-10-01,Amnat Charoen,east_northeast,revenue_foreign,7.400000e+05


In [11]:
df_variables = pd.concat([data_cleaned_variable_df, df_result_variable])
df = pd.concat([data_tourism,  df_result]) 

## SARIMAX

In [12]:
thai_covid_data  = pd.read_excel("../raw_data/data_thai_covid_case.xlsx")

In [13]:
thai_covid_data = thai_covid_data[["date", "new_cases"]]
thai_covid_data["new_cases"] = thai_covid_data["new_cases"].fillna(0)

In [14]:
thai_covid_data["year"] = thai_covid_data["date"].dt.year
thai_covid_data["month"] = thai_covid_data["date"].dt.month


In [15]:
thai_covid_data_groupby = thai_covid_data.groupby(["year", "month"]).sum("new_cases").reset_index()
thai_covid_data_groupby["date"] = pd.to_datetime(dict(year=thai_covid_data_groupby.year, month=thai_covid_data_groupby.month, day=1))
thai_covid_data_groupby = thai_covid_data_groupby[["date", "new_cases"]]
new_date_data = list(pd.date_range(start=thai_revenue_all["date"].min(), freq="MS", end=thai_covid_data_groupby["date"].min()))

In [16]:
list_added_data = []
for sample_data in new_date_data:
    list_added_data.append({"date": sample_data, "new_cases": 0})

In [17]:
thai_covid_data_groupby = thai_covid_data_groupby.append(list_added_data)
thai_covid_data_groupby.sort_values(by = "date", inplace=True)

In [18]:
covid_data = thai_covid_data_groupby.set_index("date")
train_covid_data = covid_data[:len(thai_revenue_all["date"].unique())].values
forecast_covid_data = np.ones((FORECASTING_MONTH, 1 )) * covid_data["new_cases"][-1]

In [19]:
result = []
result_variable = []

for province, region in map_province_to_region.items():
    print(f"Forecasting {province} ....")
    thai_revenue_all_province = thai_revenue_all[thai_revenue_all["province"] == province].sort_values(by="date").reset_index(drop=True)
    train_data = thai_revenue_all_province.set_index("date")
    
    SARIMAXModel = pm.auto_arima(
                            y=train_data["revenue_all"],
                            X=train_covid_data,
                            start_p=0,
                            start_q=0, 
                            test= "adf",  # use adftest to find optimal 'd'
                            max_p=3,
                            max_q=3,
                            m=12,
                            start_P=0,
                            seasonal=True,
                            D=1,
                            alpha=0.05,
                            trace=False,
                            suppress_warnings=True,
                            error_action="ignore",
                            stepwise=True)

    forecast, forecast_int = SARIMAXModel.predict(n_periods=FORECASTING_MONTH, 
                                                  X=forecast_covid_data,
                                      return_conf_int=True,
                                      alpha=0.05)
        
    for idx, data_sample in enumerate(forecast.iteritems()):
        lower_bound, upper_bound = forecast_int[idx]
        change_ratio = np.abs((upper_bound - data_sample[1]) / data_sample[1])  
        result.append([data_sample[0], province, region, data_sample[1] * 1e6, lower_bound * 1e6, upper_bound * 1e6, change_ratio])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast", data_sample[1] * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_lower", lower_bound * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_upper", upper_bound * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_change_ratio", change_ratio])

Forecasting Amnat Charoen ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Ang Thong ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Bangkok Metropolis ....
Forecasting Bueng Kan ....
Forecasting Buri Ram ....
Forecasting Chachoengsao ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Chai Nat ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Chaiyaphum ....
Forecasting Chanthaburi ....
Forecasting Chiang Mai ....
Forecasting Chiang Rai ....
Forecasting Chon Buri ....
Forecasting Chumphon ....
Forecasting Kalasin ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Kamphaeng Phet ....
Forecasting Kanchanaburi ....
Forecasting Khon Kaen ....
Forecasting Krabi ....
Forecasting Lampang ....
Forecasting Lamphun ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Loei ....
Forecasting Lop Buri ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Mae Hong Son ....
Forecasting Maha Sarakham ....


  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Mukdahan ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Nakhon Nayok ....
Forecasting Nakhon Pathom ....
Forecasting Nakhon Phanom ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Nakhon Ratchasima ....
Forecasting Nakhon Sawan ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Nakhon Si Thammarat ....
Forecasting Nan ....
Forecasting Narathiwat ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Nong Bua Lam Phu ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Nong Khai ....
Forecasting Nonthaburi ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Pathum Thani ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Pattani ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phangnga ....
Forecasting Phatthalung ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phayao ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phetchabun ....
Forecasting Phetchaburi ....
Forecasting Phichit ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phitsanulok ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phra Nakhon Si Ayutthaya ....
Forecasting Phrae ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Phuket ....
Forecasting Prachin Buri ....
Forecasting Prachuap Khiri Khan ....
Forecasting Ranong ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Ratchaburi ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Rayong ....
Forecasting Roi Et ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Sa Kaeo ....
Forecasting Sakon Nakhon ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Samut Prakan ....
Forecasting Samut Sakhon ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Samut Songkhram ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Saraburi ....
Forecasting Satun ....
Forecasting Si Sa Ket ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Sing Buri ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Songkhla ....
Forecasting Sukhothai ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Suphan Buri ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Surat Thani ....
Forecasting Surin ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Tak ....
Forecasting Trang ....
Forecasting Trat ....
Forecasting Ubon Ratchathani ....
Forecasting Udon Thani ....
Forecasting Uthai Thani ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Uttaradit ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Yala ....


  return np.roots(self.polynomial_reduced_ma)**-1


Forecasting Yasothon ....


  return np.roots(self.polynomial_reduced_ma)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


In [20]:
df_result_variable = pd.DataFrame(result_variable, columns=["date", "province", "region", "variable", "value"])
df_result_variable["model"] = "SARIMAX"
df_result_variable

Unnamed: 0,date,province,region,variable,value,model
0,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast,2.431271e+07,SARIMAX
1,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_lower,5.354707e+06,SARIMAX
2,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_upper,4.327072e+07,SARIMAX
3,2023-11-01,Amnat Charoen,east_northeast,revenue_all_forecast_change_ratio,7.797569e-01,SARIMAX
4,2023-12-01,Amnat Charoen,east_northeast,revenue_all_forecast,2.696422e+07,SARIMAX
...,...,...,...,...,...,...
3691,2024-09-01,Yasothon,east_northeast,revenue_all_forecast_change_ratio,1.744079e+00,SARIMAX
3692,2024-10-01,Yasothon,east_northeast,revenue_all_forecast,4.381629e+07,SARIMAX
3693,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_lower,-3.423415e+07,SARIMAX
3694,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_upper,1.218667e+08,SARIMAX


In [21]:
df_result = pd.DataFrame(result, columns=["date", "province", "region", "revenue_all_forecast", "revenue_all_forecast_lower",
                                          "revenue_all_forecast_upper", "revenue_all_forecast_change_ratio"])
df_result["model"] = "SARIMAX"
df_result

Unnamed: 0,date,province,region,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2023-11-01,Amnat Charoen,east_northeast,2.431271e+07,5.354707e+06,4.327072e+07,0.779757,SARIMAX
1,2023-12-01,Amnat Charoen,east_northeast,2.696422e+07,-4.640587e+04,5.397485e+07,1.001721,SARIMAX
2,2024-01-01,Amnat Charoen,east_northeast,2.825995e+07,-4.904838e+06,6.142474e+07,1.173561,SARIMAX
3,2024-02-01,Amnat Charoen,east_northeast,2.603087e+07,-1.231278e+07,6.437451e+07,1.473007,SARIMAX
4,2024-03-01,Amnat Charoen,east_northeast,2.347544e+07,-1.942639e+07,6.637727e+07,1.827520,SARIMAX
...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,4.591425e+07,-2.127386e+07,1.131023e+08,1.463339,SARIMAX
920,2024-07-01,Yasothon,east_northeast,4.345563e+07,-2.680604e+07,1.137173e+08,1.616860,SARIMAX
921,2024-08-01,Yasothon,east_northeast,4.372401e+07,-2.934191e+07,1.167899e+08,1.671071,SARIMAX
922,2024-09-01,Yasothon,east_northeast,4.336167e+07,-3.226453e+07,1.189879e+08,1.744079,SARIMAX


In [22]:
df_variables = pd.concat([df_variables, df_result_variable])
df = pd.concat([df, df_result]) 

## Prophet

In [23]:
train_data = thai_revenue_all.rename(columns={"date": "ds", "revenue_all": "y"})

In [24]:
df_covid = train_data.copy()
df_covid['pre_covid'] = pd.to_datetime(df_covid['ds']) < pd.to_datetime('2020-03-21')
df_covid['post_covid'] = ~df_covid['pre_covid']

In [25]:
train_covid_data = covid_data.reset_index().rename(columns={"date": "ds", "new_cases": "covid_case"}).merge(df_covid, on="ds")

In [26]:
import datetime


lockdowns = pd.DataFrame([
    {'holiday': 'lockdown_1', 'ds': '2020-03-01', 'lower_window': 0, 'ds_upper': '2020-06-01'},
    {'holiday': 'lockdown_2', 'ds': '2021-05-01', 'lower_window': 0, 'ds_upper': '2021-09-01'},
])

for t_col in ['ds', 'ds_upper']:
    lockdowns[t_col] = pd.to_datetime(lockdowns[t_col])
lockdowns['upper_window'] = (lockdowns['ds_upper'] - lockdowns['ds']).dt.days
lockdowns


changepoints = (
    # 5 potential changepoints this time
    pd.date_range('2020-07-01', '2021-04-01', periods=5).date.tolist() + 
    [datetime.date(2021, 9, 1)]
    # 1 potential changepoints in 1 year 2 months
)


In [27]:
from prophet import Prophet


result = []
result_variable = []

for province, region in map_province_to_region.items():
    print(f"Forecasting {province} ....")
    train_covid_data_province = train_covid_data[train_covid_data["province"] == province].sort_values(by="ds").reset_index(drop=True)
    train_data = train_covid_data_province
    
        
    prophet_model = Prophet(holidays=lockdowns, 
                            yearly_seasonality=False, 
                            changepoints=changepoints, 
                            changepoint_prior_scale=0.15)

    prophet_model.add_seasonality(
        name='yearly_pre_covid',
        period=365,
        fourier_order=10,
        condition_name='pre_covid',
        prior_scale=30
    )

    prophet_model.add_seasonality(
        name='yearly_post_covid',
        period=365,
        fourier_order=10,
        condition_name='post_covid',
        prior_scale=80
    )

    prophet_model.fit(train_data)

    future = prophet_model.make_future_dataframe(periods=FORECASTING_MONTH,freq='MS')
    future['pre_covid'] = pd.to_datetime(future['ds']) < pd.to_datetime('2020-03-21')
    future['post_covid'] = ~future['pre_covid']
    future['covid_case'] = 680

    test_prediced_result = prophet_model.predict(future)

    for index, row in test_prediced_result.reset_index().sort_values(by="ds")[-FORECASTING_MONTH:].iterrows():
        yhat, yhat_lower, yhat_upper  = row["yhat"], row["yhat_lower"],  row["yhat_upper"]
        change_ratio = np.abs((yhat_upper - yhat) / yhat)
        result.append([row["ds"], province, region, yhat * 1e6, yhat_lower * 1e6, yhat_upper * 1e6, change_ratio])
        
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast", yhat * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_lower", yhat_lower * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_upper", yhat_upper * 1e6])
        result_variable.append([data_sample[0], province, region, "revenue_all_forecast_change_ratio", change_ratio])

Importing plotly failed. Interactive plots will not work.


Forecasting Amnat Charoen ....


05:13:38 - cmdstanpy - INFO - Chain [1] start processing
05:13:42 - cmdstanpy - INFO - Chain [1] done processing
05:13:43 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Ang Thong ....


05:13:46 - cmdstanpy - INFO - Chain [1] done processing
05:13:46 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Bangkok Metropolis ....


05:13:50 - cmdstanpy - INFO - Chain [1] done processing
05:13:51 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Bueng Kan ....


05:13:53 - cmdstanpy - INFO - Chain [1] done processing
05:13:53 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Buri Ram ....


05:13:56 - cmdstanpy - INFO - Chain [1] done processing
05:13:56 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chachoengsao ....


05:13:59 - cmdstanpy - INFO - Chain [1] done processing
05:14:00 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chai Nat ....


05:14:02 - cmdstanpy - INFO - Chain [1] done processing
05:14:03 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chaiyaphum ....


05:14:06 - cmdstanpy - INFO - Chain [1] done processing
05:14:06 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chanthaburi ....


05:14:09 - cmdstanpy - INFO - Chain [1] done processing
05:14:09 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chiang Mai ....


05:14:13 - cmdstanpy - INFO - Chain [1] done processing
05:14:14 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chiang Rai ....


05:14:17 - cmdstanpy - INFO - Chain [1] done processing
05:14:18 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chon Buri ....


05:14:21 - cmdstanpy - INFO - Chain [1] done processing
05:14:21 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Chumphon ....


05:14:25 - cmdstanpy - INFO - Chain [1] done processing
05:14:25 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Kalasin ....


05:14:28 - cmdstanpy - INFO - Chain [1] done processing
05:14:29 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Kamphaeng Phet ....


05:14:32 - cmdstanpy - INFO - Chain [1] done processing
05:14:32 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Kanchanaburi ....


05:14:36 - cmdstanpy - INFO - Chain [1] done processing
05:14:36 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Khon Kaen ....


05:14:42 - cmdstanpy - INFO - Chain [1] done processing


Forecasting Krabi ....


05:14:43 - cmdstanpy - INFO - Chain [1] start processing
05:14:45 - cmdstanpy - INFO - Chain [1] done processing
05:14:45 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Lampang ....


05:14:48 - cmdstanpy - INFO - Chain [1] done processing
05:14:49 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Lamphun ....


05:14:53 - cmdstanpy - INFO - Chain [1] done processing
05:14:54 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Loei ....


05:14:57 - cmdstanpy - INFO - Chain [1] done processing
05:14:57 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Lop Buri ....


05:15:01 - cmdstanpy - INFO - Chain [1] done processing
05:15:01 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Mae Hong Son ....


05:15:03 - cmdstanpy - INFO - Chain [1] done processing
05:15:03 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Maha Sarakham ....


05:15:05 - cmdstanpy - INFO - Chain [1] done processing
05:15:06 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Mukdahan ....


05:15:08 - cmdstanpy - INFO - Chain [1] done processing
05:15:08 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Nayok ....


05:15:11 - cmdstanpy - INFO - Chain [1] done processing
05:15:12 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Pathom ....


05:15:16 - cmdstanpy - INFO - Chain [1] done processing
05:15:16 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Phanom ....


05:15:19 - cmdstanpy - INFO - Chain [1] done processing
05:15:20 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Ratchasima ....


05:15:24 - cmdstanpy - INFO - Chain [1] done processing
05:15:24 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Sawan ....


05:15:27 - cmdstanpy - INFO - Chain [1] done processing
05:15:27 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nakhon Si Thammarat ....


05:15:31 - cmdstanpy - INFO - Chain [1] done processing
05:15:31 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nan ....


05:15:36 - cmdstanpy - INFO - Chain [1] done processing
05:15:36 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Narathiwat ....


05:15:39 - cmdstanpy - INFO - Chain [1] done processing
05:15:40 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nong Bua Lam Phu ....


05:15:42 - cmdstanpy - INFO - Chain [1] done processing
05:15:42 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nong Khai ....


05:15:45 - cmdstanpy - INFO - Chain [1] done processing
05:15:45 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Nonthaburi ....


05:15:47 - cmdstanpy - INFO - Chain [1] done processing
05:15:48 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Pathum Thani ....


05:15:51 - cmdstanpy - INFO - Chain [1] done processing
05:15:51 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Pattani ....


05:15:54 - cmdstanpy - INFO - Chain [1] done processing
05:15:54 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phangnga ....


05:15:56 - cmdstanpy - INFO - Chain [1] done processing
05:15:56 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phatthalung ....


05:15:59 - cmdstanpy - INFO - Chain [1] done processing
05:15:59 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phayao ....


05:16:01 - cmdstanpy - INFO - Chain [1] done processing
05:16:02 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phetchabun ....


05:16:05 - cmdstanpy - INFO - Chain [1] done processing
05:16:05 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phetchaburi ....


05:16:08 - cmdstanpy - INFO - Chain [1] done processing
05:16:09 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phichit ....


05:16:10 - cmdstanpy - INFO - Chain [1] done processing
05:16:11 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phitsanulok ....


05:16:14 - cmdstanpy - INFO - Chain [1] done processing
05:16:14 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phra Nakhon Si Ayutthaya ....


05:16:16 - cmdstanpy - INFO - Chain [1] done processing
05:16:17 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phrae ....


05:16:19 - cmdstanpy - INFO - Chain [1] done processing
05:16:19 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Phuket ....


05:16:21 - cmdstanpy - INFO - Chain [1] done processing
05:16:21 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Prachin Buri ....


05:16:27 - cmdstanpy - INFO - Chain [1] done processing
05:16:27 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Prachuap Khiri Khan ....


05:16:31 - cmdstanpy - INFO - Chain [1] done processing
05:16:32 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Ranong ....


05:16:34 - cmdstanpy - INFO - Chain [1] done processing
05:16:34 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Ratchaburi ....


05:16:37 - cmdstanpy - INFO - Chain [1] done processing
05:16:37 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Rayong ....


05:16:40 - cmdstanpy - INFO - Chain [1] done processing
05:16:41 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Roi Et ....


05:16:45 - cmdstanpy - INFO - Chain [1] done processing
05:16:45 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Sa Kaeo ....


05:16:48 - cmdstanpy - INFO - Chain [1] done processing
05:16:48 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Sakon Nakhon ....


05:16:51 - cmdstanpy - INFO - Chain [1] done processing
05:16:51 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Samut Prakan ....


05:16:53 - cmdstanpy - INFO - Chain [1] done processing
05:16:54 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Samut Sakhon ....


05:16:57 - cmdstanpy - INFO - Chain [1] done processing
05:16:57 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Samut Songkhram ....


05:17:00 - cmdstanpy - INFO - Chain [1] done processing
05:17:00 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Saraburi ....


05:17:02 - cmdstanpy - INFO - Chain [1] done processing
05:17:02 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Satun ....


05:17:06 - cmdstanpy - INFO - Chain [1] done processing
05:17:06 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Si Sa Ket ....


05:17:09 - cmdstanpy - INFO - Chain [1] done processing
05:17:09 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Sing Buri ....


05:17:11 - cmdstanpy - INFO - Chain [1] done processing
05:17:11 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Songkhla ....


05:17:16 - cmdstanpy - INFO - Chain [1] done processing
05:17:16 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Sukhothai ....


05:17:19 - cmdstanpy - INFO - Chain [1] done processing
05:17:20 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Suphan Buri ....


05:17:25 - cmdstanpy - INFO - Chain [1] done processing
05:17:25 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Surat Thani ....


05:17:31 - cmdstanpy - INFO - Chain [1] done processing
05:17:31 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Surin ....


05:17:35 - cmdstanpy - INFO - Chain [1] done processing
05:17:35 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Tak ....


05:17:38 - cmdstanpy - INFO - Chain [1] done processing
05:17:38 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Trang ....


05:17:42 - cmdstanpy - INFO - Chain [1] done processing
05:17:42 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Trat ....


05:17:45 - cmdstanpy - INFO - Chain [1] done processing
05:17:45 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Ubon Ratchathani ....


05:17:50 - cmdstanpy - INFO - Chain [1] done processing
05:17:51 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Udon Thani ....


05:17:54 - cmdstanpy - INFO - Chain [1] done processing
05:17:54 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Uthai Thani ....


05:17:58 - cmdstanpy - INFO - Chain [1] done processing
05:17:58 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Uttaradit ....


05:18:01 - cmdstanpy - INFO - Chain [1] done processing
05:18:01 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Yala ....


05:18:04 - cmdstanpy - INFO - Chain [1] done processing
05:18:05 - cmdstanpy - INFO - Chain [1] start processing


Forecasting Yasothon ....


05:18:08 - cmdstanpy - INFO - Chain [1] done processing


In [33]:
df_result_variable = pd.DataFrame(result_variable, columns=["date", "province", "region", "variable", "value"])
df_result_variable["model"] = "Prophet"
df_result_variable

Unnamed: 0,date,province,region,variable,value,model
0,2024-10-01,Amnat Charoen,east_northeast,revenue_all_forecast,2.788035e+07,Prophet
1,2024-10-01,Amnat Charoen,east_northeast,revenue_all_forecast_lower,2.264024e+07,Prophet
2,2024-10-01,Amnat Charoen,east_northeast,revenue_all_forecast_upper,3.311344e+07,Prophet
3,2024-10-01,Amnat Charoen,east_northeast,revenue_all_forecast_change_ratio,1.876979e-01,Prophet
4,2024-10-01,Amnat Charoen,east_northeast,revenue_all_forecast,3.574062e+07,Prophet
...,...,...,...,...,...,...
3691,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_change_ratio,2.668407e-01,Prophet
3692,2024-10-01,Yasothon,east_northeast,revenue_all_forecast,7.640516e+07,Prophet
3693,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_lower,7.048277e+07,Prophet
3694,2024-10-01,Yasothon,east_northeast,revenue_all_forecast_upper,8.239466e+07,Prophet


In [29]:
df_result = pd.DataFrame(result, columns=["date", "province", "region", "revenue_all_forecast", "revenue_all_forecast_lower",
                                          "revenue_all_forecast_upper", "revenue_all_forecast_change_ratio"])
df_result["model"] = "Prophet"
df_result

Unnamed: 0,date,province,region,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2023-11-01,Amnat Charoen,east_northeast,2.788035e+07,2.264024e+07,3.311344e+07,0.187698,Prophet
1,2023-12-01,Amnat Charoen,east_northeast,3.574062e+07,3.013518e+07,4.117264e+07,0.151984,Prophet
2,2024-01-01,Amnat Charoen,east_northeast,3.177706e+07,2.650863e+07,3.706013e+07,0.166254,Prophet
3,2024-02-01,Amnat Charoen,east_northeast,2.794022e+07,2.295051e+07,3.336793e+07,0.194261,Prophet
4,2024-03-01,Amnat Charoen,east_northeast,5.368480e+07,4.871589e+07,5.909614e+07,0.100798,Prophet
...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,2.439746e+07,1.875801e+07,3.017744e+07,0.236909,Prophet
920,2024-07-01,Yasothon,east_northeast,5.998031e+07,5.473012e+07,6.559584e+07,0.093623,Prophet
921,2024-08-01,Yasothon,east_northeast,4.026298e+07,3.461346e+07,4.612074e+07,0.145487,Prophet
922,2024-09-01,Yasothon,east_northeast,2.285483e+07,1.697228e+07,2.895343e+07,0.266841,Prophet


In [34]:
df_variables = pd.concat([df_variables, df_result_variable])
df = pd.concat([df, df_result]) 

In [40]:
df_variables

Unnamed: 0,date,province,region,variable,value,model
0,2019-01-01,Bangkok Metropolis,central,occupancy_rate,9.337000e-01,
1,2019-01-01,Lop Buri,central,occupancy_rate,6.132000e-01,
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,occupancy_rate,7.337000e-01,
3,2019-01-01,Saraburi,central,occupancy_rate,6.733000e-01,
4,2019-01-01,Chai Nat,central,occupancy_rate,7.931000e-01,
...,...,...,...,...,...,...
2767,2024-10-01 00:00:00,Samut Sakhon,central,revenue_all_forecast_change_ratio,5.927104e-01,Prophet
2768,2024-10-01 00:00:00,Samut Sakhon,central,revenue_all_forecast,2.688318e+08,Prophet
2769,2024-10-01 00:00:00,Samut Sakhon,central,revenue_all_forecast_lower,2.466960e+08,Prophet
2770,2024-10-01 00:00:00,Samut Sakhon,central,revenue_all_forecast_upper,2.899011e+08,Prophet


In [44]:
df

Unnamed: 0,date,province,region,occupancy_rate,no_tourist_occupied,no_tourist_all,no_tourist_thai,no_tourist_foreign,revenue_all,revenue_thai,revenue_foreign,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2019-01-01,Bangkok Metropolis,central,0.9337,3334971.0,5959075.0,3534061.0,2425014.0,8.192649e+10,2.974258e+10,5.218391e+10,,,,,
1,2019-01-01,Lop Buri,central,0.6132,51858.0,268664.0,266301.0,2363.0,4.572400e+08,4.518300e+08,5.410000e+06,,,,,
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,0.7337,117052.0,730329.0,561553.0,168776.0,1.438730e+09,1.054250e+09,3.844800e+08,,,,,
3,2019-01-01,Saraburi,central,0.6733,89850.0,207236.0,201400.0,5836.0,3.477900e+08,3.361900e+08,1.160000e+07,,,,,
4,2019-01-01,Chai Nat,central,0.7931,27141.0,79073.0,78514.0,559.0,1.017900e+08,1.009000e+08,8.900000e+05,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,,,,,,,,,2.439746e+07,1.875801e+07,3.017744e+07,0.236909,Prophet
920,2024-07-01,Yasothon,east_northeast,,,,,,,,,5.998031e+07,5.473012e+07,6.559584e+07,0.093623,Prophet
921,2024-08-01,Yasothon,east_northeast,,,,,,,,,4.026298e+07,3.461346e+07,4.612074e+07,0.145487,Prophet
922,2024-09-01,Yasothon,east_northeast,,,,,,,,,2.285483e+07,1.697228e+07,2.895343e+07,0.266841,Prophet


# Output results

In [45]:
df_variables["date"] = pd.to_datetime(df_variables["date"])
df["date"] = pd.to_datetime(df["date"])

In [46]:
df = df.where(pd.notnull(df), None)
df

Unnamed: 0,date,province,region,occupancy_rate,no_tourist_occupied,no_tourist_all,no_tourist_thai,no_tourist_foreign,revenue_all,revenue_thai,revenue_foreign,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2019-01-01,Bangkok Metropolis,central,0.9337,3334971.0,5959075.0,3534061.0,2425014.0,8.192649e+10,2.974258e+10,5.218391e+10,,,,,
1,2019-01-01,Lop Buri,central,0.6132,51858.0,268664.0,266301.0,2363.0,4.572400e+08,4.518300e+08,5.410000e+06,,,,,
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,0.7337,117052.0,730329.0,561553.0,168776.0,1.438730e+09,1.054250e+09,3.844800e+08,,,,,
3,2019-01-01,Saraburi,central,0.6733,89850.0,207236.0,201400.0,5836.0,3.477900e+08,3.361900e+08,1.160000e+07,,,,,
4,2019-01-01,Chai Nat,central,0.7931,27141.0,79073.0,78514.0,559.0,1.017900e+08,1.009000e+08,8.900000e+05,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,,,,,,,,,2.439746e+07,1.875801e+07,3.017744e+07,0.236909,Prophet
920,2024-07-01,Yasothon,east_northeast,,,,,,,,,5.998031e+07,5.473012e+07,6.559584e+07,0.093623,Prophet
921,2024-08-01,Yasothon,east_northeast,,,,,,,,,4.026298e+07,3.461346e+07,4.612074e+07,0.145487,Prophet
922,2024-09-01,Yasothon,east_northeast,,,,,,,,,2.285483e+07,1.697228e+07,2.895343e+07,0.266841,Prophet


In [47]:
df_variables.to_csv(DATA_VARIABLE_OUTPUT_PATH, index=False)

In [48]:
df.to_csv(DATA_OUTPUT_PATH, index=False)

In [49]:
df[df["model"] == "SARIMAX"]

Unnamed: 0,date,province,region,occupancy_rate,no_tourist_occupied,no_tourist_all,no_tourist_thai,no_tourist_foreign,revenue_all,revenue_thai,revenue_foreign,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2023-11-01,Amnat Charoen,east_northeast,,,,,,,,,2.431271e+07,5.354707e+06,4.327072e+07,0.779757,SARIMAX
1,2023-12-01,Amnat Charoen,east_northeast,,,,,,,,,2.696422e+07,-4.640587e+04,5.397485e+07,1.001721,SARIMAX
2,2024-01-01,Amnat Charoen,east_northeast,,,,,,,,,2.825995e+07,-4.904838e+06,6.142474e+07,1.173561,SARIMAX
3,2024-02-01,Amnat Charoen,east_northeast,,,,,,,,,2.603087e+07,-1.231278e+07,6.437451e+07,1.473007,SARIMAX
4,2024-03-01,Amnat Charoen,east_northeast,,,,,,,,,2.347544e+07,-1.942639e+07,6.637727e+07,1.827520,SARIMAX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,2024-06-01,Yasothon,east_northeast,,,,,,,,,4.591425e+07,-2.127386e+07,1.131023e+08,1.463339,SARIMAX
920,2024-07-01,Yasothon,east_northeast,,,,,,,,,4.345563e+07,-2.680604e+07,1.137173e+08,1.616860,SARIMAX
921,2024-08-01,Yasothon,east_northeast,,,,,,,,,4.372401e+07,-2.934191e+07,1.167899e+08,1.671071,SARIMAX
922,2024-09-01,Yasothon,east_northeast,,,,,,,,,4.336167e+07,-3.226453e+07,1.189879e+08,1.744079,SARIMAX


# Format Results

In [50]:
FORECAST_DATA_INPUT = "../data/forecast_data.csv"
FORECAST_VARIABLE_DATA_INPUT = "../data/forecast_data_variable.csv"

In [51]:
import pandas as pd 

forecast_data = pd.read_csv(FORECAST_DATA_INPUT)
forecast_variable = pd.read_csv(FORECAST_VARIABLE_DATA_INPUT)

In [52]:
forecast_data 

Unnamed: 0,date,province,region,occupancy_rate,no_tourist_occupied,no_tourist_all,no_tourist_thai,no_tourist_foreign,revenue_all,revenue_thai,revenue_foreign,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2019-01-01,Bangkok Metropolis,central,0.9337,3334971.0,5959075.0,3534061.0,2425014.0,8.192649e+10,2.974258e+10,5.218391e+10,,,,,
1,2019-01-01,Lop Buri,central,0.6132,51858.0,268664.0,266301.0,2363.0,4.572400e+08,4.518300e+08,5.410000e+06,,,,,
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,0.7337,117052.0,730329.0,561553.0,168776.0,1.438730e+09,1.054250e+09,3.844800e+08,,,,,
3,2019-01-01,Saraburi,central,0.6733,89850.0,207236.0,201400.0,5836.0,3.477900e+08,3.361900e+08,1.160000e+07,,,,,
4,2019-01-01,Chai Nat,central,0.7931,27141.0,79073.0,78514.0,559.0,1.017900e+08,1.009000e+08,8.900000e+05,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7233,2024-06-01,Yasothon,east_northeast,,,,,,,,,2.439746e+07,1.875801e+07,3.017744e+07,0.236909,Prophet
7234,2024-07-01,Yasothon,east_northeast,,,,,,,,,5.998031e+07,5.473012e+07,6.559584e+07,0.093623,Prophet
7235,2024-08-01,Yasothon,east_northeast,,,,,,,,,4.026298e+07,3.461346e+07,4.612074e+07,0.145487,Prophet
7236,2024-09-01,Yasothon,east_northeast,,,,,,,,,2.285483e+07,1.697228e+07,2.895343e+07,0.266841,Prophet


In [53]:
forecast_data["date"] = pd.to_datetime(forecast_data["date"])

In [54]:
import datetime

data_need_added = []
model = ["SARIMA", "SARIMAX", "Prophet"]

for index, row in forecast_data.reset_index().iterrows():
    if row["date"] == datetime.datetime(2023, 10, 1):
        data_new = {"date": row["date"], 
                    "province": row["province"],
                    "region": row["region"],
                    "revenue_all_forecast": row["revenue_all"]}
        
        for model_sample in model:
            data = data_new.copy()
            data["model"] = model_sample
            data_need_added.append(data)
        


In [55]:
forecast_data_result = pd.concat([forecast_data, pd.DataFrame(data_need_added)])
forecast_data_result

Unnamed: 0,date,province,region,occupancy_rate,no_tourist_occupied,no_tourist_all,no_tourist_thai,no_tourist_foreign,revenue_all,revenue_thai,revenue_foreign,revenue_all_forecast,revenue_all_forecast_lower,revenue_all_forecast_upper,revenue_all_forecast_change_ratio,model
0,2019-01-01,Bangkok Metropolis,central,0.9337,3334971.0,5959075.0,3534061.0,2425014.0,8.192649e+10,2.974258e+10,5.218391e+10,,,,,
1,2019-01-01,Lop Buri,central,0.6132,51858.0,268664.0,266301.0,2363.0,4.572400e+08,4.518300e+08,5.410000e+06,,,,,
2,2019-01-01,Phra Nakhon Si Ayutthaya,central,0.7337,117052.0,730329.0,561553.0,168776.0,1.438730e+09,1.054250e+09,3.844800e+08,,,,,
3,2019-01-01,Saraburi,central,0.6733,89850.0,207236.0,201400.0,5836.0,3.477900e+08,3.361900e+08,1.160000e+07,,,,,
4,2019-01-01,Chai Nat,central,0.7931,27141.0,79073.0,78514.0,559.0,1.017900e+08,1.009000e+08,8.900000e+05,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,2023-10-01,Amnat Charoen,east_northeast,,,,,,,,,23000000.0,,,,SARIMAX
227,2023-10-01,Amnat Charoen,east_northeast,,,,,,,,,23000000.0,,,,Prophet
228,2023-10-01,Ubon Ratchathani,east_northeast,,,,,,,,,421370000.0,,,,SARIMA
229,2023-10-01,Ubon Ratchathani,east_northeast,,,,,,,,,421370000.0,,,,SARIMAX


In [9]:
forecast_data_result.to_csv(FORECAST_DATA_INPUT)