Tässä tiedostossa testataan vedenkulutusta prophetin algoritmilla erilaisissa hyperparametreilla. Tulokset viedään tiedostoon.

In [13]:
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation

# Luodaan parametrit listoina
start_dates = ['2023-01-01 00:00:00']
end_date = '2024-01-31 23:00:00'
changepoint_prior_scales = [0.001]
interval_widths = [0.5]
horizons = [375]
initials = [0.5]

# Luodaan tyhjä DataFrame tulosten tallentamista varten
results_dfs = []

# Luodaan pääohjelma, joka suorittaa iteraatiot parametreille
for start_date in start_dates:
    for changepoint_prior_scale in changepoint_prior_scales:
        for interval_width in interval_widths:
            for horizon in horizons:
                for initial in initials:
                    # Ladataan data
                    df_water = pd.read_excel('vedenkulutus.xlsx')
                    df_water['ds'] = pd.to_datetime(df_water['ds'])

                    # Suodatetaan data halutun ajanjaksolle
                    df_water_filtered = df_water[(df_water['ds'] >= start_date) & (df_water['ds'] <= end_date)]

                    # Määritellään Prophet-malli ja sovitetaan se dataan
                    m = Prophet(changepoint_prior_scale=changepoint_prior_scale)
                    m.fit(df_water_filtered)

                    # Tehdään ennuste tulevaisuudelle
                    future = m.make_future_dataframe(periods=horizon, freq='h') 
                    water_forecast = Prophet(interval_width=interval_width).fit(df_water_filtered).predict(future)

                    # Suoritetaan ristivalidointi
                    df_cv = cross_validation(m, horizon=f'{horizon} hours', initial=initial)

                    # Lasketaan prosentuaalinen osuus
                    filtered_df = df_cv[df_cv['y'] <= 0.02]
                    count_yhat_less_than_02 = (filtered_df['yhat'] <= 0.02).sum()
                    percentage = (count_yhat_less_than_02 / len(filtered_df)) * 100

                    # Tallennetaan tulokset DataFrameen
                    results_dfs.append(pd.DataFrame({'Start_Date': start_date,
                                                     'End_Date': end_date,
                                                      'Changepoint_Prior_Scale': changepoint_prior_scale,
                                                      'Interval_Width': interval_width,
                                                      'Horizon': horizon,
                                                      'Initial': initial,
                                                      'Percentage': percentage}, index=[0]))

# Yhdistetään kaikki tulokset yhteen DataFrameen
results_df = pd.concat(results_dfs, ignore_index=True)

# Tulostetaan tai tallennetaan DataFrame tulokset
print(results_df)
# results_df.to_excel('prophet_results.xlsx', index=False)  # Tallentaa tulokset Excel-tiedostoon

13:54:33 - cmdstanpy - INFO - Chain [1] start processing
13:54:34 - cmdstanpy - INFO - Chain [1] done processing
13:54:34 - cmdstanpy - INFO - Chain [1] start processing
13:54:35 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 7 days which is larger than initial window. Consider increasing initial.


  0%|          | 0/49 [00:00<?, ?it/s]

13:54:38 - cmdstanpy - INFO - Chain [1] start processing
13:54:38 - cmdstanpy - INFO - Chain [1] done processing
13:54:38 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
13:54:38 - cmdstanpy - INFO - Chain [1] start processing
13:54:38 - cmdstanpy - INFO - Chain [1] done processing
13:54:38 - cmdstanpy - INFO - Chain [1] start processing
13:54:38 - cmdstanpy - INFO - Chain [1] done processing
13:54:38 - cmdstanpy - ERROR - Chain [1] error: error during processing Operation not permitted
Optimization terminated abnormally. Falling back to Newton.
13:54:38 - cmdstanpy - INFO - Chain [1] start processing
13:54:39 - cmdstanpy - INFO - Chain [1] done processing
13:54:39 - cmdstanpy - INFO - Chain [1] start processing
13:54:39 - cmdstanpy - INFO - Chain [1] done processing
13:54:39 - cmdstanpy - INFO - Chain [1] start processing
13:54:39 - cmdstanpy - INFO - Chain [1] done processing
13:54:39 -

            Start_Date             End_Date  Changepoint_Prior_Scale  \
0  2023-01-01 00:00:00  2024-01-31 23:00:00                    0.001   

   Interval_Width  Horizon  Initial  Percentage  
0             0.5      375      0.5   53.577371  


In [14]:
results_df.to_excel('prophet_results.xlsx', index=False)  # Tallentaa tulokset Excel-tiedostoon