In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
import itertools
import numpy as np
import random
import statsmodels.api as sm
# prophet by Facebook
from prophet import Prophet
import holidays
# time series analysis
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')

In [None]:
def iqr(df, degisken):
    """Find the rows whose value in one column is an IQR outlier.

    The fences are placed 1.5 interquartile ranges below the first quartile
    and 1.5 interquartile ranges above the third quartile.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to inspect.
    degisken : str
        Name of the column to inspect.

    Returns
    -------
    tuple
        ``(outlier_rows, lower_t, upper_t)``: the rows of ``df`` lying
        strictly outside the fences, followed by the lower and upper fence.
    """
    values = df[degisken]

    # First and third quartile in a single call.
    q1, q3 = np.quantile(values, [0.25, 0.75])
    spread = q3 - q1  # width of the middle 50% of the data

    lower_t = q1 - 1.5 * spread
    upper_t = q3 + 1.5 * spread

    # A row is an outlier when it falls strictly below or strictly above the fences.
    below_fence = values < lower_t
    above_fence = values > upper_t
    outlier_rows = df[below_fence | above_fence]

    return outlier_rows, lower_t, upper_t

In [None]:
# Load the input dataset from a CSV file located in the working directory.
df = pd.read_csv('final_data.csv')
# Show the loaded frame as the cell output.
df

In [None]:
# Convert the `ds` column to pandas datetimes; later cells compare it
# against a date string to split the data.
df['ds'] = pd.to_datetime(df['ds'])
# Show the frame again so the converted column can be inspected.
df

In [None]:
def pct_change(y_test, y_pred):
    """Return the mean absolute relative change from ``y_pred`` to ``y_test``.

    For every row the value ``(y_test - y_pred) / y_pred`` is computed (the
    change is measured relative to the prediction); the absolute values are
    then averaged.

    Parameters
    ----------
    y_test : array-like
        Observed values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    float
        Mean of the absolute row-wise relative changes.
    """
    # Columns are added one after another so the frame is shaped by `y_test`
    # first and `y_pred` is aligned to it.
    paired = pd.DataFrame()
    paired['y1'] = y_test
    paired['y2'] = y_pred

    # With the columns ordered (y2, y1), a column-wise pct_change leaves the
    # change from y2 to y1 in the `y1` column.
    relative_change = paired[['y2', 'y1']].pct_change(axis=1)['y1']
    return float(abs(relative_change).mean())

In [None]:
# Cap the IQR outliers of the target column `y` separately for every city and
# stitch the cleaned per-city frames back together into `df_all`.
df_list = []
for i in ['Batman', 'Diyarbakir', 'Mardin', 'Siirt', 'Sirnak', 'Urfa']:
    # Work on an independent copy: the filtered frame is otherwise a slice of
    # `df`, and writing into a slice is unreliable (it is a silent no-op when
    # pandas Copy-on-Write is active).
    df_sub = df[df['il'] == i].copy()
    TuketilenEnerjiOutlier,lowerThresh,upperThresh = iqr(df_sub, "y")
    print('------' + i + '------')
    print("Çeyrekler Açıklığı  (IQR) Yöntemine Göre: ") 
    print(f"TUKETILEN_ENERJI_KWH Değişkeninde Bulunan Aykırı Değer Miktarı: {len(TuketilenEnerjiOutlier)}")
    print(f"Lower Thresh Değeri: {lowerThresh} | Upper Thresh Değeri: {upperThresh}") 
    print('-------------------')

    # Replace the outliers with the column mean, upper side first. The mean
    # used for the lower side is taken after the upper side has already been
    # capped, matching the order of the two original steps.
    y_capped = df_sub['y']
    y_capped = y_capped.mask(y_capped > upperThresh, y_capped.mean())
    y_capped = y_capped.mask(y_capped < lowerThresh, y_capped.mean())
    # Explicit assignment instead of a chained `inplace=True` call, so the
    # result is guaranteed to land in `df_sub`.
    df_sub['y'] = y_capped

    df_list.append(df_sub)
df_all = pd.concat(df_list)

In [None]:
from sklearn.model_selection import ParameterGrid

# Search space for the Prophet hyper-parameters tuned in this notebook.
_prior_scales = [step / 10 for step in range(1, 10)]   # 0.1, 0.2, ..., 0.9
params_grid = {
    'seasonality_mode': ('multiplicative', 'additive'),
    'changepoint_prior_scale': list(_prior_scales),
    'holidays_prior_scale': list(_prior_scales),
    'n_changepoints': list(range(50, 501, 50)),         # 50, 100, ..., 500
}
grid = ParameterGrid(params_grid)

# The grid knows its own size, so there is no need to iterate just to count.
cnt = len(grid)

print('Total Possible Models',cnt)

In [None]:
# Distinct city names present in the cleaned data, in order of first appearance.
city_list = list(pd.unique(df_all['il']))
city_list

In [None]:
# Grid-search the Prophet hyper-parameters separately for every city and keep,
# per city, the combination with the lowest MAPE on the hold-out period.
df_param = pd.DataFrame(columns=['il', 'mape', 'params'])

# Rows dated before this day are used for training; rows on or after it form
# the hold-out set the candidates are scored on.
split_date = '2023-01-01'

for i in city_list:
    # Order the rows chronologically so the hold-out actuals line up row by row
    # with the forecast (NOTE(review): Prophet is understood to return its
    # predictions sorted by `ds` -- confirm against the installed version).
    df_sub = (
        df_all[df_all['il'] == i]
        .sort_values('ds', kind='mergesort')
        .reset_index(drop = True)
        .drop(columns='il')
    )
    df_train = df_sub[df_sub['ds'] < split_date].reset_index(drop = True)
    df_test = df_sub[df_sub['ds'] >= split_date].reset_index(drop = True)

    # Extra regressors are picked by position: every column after the first
    # two, except the last one (presumably the first two are `ds` and `y` --
    # verify against the CSV layout).
    regressor_cols = list(df_train.columns)[2:-1]

    # One record per evaluated combination. Collected in a plain list because
    # DataFrame.append no longer exists in pandas >= 2.0.
    results = []
    for p in grid:
        print(p)
        random.seed(0)
        model =Prophet(changepoint_prior_scale = p['changepoint_prior_scale'],
                         holidays_prior_scale = p['holidays_prior_scale'],
                         n_changepoints = p['n_changepoints'],
                         seasonality_mode = p['seasonality_mode'],
                         weekly_seasonality=True,
                         daily_seasonality = True,
                         yearly_seasonality = True,
                         interval_width=0.95)
        for col in regressor_cols:
            model.add_regressor(col, standardize = False)
        model.add_country_holidays(country_name='TR')
        model.fit(df_train)

        test_forecast = model.predict(df_test)[['ds','yhat']]

        # Score on the hold-out rows only. The earlier version padded the
        # forecast with the training actuals, which shrank the reported error
        # by the training share of the data; the ranking of the candidates is
        # the same, but the number is now the real hold-out MAPE.
        mape = mean_absolute_percentage_error(df_test['y'], test_forecast['yhat'].abs())
        print('MAPE------------------------------------',mape)
        results.append({'MAPE':mape,'Parameters':p})

    # Build the per-city result table once and pick the best-scoring row.
    model_parameters = pd.DataFrame(results, columns = ['MAPE','Parameters'])
    parameters = model_parameters.sort_values(by=['MAPE']).reset_index(drop=True)
    mpe = parameters['MAPE'][0]
    params = parameters['Parameters'][0]
    # Written row by row so partial results survive an interrupted run.
    df_param.loc[len(df_param)] = [i, mpe, params]
df_param

In [None]:
# Show the per-city summary (city, best MAPE, best parameter set) once more.
df_param

In [None]:
# Map every city name to its best parameter dictionary.
best_params = df_param.set_index('il')['params'].to_dict()
best_params

In [None]:
import json

In [None]:
# Persist the per-city best parameters as JSON text.
with open('prophetCityHourlyParam.txt', 'w') as out_file:
    json.dump(best_params, out_file)