In [892]:
# !pip install pytz
# !pip install suntime
# !pip install optuna

In [893]:
import pandas as pd
import numpy as np
import seaborn as sn
from suntime import Sun
import pytz
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
import optuna
from scipy.stats import zscore
pd.set_option('display.float_format', lambda x: '%.8f' % x)

# Helper Functions

In [894]:
def datetime_index(dataframe):
    """Return a copy of ``dataframe`` indexed by its parsed ``DateTime`` column.

    Spaces are removed from every column name first (so a header such as
    ``'Date Time'`` becomes ``'DateTime'``). The ``DateTime`` column is then
    parsed with ``pd.to_datetime``, installed as the index under the name
    ``Datetime`` and dropped from the columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that has a ``DateTime`` column once spaces are stripped from
        its column names.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched.
    """
    # Work on a copy so that neither the column renaming nor the helper
    # 'Datetime' column leaks back into the caller's frame.
    indexed = dataframe.copy()
    indexed.columns = indexed.columns.str.replace(' ', '')
    indexed['Datetime'] = pd.to_datetime(indexed['DateTime'])
    return indexed.set_index(['Datetime']).drop(columns=['DateTime'])

def fill_na(dataframe):
    """Return ``dataframe`` with missing ``WWCode`` values set to 0 and cast to int.

    A new frame is returned (column order preserved) instead of assigning
    into the argument: callers pass ``.loc`` slices of a larger frame, and
    writing a column into such a slice raises pandas'
    ``SettingWithCopyWarning``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing a ``WWCode`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of the input whose ``WWCode`` column is an integer column with
        no missing values.
    """
    return dataframe.assign(WWCode=dataframe['WWCode'].fillna(0).astype(int))

def day_night(dataframe, coordinates=(40.239, 33.029), timezone='Europe/Istanbul'):
    """Add calendar features and a daylight flag derived from the index.

    Columns appended, in this order: ``Hour``, ``Day_of_Week``, ``Quarter``,
    ``Month``, ``Year``, ``Day_of_Year``, ``Week``, ``Week_of_Year``, ``Day``.
    ``Week`` and ``Week_of_Year`` both hold the ISO week number. ``Day`` is
    ``True`` when the row's hour lies between the sunrise hour and the sunset
    hour of that date (both inclusive, whole-hour granularity).

    Parameters
    ----------
    dataframe : pandas.DataFrame or pandas.Series-like frame
        Object whose index can be parsed by ``pd.to_datetime``.
        NOTE(review): the index hour is compared directly with local
        sunrise/sunset hours, so the index is presumably already in local
        time -- confirm against the data source.
    coordinates : sequence of two floats, optional
        ``(latitude, longitude)`` handed to ``suntime.Sun``. The default is
        the location the notebook labels "Ankara".
    timezone : str, optional
        Time zone name passed to ``pytz.timezone``.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the extra columns; the input is not modified.
    """
    dataframe = dataframe.copy()
    stamps = pd.DatetimeIndex(pd.to_datetime(dataframe.index))

    dataframe["Hour"] = stamps.hour
    dataframe["Day_of_Week"] = stamps.dayofweek
    dataframe["Quarter"] = stamps.quarter
    dataframe["Month"] = stamps.month
    dataframe["Year"] = stamps.year
    dataframe["Day_of_Year"] = stamps.dayofyear
    # `.dt.week` / `.dt.weekofyear` were deprecated in pandas 1.1 and removed
    # in 2.0; isocalendar() is the supported way to get the same ISO week.
    iso_week = stamps.isocalendar().week.astype('int64').to_numpy()
    dataframe["Week"] = iso_week
    dataframe["Week_of_Year"] = iso_week

    sun = Sun(coordinates[0], coordinates[1])
    tz = pytz.timezone(timezone)
    dates = stamps.date
    # Sunrise/sunset depend only on the calendar date, so look them up once
    # per distinct date rather than twice for every row.
    daylight_hours = {
        day: (
            sun.get_local_sunrise_time(day, local_time_zone=tz).hour,
            sun.get_local_sunset_time(day, local_time_zone=tz).hour,
        )
        for day in set(dates)
    }
    dataframe["Day"] = np.array(
        [
            daylight_hours[day][0] <= hour <= daylight_hours[day][1]
            for day, hour in zip(dates, stamps.hour)
        ],
        dtype=bool,
    )
    return dataframe

def wwcode_encoder(dataframe):
    """Collapse ``WWCode`` into decade buckets, in place.

    Codes below 10 (including negatives) become 1, 10-19 become 2, and so on
    up to 90-99 becoming 10. Codes of 100 or more, and missing values, are
    left unchanged.

    The mapping is computed on the whole column at once. The earlier
    row-by-row ``iterrows`` / ``.loc[index]`` version was slow on hourly
    data and, because ``.loc`` addresses rows by label, overwrote every row
    sharing a duplicated index label with the last value written.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing a numeric ``WWCode`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, for call chaining.
    """
    codes = dataframe['WWCode']
    # floor(code / 10) + 1 yields the bucket; clip folds negative codes into
    # bucket 1, matching the original "code < 10" branch.
    buckets = (codes // 10 + 1).clip(lower=1)
    dataframe['WWCode'] = codes.mask(codes < 100, buckets)
    return dataframe
    
    
def season_encoder(dataframe):
    """Add a ``Season`` column derived from ``Month``, in place.

    Encoding: 1 for months 12, 1, 2 (and any value below 3); 2 for months
    3-5; 3 for months 6-8; 4 for months 9-11; 0 for anything else (values
    above 12 or missing).

    The column is computed in one vectorised step. The earlier row-by-row
    ``iterrows`` / ``.loc[index]`` version was slow and wrote the same value
    to every row sharing a duplicated index label.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing a numeric ``Month`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, for call chaining.
    """
    month = dataframe['Month']
    # np.select takes the first matching condition, which reproduces the
    # original if/elif ordering.
    dataframe['Season'] = np.select(
        [(month < 3) | (month == 12), month < 6, month < 9, month < 12],
        [1, 2, 3, 4],
        default=0,
    )
    return dataframe

def fig_plot(actual=None, predicted=None):
    """Draw an actual-vs-predicted scatter plot on a new 10x15 figure.

    Parameters
    ----------
    actual : array-like, optional
        Observed values for the x axis. Falls back to the notebook-level
        ``Y_test`` when omitted.
    predicted : array-like, optional
        Predicted values for the y axis. Falls back to the notebook-level
        ``pred_val_optuna`` when omitted; the name read previously,
        ``pred_val``, is never assigned in this notebook, so every call
        raised ``NameError``.
    """
    if actual is None:
        actual = Y_test
    if predicted is None:
        predicted = pred_val_optuna
    plt.figure(figsize = (10,15))
    plt.scatter(actual, predicted)
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    
    
def objective(trial):
    """Optuna objective: fit one XGBoost regressor and return its validation RMSE.

    Hyper-parameters are sampled from ``trial``; the model is trained on the
    notebook-level ``X_train`` / ``Y_train`` with early stopping monitored on
    ``X_test`` / ``Y_test``, and the RMSE on that same validation set is
    returned for minimisation.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.

    Returns
    -------
    float
        Root-mean-squared error on ``(X_test, Y_test)``.
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-2, 0.25, log=True)
    # suggest_loguniform is deprecated; suggest_float(log=True) samples the
    # same distribution and matches how learning_rate is drawn above.
    reg_lambda = trial.suggest_float("reg_lambda", 1e-8, 100.0, log=True)
    reg_alpha = trial.suggest_float("reg_alpha", 1e-8, 100.0, log=True)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 1, 7)
    early_stopping_rounds = trial.suggest_int("early_stopping_rounds", 100, 500)
    # Lower bound is 1: a booster with zero trees is a degenerate model and
    # only wastes a trial.
    n_estimators = trial.suggest_int("n_estimators", 1, 10000)

    model = XGBRegressor(
        random_state=50,
        tree_method="gpu_hist",
        gpu_id=0,
        predictor="gpu_predictor",
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        reg_lambda=reg_lambda,
        reg_alpha=reg_alpha,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        max_depth=max_depth,
    )
    model.fit(
        X_train,
        Y_train,
        early_stopping_rounds=early_stopping_rounds,
        eval_set=[(X_test, Y_test)],
        verbose=1200,
    )
    preds_valid = model.predict(X_test)
    # sqrt(MSE) instead of mean_squared_error(..., squared=False): the
    # `squared` argument was deprecated and later removed from scikit-learn.
    rmse = float(np.sqrt(mean_squared_error(Y_test, preds_valid)))

    return rmse

# Data Extraction

In [895]:
# Load the three competition files. The two measurement files use ';' as the
# field separator and ',' as the decimal mark, and are re-indexed by their
# timestamp column through datetime_index.
csv_format = dict(delimiter=';', decimal=',')
submission = pd.read_csv('../input/enerjisa-enerji-veri-maratonu/sample_submission.csv')
label = datetime_index(
    pd.read_csv('../input/enerjisa-enerji-veri-maratonu/generation.csv', **csv_format)
)
features = datetime_index(
    pd.read_csv('../input/enerjisa-enerji-veri-maratonu/temperature.csv', **csv_format)
)

In [896]:
# Labelled period (2019-01 .. 2021-11) vs. the December 2021 window to predict.
# Explicit copies are taken so that later column assignments never target a
# .loc slice of `features` (which raises pandas' SettingWithCopyWarning).
x_train = fill_na(features.loc['2019-01-01':'2021-11-30'].copy())
x_test = day_night(fill_na(features.loc['2021-12-01':'2021-12-31'].copy()))
y_train = label.loc['2019-01-01':'2021-11-30']

In [897]:
# Join features and target column-wise on the timestamp index, then add the
# calendar / daylight columns.
df_train = pd.concat([x_train, y_train], axis=1).pipe(day_night)

### Preprocessing

In [898]:
# Apply the same two encodings (weather-code bucket, then season) to the
# training frame and to the December feature frame.
df_train = season_encoder(wwcode_encoder(df_train))
x_test = season_encoder(wwcode_encoder(x_test))

In [899]:
# df_train.drop(['ComfortTemperature'], axis=1, inplace=True)
# x_test.drop(['ComfortTemperature'], axis=1, inplace=True)

In [900]:
# df_train

In [901]:
# df_train = pd.get_dummies(df_train, columns = ['WWCode'])
# x_test = pd.get_dummies(x_test, columns = ['WWCode'])

##### Z-Score

In [902]:
# Standard score of Generation over the whole training frame, then summary
# statistics of winter rows (Season == 1) whose score exceeds 1.66.
df_train['Zscore_SP'] = zscore(df_train['Generation'])
df_train.loc[
    (df_train['Zscore_SP'] > 1.66) & (df_train['Season'] == 1),
    'Generation',
].describe()

### Winter

In [903]:
# Winter rows (Season == 1) and the distribution of their daytime Generation.
winter = df_train.loc[df_train['Season'] == 1]
winter.loc[winter['Day'] != 0, 'Generation'].describe()

In [904]:
# `drop_winter` was plotted without ever being assigned, so this cell raised
# NameError on a fresh kernel.
# NOTE(review): the 220 cut-off is taken from the winter filter applied to
# df_train further down in this notebook -- confirm it is the intended one.
drop_winter = winter[winter.Generation >= 220]
ax = drop_winter.plot(y='Generation', figsize=(12,6))

In [905]:
# Line plot of Generation over all winter rows.
ax = winter.plot.line(y='Generation', figsize=(12, 6))

### Spring

In [906]:
# Spring rows (Season == 2) and the distribution of their daytime Generation.
spring = df_train.loc[df_train['Season'] == 2]
spring.loc[spring['Day'] != 0, 'Generation'].describe()

In [907]:
# `drop_spring` was plotted without ever being assigned, so this cell raised
# NameError on a fresh kernel.
# NOTE(review): the 270 cut-off is the upper spring threshold used by the
# df_train filter further down in this notebook -- confirm it is the intended one.
drop_spring = spring[spring.Generation >= 270]
ax = drop_spring.plot(y='Generation', figsize=(12,6))

In [908]:
# Line plot of Generation over all spring rows.
ax = spring.plot.line(y='Generation', figsize=(12, 6))

### Autumn

In [909]:
# Autumn rows (Season == 4) and the distribution of their daytime Generation.
autumun = df_train.loc[df_train['Season'] == 4]
autumun.loc[autumun['Day'] != 0, 'Generation'].describe()

In [910]:
# Autumn rows with Generation of 330 or more, displayed for inspection.
drop_autumn = autumun.loc[autumun['Generation'] >= 330]
drop_autumn

In [911]:
# Line plot of Generation over all autumn rows.
ax = autumun.plot.line(y='Generation', figsize=(12, 6))

In [912]:
# Line plot of Generation over the high-output autumn rows only.
ax = drop_autumn.plot.line(y='Generation', figsize=(12, 6))

### Summer

In [913]:
# Summer rows (Season == 3) and the distribution of their daytime Generation.
summer = df_train.loc[df_train['Season'] == 3]
summer.loc[summer['Day'] != 0, 'Generation'].describe()

In [914]:
# Daytime summer rows with Generation of 350 or more, displayed for inspection.
drop_summer = summer.loc[(summer['Generation'] >= 350) & (summer['Day'] == 1)]
drop_summer

In [915]:
# Line plot of Generation over all summer rows. This cell sits in the Summer
# section but previously re-plotted the autumn frame (copy-paste slip).
ax = summer.plot(y='Generation', figsize=(12,6))

In [916]:
# Line plot of Generation over the high-output daytime summer rows only.
ax = drop_summer.plot.line(y='Generation', figsize=(12, 6))

In [917]:
# Drop per-season Generation outliers, then remove the helper z-score column.
# The spring/summer/autumn filters used to combine `Generation <= low` AND
# `Generation >= high`; no value satisfies both, so those three lines never
# removed a row. The two bounds are now OR-ed.
# NOTE(review): the low bounds also match night-time rows where Generation is
# at or near zero -- confirm that dropping those rows is intended, or restrict
# the low bound to daytime rows (`Day == 1`).
df_train = df_train[~((df_train.Generation >= 220) & (df_train.Season == 1))]
df_train = df_train[~(((df_train.Generation <= 20) | (df_train.Generation >= 270)) & (df_train.Season == 2))]
df_train = df_train[~(((df_train.Generation <= 60) | (df_train.Generation >= 350)) & (df_train.Season == 3))]
df_train = df_train[~(((df_train.Generation <= 40) | (df_train.Generation >= 290)) & (df_train.Season == 4))]
df_train = df_train.drop(['Zscore_SP'], axis=1)
df_train

##### Dataset no:1

In [918]:
# Chronological hold-out: train up to the end of September 2021, validate on
# October-November 2021. The training window previously ran to 2021-10-31,
# so October 2021 appeared in both sets and leaked validation rows into
# training (and into early stopping).
X_train = df_train.loc['2019-01-01':'2021-09-30'].drop(['Generation'], axis=1)
X_test = df_train.loc['2021-10-01':'2021-11-30'].drop(['Generation'], axis=1)
Y_train = df_train.Generation.loc['2019-01-01':'2021-09-30']
Y_test = df_train.Generation.loc['2021-10-01':'2021-11-30']

##### Dataset no:2

In [919]:
# X_train = df_train.loc['2019-01-01':'2021-04-30'].drop(['Generation'], axis=1)
# X_test = df_train.loc['2021-05-01':'2021-11-30'].drop(['Generation'], axis=1)
# Y_train = df_train.Generation.loc['2019-01-01':'2021-04-30']
# Y_test = df_train.Generation.loc['2021-05-01':'2021-11-30']

# Model Training

### Optuna

In [920]:
# Seed the sampler so the hyper-parameter search proposes the same trials on
# every Restart & Run All (50 matches the random_state given to XGBRegressor).
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=50),
)
study.optimize(objective, n_trials=20)

In [921]:
# Hyper-parameters of the best (lowest-RMSE) trial, displayed for reference.
optuna_params = study.best_trial.params
optuna_params

In [922]:
# Refit with the best hyper-parameters found by the study.
# 'early_stopping_rounds' is tuned alongside the model parameters but is a
# fit-time setting here, so it is removed from the constructor keywords;
# unpacking all of optuna_params handed it to the constructor as well as to
# fit().
xgb_params = {
    name: value
    for name, value in optuna_params.items()
    if name != 'early_stopping_rounds'
}
model = XGBRegressor(
    random_state=50,
    tree_method="gpu_hist",
    gpu_id=0,
    predictor="gpu_predictor",
    **xgb_params
)
model.fit(
    X_train,
    Y_train,
    early_stopping_rounds=optuna_params['early_stopping_rounds'],
    eval_set=[(X_test, Y_test)],
    verbose=1000,
)

In [923]:
# Validation-window predictions as a one-column frame ('pred') that shares
# X_test's timestamp index.
pred_val_optuna = pd.DataFrame(
    {'pred': model.predict(X_test)},
    index=X_test.index,
)

In [924]:
# Raw validation predictions, displayed before the night-time / negative-value
# clean-up that a later cell applies.
pred_val_optuna

In [925]:
# Validation RMSE of the raw predictions. Computed as sqrt(MSE) because the
# `squared=False` argument of mean_squared_error was deprecated and later
# removed from scikit-learn.
rmse_optuna = float(np.sqrt(mean_squared_error(Y_test, pred_val_optuna)))
rmse_optuna

In [926]:
# Actual vs. predicted Generation on the validation window (raw predictions).
fig, scatter_ax = plt.subplots(figsize=(10, 15))
scatter_ax.scatter(Y_test, pred_val_optuna)
scatter_ax.set_xlabel('Actual')
scatter_ax.set_ylabel('Predicted')

In [927]:
# Post-process the validation predictions: force night-time rows and negative
# values to zero, then keep only the prediction column as a Series.
# `.loc[mask, 'pred'] = 0` replaces the chained form `frame.pred[mask] = 0`,
# which raises SettingWithCopyWarning and is not guaranteed to write through
# to the frame.
pred_val_optuna = day_night(pred_val_optuna)
pred_val_optuna.loc[~pred_val_optuna['Day'].astype(bool), 'pred'] = 0
pred_val_optuna.loc[pred_val_optuna['pred'] < 0, 'pred'] = 0
pred_val_optuna = pred_val_optuna['pred']
pred_val_optuna

In [928]:
# Validation RMSE after the night-time / negative-value clean-up. Computed as
# sqrt(MSE) because the `squared=False` argument of mean_squared_error was
# deprecated and later removed from scikit-learn.
rmse_optuna = float(np.sqrt(mean_squared_error(Y_test, pred_val_optuna)))
rmse_optuna

In [929]:
# Actual vs. predicted Generation on the validation window (post-processed).
fig, scatter_ax = plt.subplots(figsize=(10, 15))
scatter_ax.scatter(Y_test, pred_val_optuna)
scatter_ax.set_xlabel('Actual')
scatter_ax.set_ylabel('Predicted')

In [930]:
# Build the submission from predictions on the December feature frame
# (`x_test`). The validation-window predictions (October-November) were
# written here before, so the submitted values did not belong to the
# submission period at all.
# Columns are taken in the training order, and the same clean-up as on the
# validation set is applied: zero at night, no negative output.
test_pred = pd.Series(model.predict(x_test[X_train.columns]), index=x_test.index)
test_pred = test_pred.where(x_test['Day'].astype(bool), 0).clip(lower=0)
# NOTE(review): rows are matched to the sample submission by position, as
# before -- confirm that x_test has exactly one row per submission row.
submission['Generation'] = test_pred.reset_index(drop=True)
submission.to_csv('submission.csv', index=False)