In [6]:
%matplotlib inline
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
# Allow up to 50 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns',50)

from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Ridge, LinearRegression
from category_encoders import OneHotEncoder, OrdinalEncoder
# NOTE(review): RandomizedSearchCV is listed twice on the next line, and
# GridSearchCV is imported again on the line after it; the duplicates are harmless
# but redundant.
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split, cross_val_score, validation_curve, GridSearchCV
# NOTE(review): plot_roc_curve was removed from sklearn.metrics in scikit-learn 1.2,
# so this line raises ImportError on newer releases. roc_curve, plot_roc_curve and
# accuracy_score are classification utilities and are not used in the cells visible
# here - confirm against the rest of the notebook before trimming.
from sklearn.metrics import roc_curve, plot_roc_curve, mean_absolute_error, mean_squared_error, accuracy_score

import warnings
# Suppresses every warning for the rest of the session, including deprecation and
# search-space warnings from the libraries above.
# NOTE(review): consider narrowing this to specific warning categories.
warnings.filterwarnings("ignore")

# Modelling

In [9]:
def modelden_evvel(filepath):
    """Load the energy CSV and prepare it for modelling.

    Steps, in order:

    1. Read ``filepath`` with the ``time`` column parsed as dates and used as
       the index.
    2. Replace spaces and hyphens in the column names with underscores.
    3. Convert the index to timezone-aware UTC timestamps.
    4. Add ``coal`` = brown coal/lignite generation + hard coal generation.
    5. Drop the day-ahead price, the forecast columns and several generation
       columns.
    6. Drop the row stamped 2014-12-31 23:00 UTC and sort the frame by time.
    7. Add ``season`` from the calendar month of the index:
       months 1-3 -> 'qis', 4-6 -> 'yaz', 7-9 -> 'yay', 10-12 -> 'payiz'.
       Rows whose month matches none of these ranges (for example a missing
       timestamp) get NaN.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The prepared frame, indexed by UTC time and sorted ascending.

    Raises
    ------
    KeyError
        If one of the expected columns, or the 2014-12-31 23:00 UTC row, is
        not present in the file.
    """
    df = pd.read_csv(filepath, parse_dates=['time'], index_col='time')

    df.columns = df.columns.str.replace(' ', '_').str.replace('-', '_')

    # Normalise every timestamp to UTC regardless of the offset stored in the file.
    df.index = pd.to_datetime(df.index, utc=True)
    df['coal'] = df['generation_fossil_brown_coal/lignite'] + df['generation_fossil_hard_coal']

    # The 'eday' spelling below is kept exactly as written: it has to match the
    # CSV header (presumably a typo in the source data - verify against the file).
    df = df.drop(columns=['price_day_ahead',
                          'generation_marine',
                          'total_load_forecast',
                          'generation_geothermal',
                          'generation_fossil_peat',
                          'generation_wind_offshore',
                          'forecast_solar_day_ahead',
                          'generation_fossil_oil_shale',
                          'forecast_wind_onshore_day_ahead',
                          'forecast_wind_offshore_eday_ahead',
                          'generation_fossil_coal_derived_gas',
                          'generation_hydro_pumped_storage_aggregated'])

    df = df.drop(pd.Timestamp('2014-12-31 23:00:00+00:00'))

    df = df.sort_index()

    # Season labels are assigned row by row from inclusive month ranges.
    # The previous version combined the two autumn masks with `@` (matrix
    # multiplication), which reduces them to a single scalar instead of a per-row
    # mask; each range is now evaluated element-wise with `&`.
    month_ranges = (('qis', 1, 3),
                    ('yaz', 4, 6),
                    ('yay', 7, 9),
                    ('payiz', 10, 12))
    months = np.asarray(df.index.month, dtype=float)
    # Object array so that string labels and the NaN placeholder can coexist
    # without NumPy converting NaN into the string 'nan'.
    season = np.full(len(df), np.nan, dtype=object)
    for label, first_month, last_month in month_ranges:
        season[(months >= first_month) & (months <= last_month)] = label
    df['season'] = season

    return df

# Build the modelling frame: run the preparation step on the raw CSV, then keep
# only the eight predictor columns plus the target column `price_actual`.
df = modelden_evvel('Data/energy_dataset.csv').loc[:, [
    'coal',
    'generation_fossil_oil',
    'generation_fossil_gas',
    'generation_hydro_water_reservoir',
    'generation_hydro_run_of_river_and_poundage',
    'generation_solar',
    'season',
    'generation_wind_onshore',
    'price_actual',
]]

In [10]:
target = 'price_actual'

# Features are every column except the target.
X = df.drop(columns=target)
# The target is rescaled by a fixed factor of 1.73.
# NOTE(review): presumably a currency/unit conversion - confirm and document its source.
y = df[target] * 1.73

# 80/20 shuffled split with a fixed seed.
# NOTE(review): the rows are time-indexed, so a random split mixes past and future
# observations - confirm that this is intended.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=.2,
    random_state=42,
)

In [None]:
# Pipeline: ordinal-encode the categorical column(s), impute missing values,
# then fit an XGBoost regressor.
pipe_rs_xgb = make_pipeline(
    OrdinalEncoder(),
    SimpleImputer(),
    XGBRegressor(random_state=42, n_jobs=-1),
)

# Every list below holds a single value, so the search space is one candidate.
# NOTE(review): n_iter=20 further down exceeds that space; presumably the search
# then evaluates just this one combination - confirm against scikit-learn's docs.
paramajama = dict(
    simpleimputer__strategy=['mean'],
    xgbregressor__max_depth=[25],
    xgbregressor__learning_rate=[0.2],
    xgbregressor__booster=['dart'],
    xgbregressor__min_child_weight=[4],
    xgbregressor__gamma=[0.5],
    xgbregressor__max_delta_step=[0.0],
    xgbregressor__subsample=[0.7],
)

model_rs_xgbr = RandomizedSearchCV(
    estimator=pipe_rs_xgb,
    param_distributions=paramajama,
    n_iter=20,
    n_jobs=-1,
)

model_rs_xgbr.fit(X_train, y_train)

# Report the mean absolute error on both splits, then the search object's own
# score on the validation split.
print('Training MAE:',
      mean_absolute_error(y_train, model_rs_xgbr.predict(X_train)))
print('-------------------------------------------------------------------')
print('Validation MAE:',
      mean_absolute_error(y_val, model_rs_xgbr.predict(X_val)))
print('-------------------------------------------------------------------')
print('R2 score:',
      model_rs_xgbr.score(X_val, y_val))

In [None]:
from sklearn.metrics import r2_score

# Compute R^2 on the validation split directly from the predictions.
r2_score(y_true=y_val, y_pred=model_rs_xgbr.predict(X_val))

In [None]:
import joblib

# Persist the fitted search object to disk so it can be reloaded without refitting.
joblib.dump(value=model_rs_xgbr, filename='final_xgb_pipeline.pkl')

## Load Model

In [1]:
import joblib

# Reload the object written by the joblib.dump cell.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only
# load files from a trusted source.
model = joblib.load(filename='final_xgb_pipeline.pkl')

  from pandas import MultiIndex, Int64Index


In [2]:
# Bare expression: displays the repr of the object loaded from the pickle file.
model

In [20]:
# Test prediction: a single hand-written observation passed through the loaded model.
#
# The columns use the same names, in the same order, as the feature frame the model
# was fitted on. The earlier version used ad-hoc labels ('komur', 'neft', 'gaz', ...)
# and relied on column position alone; recent scikit-learn releases reject input
# whose feature names differ from those seen during fit. Values are unchanged.
pred_data = pd.DataFrame({'coal': [0.0],                                          # was 'komur'
                          'generation_fossil_oil': [1000.0],                      # was 'neft'
                          'generation_fossil_gas': [1000.0],                      # was 'gaz'
                          'generation_hydro_water_reservoir': [1000.0],           # was 'hidro su rezervi'
                          'generation_hydro_run_of_river_and_poundage': [1000.0], # was 'hidrolik-çay'
                          'generation_solar': [2000.0],                           # was 'gunes'
                          'season': ['yay'],
                          'generation_wind_onshore': [1000.0]})                   # was 'kulek'
model.predict(pred_data)

array([69.5254], dtype=float32)