In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Column Description:
* DateTime: String, defines date and time of sample
* Holiday: String, gives name of holiday if day is a bank holiday
* HolidayFlag: integer, 1 if day is a bank holiday, zero otherwise
* DayOfWeek: integer (0-6), day of the week, where 0 is Monday
* WeekOfYear: integer, running week within year of this date
* Day: integer, day of the date
* Month: integer, month of the date
* Year: integer, year of the date
* PeriodOfDay: integer, denotes the half-hour period of the day (0-47)
* ForecastWindProduction: the forecasted wind production for this period
* SystemLoadEA: the national load forecast for this period
* SMPEA: the price forecast for this period
* ORKTemperature: the actual temperature measured at Cork airport
* ORKWindspeed: the actual windspeed measured at Cork airport
* CO2Intensity: the actual CO2 intensity (in g/kWh) of the electricity produced
* ActualWindProduction: the actual wind energy production for this period
* SystemLoadEP2: the actual national system load for this period
* SMPEP2: the actual price of this time period, the value to be forecasted

In [None]:
# The raw file uses '?' for missing readings. Registering it as an NA marker
# lets pandas read the affected columns as floats instead of strings; per the
# original author's note, only the first two columns stay as object dtype.
df = pd.read_csv(
    "../input/electrity-prices/electricity_prices.csv",
    na_values=['?'],
)
df.head()

In [None]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# (rows, columns) of the frame as loaded, before any rows are dropped.
df.shape

In [None]:
# Discard every row that has a missing value in any column, then re-inspect the
# frame to see how many rows are left.
# NOTE(review): compare with the shape shown earlier to confirm that the number
# of rows removed is acceptable.
df = df.dropna(axis=0, how='any')
df.info()

In [None]:
# DateTime is redundant: the data set already provides its parts as separate
# columns (Day, Month, Year, DayOfWeek, WeekOfYear, PeriodOfDay).
# errors='ignore' keeps the cell idempotent, so re-running it after the column
# has already been removed does not raise a KeyError.
df = df.drop(columns=['DateTime'], errors='ignore')

In [None]:
# Absolute correlation of every numeric column with the target SMPEP2,
# strongest first.
# Only numeric columns are passed to corr(): the frame still contains the string
# column Holiday, and DataFrame.corr() raises on non-numeric data from
# pandas 2.0 onwards (older versions silently skipped such columns).
df.select_dtypes(include='number').corr().abs()['SMPEP2'].sort_values(ascending=False)

In [None]:
# Predictor columns fed to both models; SMPEP2 (the actual price) is the target.
feature_columns = [
    'ActualWindProduction',
    'SystemLoadEP2',
    'SMPEA',
    'SystemLoadEA',
    'ForecastWindProduction',
    'DayOfWeek',
    'Year',
    'ORKWindspeed',
    'CO2Intensity',
    'PeriodOfDay',
]
X = df[feature_columns]
y = df['SMPEP2']

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [None]:
# Hold out 10% of the rows as a test set (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Standardise the features. The scaler is fitted on the training rows only and
# then applied to both splits, so no test-set statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
# Width of the input layer, taken from the scaled training matrix so that it
# always matches the number of selected feature columns instead of relying on
# a hard-coded constant.
n_features = X_train.shape[1]

# Fully connected regression network: three ReLU hidden layers (512, 800 and
# 1024 units) with 30% dropout after the second and third, followed by a single
# linear output unit that predicts the price.
model = keras.Sequential([
        keras.layers.Dense(512, activation="relu", input_shape=[n_features]),
        keras.layers.Dense(800, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1024, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation = 'linear'),
        ])
model.summary()

In [None]:
# Train with Adam on mean squared error; also report MSE and MAE each epoch.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse', 'mae'],
)

In [None]:
# Stop training once the validation loss has failed to improve by at least
# 0.001 for 10 consecutive epochs, then restore the weights of the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

# Early stopping chooses which epoch's weights to keep based on the validation
# loss, so the validation data must not be the test set: selecting the model on
# X_test and then scoring it on X_test gives optimistically biased metrics.
# validation_split instead holds out the last 10% of the (already shuffled)
# training rows, leaving the test set untouched until evaluation.
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    batch_size=50,
    epochs=500,
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
from sklearn.metrics import mean_absolute_error,r2_score
# Score the Keras network on the held-out test split.
predictions = model.predict(X_test)
nn_mae = mean_absolute_error(y_test, predictions)
nn_r2 = r2_score(y_test, predictions)
print(f"MAE: {nn_mae}")
print(f"R2_score: {nn_r2}")


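Neural network: R² score of about 0.584 on the test set (this is a regression task, so the figure is the coefficient of determination rather than a classification accuracy).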

In [None]:
from xgboost import XGBRegressor
# Gradient-boosted tree baseline, fitted on the same scaled training split as
# the Keras network and scored with R² on the test split.
xgb_params = dict(
    n_estimators=8000,
    max_depth=17,
    eta=0.1,
    subsample=0.7,
    colsample_bytree=0.8,
)
model2 = XGBRegressor(**xgb_params)
model2.fit(X_train, y_train)
pred = model2.predict(X_test)
r2_score(y_test, pred)

XGBoost: R² score of about 0.6137 on the test set (coefficient of determination, not a classification accuracy).

In [None]:
# Mean absolute error of the XGBoost predictions on the test split.
mean_absolute_error(y_test, pred)

In [None]:
pred[:5]  # first five XGBoost predictions on the test split

In [None]:
predictions[:5]  # first five Keras predictions on the test split, for comparison with XGBoost