# Exploratory data analysis

In [2]:
import sklearn
import numpy as np
import pandas as pd

In [42]:
# Load the hourly 2016-2021 dataset; the file is semicolon-delimited.
df = pd.read_csv(
    "./data/data_2016-2021_hourly.csv",
    sep=";",
)

# Quick sanity check: show only the first row to inspect the available columns.
print(df.head(1))

   Year  Month  Day  Weekday  Hour  PRODUCTION (MWh)  CONSUMP (MWh)  \
0  2016      1    1        4     0            7964.0        10005.0   

  PRICE (EUR/MWh)  MAARIANHAMINA CLOUDS (1/8)  MAARIANHAMINA TEMP (C)  ...  \
0           16,39                         5.0                     2.9  ...   

   PORI WIND (m/s)  KUOPIO CLOUDS (1/8)  KUOPIO TEMP (C)  KUOPIO WIND (m/s)  \
0              6.6                  8.0             -4.8                7.2   

   SODANKYLÄ CLOUDS (1/8)  SODANKYLÄ TEMP (C)  SODANKYLÄ WIND (m/s)  \
0                     8.0                -2.6                   5.8   

   TURKU CLOUDS (1/8)  TURKU TEMP (C)  TURKU WIND (m/s)  
0                 7.0            -2.3               7.3  

[1 rows x 38 columns]


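Add 3×5 lagged columns for the variables PRODUCTION, CONSUMP and PRICE: for each variable, create one column per lag of 11, 10, 9, 8 and 7 days, so that each new column contains the value observed that many days earlier (the period [day - 11, day - 7]). The lag is applied by shifting the hourly rows by 24 × (number of days).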

In [43]:
def create_columns_with_sift(df, column_name, arr_of_sifts):
    """Add lagged copies of ``column_name`` to ``df`` in place.

    For every value ``d`` in ``arr_of_sifts`` a new column named
    ``"<column_name> (day - <d>)"`` is written to ``df``. It holds the source
    column shifted down by ``d * 24`` rows, so the first ``d * 24`` entries of
    the new column are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is modified in place. The shift is purely positional;
        it presumably relies on the rows being consecutive hourly records
        so that 24 rows equal one day (not checked here).
    column_name : str
        Name of the existing column to copy and shift.
    arr_of_sifts : iterable
        Lags expressed in days.

    Returns
    -------
    None
    """
    rows_per_day = 24
    source = column_name
    for days_back in arr_of_sifts:
        lagged_name = source + f' (day - {days_back!s})'
        df[lagged_name] = df[source].shift(days_back * rows_per_day)

# Fix the price data format: the raw column uses a decimal comma (e.g. "16,39"),
# so it is read as text and has to be converted to float.
# Vectorised string operations replace the former row-wise
# `df.apply(lambda row: ..., axis=1)`, which built a full row Series for every
# row just to convert a single cell. Casting to str first keeps this cell safe
# to re-run once the column has already been converted to float.
df['PRICE (EUR/MWh)'] = (
    df['PRICE (EUR/MWh)']
    .astype(str)
    .str.replace(',', '.', regex=False)  # literal replacement, not a regex
    .astype(float)
)

# Lags (in days) for which shifted copies of each variable are created.
sift_days = [11, 10, 9, 8, 7]
create_columns_with_sift(df, 'PRODUCTION (MWh)', sift_days)
create_columns_with_sift(df, 'CONSUMP (MWh)', sift_days)
create_columns_with_sift(df, 'PRICE (EUR/MWh)', sift_days)

print(df.head())

   Year  Month  Day  Weekday  Hour  PRODUCTION (MWh)  CONSUMP (MWh)  \
0  2016      1    1        4     0            7964.0        10005.0   
1  2016      1    1        4     1            7862.0         9722.0   
2  2016      1    1        4     2            7983.0         9599.0   
3  2016      1    1        4     3            7917.0         9524.0   
4  2016      1    1        4     4            7926.0         9601.0   

   PRICE (EUR/MWh)  MAARIANHAMINA CLOUDS (1/8)  MAARIANHAMINA TEMP (C)  ...  \
0            16.39                         5.0                     2.9  ...   
1            16.04                         1.0                     2.9  ...   
2            15.74                         0.0                     2.3  ...   
3            15.57                         0.0                     2.0  ...   
4            15.47                         3.0                     1.9  ...   

   CONSUMP (MWh) (day - 11)  CONSUMP (MWh) (day - 10)  \
0                       NaN              

Remove the rows that contain NaN values after the shift (the first 11 days of data have no lagged values).

In [44]:
# Keep only complete rows: a row is dropped when any of its columns is NaN.
df = df.dropna(axis=0, how='any')

# Preview the first five remaining rows.
print(df.head(5))

     Year  Month  Day  Weekday  Hour  PRODUCTION (MWh)  CONSUMP (MWh)  \
264  2016      1   12        1     0            9109.0        11562.0   
265  2016      1   12        1     1            8947.0        11362.0   
266  2016      1   12        1     2            8886.0        11271.0   
267  2016      1   12        1     3            8967.0        11346.0   
268  2016      1   12        1     4            9437.0        11834.0   

     PRICE (EUR/MWh)  MAARIANHAMINA CLOUDS (1/8)  MAARIANHAMINA TEMP (C)  ...  \
264            21.11                         8.0                    -3.0  ...   
265            19.81                         8.0                    -4.1  ...   
266            19.10                         8.0                    -4.4  ...   
267            18.41                         8.0                    -4.1  ...   
268            25.90                         8.0                    -4.6  ...   

     CONSUMP (MWh) (day - 11)  CONSUMP (MWh) (day - 10)  \
264            

Write the shifted data to a file

In [45]:
# Persist the frame with the lagged columns as a semicolon-delimited CSV;
# the row index is not written.
df.to_csv(
    'data_2016-2021_hourly_sifted.csv',
    index=False,
    sep=';',
    encoding='utf-8',
)
print('Success')

Success


## Plot figures that show the correlation between price and the other variables

In [46]:
# Pairwise Pearson correlation of all columns, rendered as a table whose cell
# background is shaded with the 'Blues' colormap.
df.corr(method='pearson').style.background_gradient(cmap='Blues')

Unnamed: 0,Year,Month,Day,Weekday,Hour,PRODUCTION (MWh),CONSUMP (MWh),PRICE (EUR/MWh),MAARIANHAMINA CLOUDS (1/8),MAARIANHAMINA TEMP (C),MAARIANHAMINA WIND (m/s),JYVÄSKYLÄ CLOUDS (1/8),JYVÄSKYLÄ TEMP (C),JYVÄSKYLÄ WIND (m/s),KAJAANI CLOUDS (1/8),KAJAANI TEMP (C),KAJAANI WIND (m/s),KUUSAMO CLOUDS (1/8),KUUSAMO TEMP (C),KUUSAMO WIND (m/s),JOENSUU CLOUDS (1/8),JOENSUU TEMP (C),JOENSUU WIND (m/s),OULU CLOUDS (1/8),OULU TEMP (C),OULU WIND (m/s),PORI CLOUDS (1/8),PORI TEMP (C),PORI WIND (m/s),KUOPIO CLOUDS (1/8),KUOPIO TEMP (C),KUOPIO WIND (m/s),SODANKYLÄ CLOUDS (1/8),SODANKYLÄ TEMP (C),SODANKYLÄ WIND (m/s),TURKU CLOUDS (1/8),TURKU TEMP (C),TURKU WIND (m/s),PRODUCTION (MWh) (day - 11),PRODUCTION (MWh) (day - 10),PRODUCTION (MWh) (day - 9),PRODUCTION (MWh) (day - 8),PRODUCTION (MWh) (day - 7),CONSUMP (MWh) (day - 11),CONSUMP (MWh) (day - 10),CONSUMP (MWh) (day - 9),CONSUMP (MWh) (day - 8),CONSUMP (MWh) (day - 7),PRICE (EUR/MWh) (day - 11),PRICE (EUR/MWh) (day - 10),PRICE (EUR/MWh) (day - 9),PRICE (EUR/MWh) (day - 8),PRICE (EUR/MWh) (day - 7)
Year,1.0,-0.124559,-0.009672,-0.000786,0.000398,0.002295,-0.0724,0.179611,-0.020695,0.052627,0.079932,-0.056377,0.042833,0.009605,-0.075973,0.023111,0.191566,-0.018617,0.010783,0.162515,-0.090277,0.025029,0.117859,-0.064198,0.016747,0.043124,-0.063641,0.053335,0.050959,-0.068451,0.039857,0.079294,-0.068346,0.015043,0.097044,-0.046956,0.056638,0.050665,0.00584,0.004408,0.003501,0.002957,0.00296,-0.07476,-0.075894,-0.076636,-0.076826,-0.07618,0.164457,0.164335,0.164564,0.165529,0.167736
Month,-0.124559,1.0,0.001972,0.003997,-0.001816,-0.279799,-0.277161,0.07697,0.064952,0.316967,-0.00092,0.103869,0.272277,-0.075697,0.114821,0.290213,0.018617,0.122703,0.286926,-0.004159,0.116603,0.282764,0.006455,0.11284,0.295537,-0.024569,0.078915,0.276734,-0.041333,0.116668,0.290375,0.026476,0.107168,0.265991,-0.025943,0.073521,0.284322,0.021116,-0.339485,-0.333562,-0.327824,-0.322842,-0.319226,-0.325292,-0.319559,-0.314591,-0.310921,-0.308945,0.100388,0.099305,0.097758,0.094713,0.091754
Day,-0.009672,0.001972,1.0,0.001462,-0.002036,-0.015899,-0.042023,0.002134,-0.029047,0.039795,-0.031904,-0.01274,0.031463,-0.012809,-0.000321,0.025122,-0.01212,0.000618,0.019486,0.004258,0.012366,0.02028,-0.008699,0.001529,0.025268,0.000171,-0.007897,0.036662,-0.005902,0.010027,0.022944,-0.024562,-0.006067,0.01673,0.003615,-0.016511,0.034596,-0.040783,-0.016201,-0.011203,-0.008474,-0.002436,-0.002553,0.018653,0.016329,0.012141,0.003421,-0.006705,0.040687,0.037765,0.028699,0.023088,0.016055
Weekday,-0.000786,0.003997,0.001462,1.0,-0.001437,-0.129657,-0.135678,-0.209246,-0.006549,0.004996,-0.010998,0.004995,0.010334,-0.009166,0.022668,0.010843,-0.000815,0.022676,0.013919,0.007909,0.022687,0.011282,-0.007967,0.023337,0.008172,0.001587,0.012274,0.007524,-0.011391,0.015406,0.007789,-0.014164,0.017873,0.005252,0.008152,0.000506,0.004185,-0.016208,0.020492,0.078595,0.130937,0.018739,-0.128995,0.009618,0.07174,0.145083,0.044677,-0.134125,0.040526,0.132575,0.192306,0.022603,-0.208864
Hour,0.000398,-0.001816,-0.002036,-0.001437,1.0,0.116789,0.193366,0.141718,-0.019663,0.04205,0.021572,-0.018047,0.053322,0.006288,-0.011563,0.046004,0.009972,-0.019782,0.045003,-0.002009,-0.020162,0.041114,-0.012254,-0.01945,0.033636,-0.007494,-0.038724,0.054323,0.018003,-0.009298,0.049107,0.004834,-0.020428,0.045344,0.010192,-0.032014,0.050067,0.040402,0.104693,0.105834,0.106895,0.108612,0.109259,0.179471,0.18042,0.18132,0.182887,0.184076,0.128555,0.129782,0.131154,0.132851,0.133008
PRODUCTION (MWh),0.002295,-0.279799,-0.015899,-0.129657,0.116789,1.0,0.915881,0.1588,0.243158,-0.738531,0.150549,0.210632,-0.750136,0.151433,0.190353,-0.755909,0.126682,0.142404,-0.74222,0.101921,0.208159,-0.768508,0.16761,0.151606,-0.744514,0.151804,0.207368,-0.751063,0.175672,0.202668,-0.776221,0.169482,0.092475,-0.729581,0.070352,0.222699,-0.75523,0.12269,0.79469,0.801567,0.811864,0.847665,0.880912,0.789068,0.792449,0.796893,0.82937,0.867519,0.067405,0.067899,0.075153,0.118036,0.157343
CONSUMP (MWh),-0.0724,-0.277161,-0.042023,-0.135678,0.193366,0.915881,1.0,0.277251,0.251749,-0.721312,0.098821,0.224556,-0.755815,0.075379,0.194208,-0.766034,-0.005233,0.136928,-0.745883,-0.010581,0.206617,-0.774373,0.065211,0.154262,-0.755364,0.014938,0.211597,-0.752401,0.091419,0.203253,-0.784251,0.051024,0.088556,-0.734964,-0.02398,0.226786,-0.749541,0.075954,0.767582,0.77614,0.785473,0.824334,0.857356,0.812119,0.817922,0.824074,0.864507,0.907728,0.140107,0.14322,0.152152,0.200512,0.243834
PRICE (EUR/MWh),0.179611,0.07697,0.002134,-0.209246,0.141718,0.1588,0.277251,1.0,-0.027784,0.10038,-0.055405,-0.019107,0.070126,-0.021315,-0.080948,0.04721,-0.074609,-0.054904,0.055201,-0.047089,-0.073537,0.049297,-0.081202,-0.089749,0.058767,-0.103912,-0.070098,0.079955,-0.087711,-0.071174,0.050526,-0.133293,-0.070578,0.064386,-0.051535,-0.055478,0.091438,-0.006091,0.027643,0.029172,0.035377,0.086803,0.134335,0.104612,0.106445,0.108214,0.154571,0.211206,0.451061,0.461869,0.470105,0.568318,0.666017
MAARIANHAMINA CLOUDS (1/8),-0.020695,0.064952,-0.029047,-0.006549,-0.019663,0.243158,0.251749,-0.027784,1.0,-0.220095,0.141975,0.368278,-0.275301,-0.06411,0.275445,-0.259927,0.019191,0.224108,-0.265222,-0.021296,0.286267,-0.27251,0.022035,0.28528,-0.255696,-0.014769,0.531789,-0.272115,0.049878,0.305928,-0.279849,0.017277,0.18659,-0.268271,-0.052941,0.591435,-0.269469,0.086076,0.207589,0.211988,0.21095,0.213221,0.219657,0.211477,0.216933,0.215245,0.220226,0.225252,0.00757,0.011183,0.009747,0.008055,0.008502
MAARIANHAMINA TEMP (C),0.052627,0.316967,0.039795,0.004996,0.04205,-0.738531,-0.721312,0.10038,-0.220095,1.0,0.116118,-0.195127,0.920152,-0.003654,-0.202557,0.900355,0.094947,-0.136504,0.870093,0.116248,-0.207348,0.897593,0.025069,-0.157079,0.903462,0.084704,-0.179468,0.952459,0.103065,-0.205895,0.907998,0.016998,-0.089667,0.859557,0.109436,-0.186907,0.962039,0.144962,-0.685666,-0.685808,-0.689039,-0.695421,-0.700126,-0.654556,-0.656445,-0.660828,-0.666445,-0.670156,0.129649,0.129109,0.124773,0.128198,0.127211
