# Notebook for merging cleaned data together

Read the data from the CSV files.

In [27]:
from pathlib import Path

import pandas as pd

# Directory holding the hourly input CSV files, relative to the working directory.
# Built with pathlib so the notebook also runs outside Windows; the previous
# '.\\data\\...' strings only resolve where backslash is the path separator.
DATA_DIR = Path('data')

elec_production = pd.read_csv(DATA_DIR / 'electricity-production-FI_2016-2021_hourly.csv', delimiter=';')
elec_consumption = pd.read_csv(DATA_DIR / 'electricity-consumption-FI_2016-2021_hourly.csv', delimiter=';')
# NOTE(review): the price values use a decimal comma (e.g. "16,39"), so this column
# appears to be loaded as text rather than as numbers. Passing decimal=',' would
# parse it as float -- confirm what downstream consumers expect before changing.
elec_prices = pd.read_csv(DATA_DIR / 'electricity-prices-FI_2016-2021_hourly.csv', delimiter=';')

# Visual sanity check: each dataframe should have the same number of rows,
# and columns named Date and Hour (the merge keys used later).
for frame in (elec_production, elec_consumption, elec_prices):
    print(frame.head(1))
    print(frame.shape[0])


         Date  Hour  PRODUCTION (MWh)
0  2016-01-01     0            7964.0
49685
         Date  Hour  CONSUMP (MWh)
0  2016-01-01     0        10005.0
49685
         Date  Hour PRICE (EUR/MWh)
0  2016-01-01     0           16,39
49685


Merge the data sets together on their shared `Date` and `Hour` columns.

In [28]:
from functools import reduce

# dataframes to combine, in the order they are merged
data_frames = [elec_production, elec_consumption, elec_prices]


def _merge_on_date_hour(left, right):
    """Outer-join two dataframes on their shared Date and Hour columns."""
    return pd.merge(left, right, on=['Date', 'Hour'], how='outer')


# fold the list pairwise, left to right, into a single merged dataframe
df = reduce(_merge_on_date_hour, data_frames)

print(df.head(1))

         Date  Hour  PRODUCTION (MWh)  CONSUMP (MWh) PRICE (EUR/MWh)
0  2016-01-01     0            7964.0        10005.0           16,39


Calculate weekdays. Each weekday is represented by an integer from 0 to 6, where Monday is 0 and Sunday is 6.

In [29]:
from datetime import datetime

def weekday_from_date(date_str):
    """Return the weekday number for a date string in 'YYYY-MM-DD' format.

    The value comes from ``datetime.weekday()``: Monday is 0, Sunday is 6.
    """
    return datetime.strptime(date_str, '%Y-%m-%d').weekday()

# Derive the weekday from the Date column alone. Series.map calls the helper once
# per date string, instead of building a full row Series for every record as a
# row-wise DataFrame.apply(axis=1) does just to read a single column.
df['Weekday'] = df['Date'].map(weekday_from_date)

print(df.head(1))

         Date  Hour  PRODUCTION (MWh)  CONSUMP (MWh) PRICE (EUR/MWh)  Weekday
0  2016-01-01     0            7964.0        10005.0           16,39        4


Split date into three separate columns.

In [31]:
def year_from_date(date_str):
    """Return the year (as an int) of a date string in 'YYYY-MM-DD' format."""
    return datetime.strptime(date_str, '%Y-%m-%d').year

def month_from_date(date_str):
    """Return the month (1-12) of a date string in 'YYYY-MM-DD' format."""
    return datetime.strptime(date_str, '%Y-%m-%d').month

def day_from_date(date_str):
    """Return the day of the month of a date string in 'YYYY-MM-DD' format."""
    return datetime.strptime(date_str, '%Y-%m-%d').day

# Split the date string into numeric Year / Month / Day columns.
# The guard keeps this cell re-runnable after 'Date' has already been dropped.
if 'Date' in df:
    # Map over the single Date column rather than applying row-wise over the
    # whole frame, which would build a Series per row just to read one field.
    date_strings = df['Date']
    df['Year'] = date_strings.map(year_from_date)
    df['Month'] = date_strings.map(month_from_date)
    df['Day'] = date_strings.map(day_from_date)

# remove date column that is no longer needed (errors='ignore' makes this a
# no-op when the column is already gone)
df = df.drop(['Date'], errors='ignore', axis=1)

# reorder the columns: calendar fields first, then every remaining column
# in its existing order
leading = ['Year', 'Month', 'Day', 'Weekday']
cols = leading + [name for name in df.columns if name not in leading]
df = df.loc[:, cols]

print(df.head(1))

   Year  Month  Day  Weekday  Hour  PRODUCTION (MWh)  CONSUMP (MWh)  \
0  2016      1    1        4     0            7964.0        10005.0   

  PRICE (EUR/MWh)  
0           16,39  


Write the merged data to a CSV file.

In [10]:
# Persist the merged frame as a semicolon-separated UTF-8 file without the index column.
output_path = 'data_2016-2021_hourly.csv'
df.to_csv(output_path, index=False, sep=';', encoding='utf-8')
print('Success')

Success
