In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import holidays

In [2]:
df = pd.read_csv(
    "../dataa/LD2011_2014.txt",
    sep=";",                # AYRAÇ noktalı virgül
    quotechar='"',          # değerler " " içinde
    decimal=",",            # ondalık ayracı virgül
)

df = df.rename(columns={"Unnamed: 0": "Date"})
df.index = pd.to_datetime(df.iloc[:, 0])
df = df.drop(df.columns[0], axis=1)
df = df.sort_index()
df = df.asfreq("15min")
df.head()

Unnamed: 0_level_0,MT_001,MT_002,MT_003,MT_004,MT_005,MT_006,MT_007,MT_008,MT_009,MT_010,...,MT_361,MT_362,MT_363,MT_364,MT_365,MT_366,MT_367,MT_368,MT_369,MT_370
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-01 00:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 00:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 00:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 01:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
#kW to kWh
df = df / 4

total_15min= df.sum(axis=1)

hourly_total = total_15min.resample("h").sum()
daily_total = total_15min.resample("D").sum()


In [4]:
total_15min.head()

Date
2011-01-01 00:15:00    17128.278835
2011-01-01 00:30:00    17295.076090
2011-01-01 00:45:00    17341.212643
2011-01-01 01:00:00    17087.620165
2011-01-01 01:15:00    16541.718576
Freq: 15min, dtype: float64

In [5]:
hourly_total.head()

Date
2011-01-01 00:00:00    51764.567568
2011-01-01 01:00:00    66344.627687
2011-01-01 02:00:00    65981.054883
2011-01-01 03:00:00    66576.533566
2011-01-01 04:00:00    64963.552675
Freq: h, dtype: float64

In [6]:
daily_total.head()

Date
2011-01-01    1.713244e+06
2011-01-02    2.785336e+06
2011-01-03    2.812268e+06
2011-01-04    2.866966e+06
2011-01-05    2.880437e+06
Freq: D, dtype: float64

In [7]:
df_feat = hourly_total.to_frame(name="consumption")
df_feat.head()

Unnamed: 0_level_0,consumption
Date,Unnamed: 1_level_1
2011-01-01 00:00:00,51764.567568
2011-01-01 01:00:00,66344.627687
2011-01-01 02:00:00,65981.054883
2011-01-01 03:00:00,66576.533566
2011-01-01 04:00:00,64963.552675


In [8]:
# Time Features
df_feat["hour"] = df_feat.index.hour # Tüketimin hangi saatlerde yükseldiğini öğrenir
df_feat["dayofweek"] = df_feat.index.dayofweek # Hafta içi çalışma yükü
df_feat["month"] = df_feat.index.month #Klima / ısıtma sezonlarını ayırt eder
df_feat["is_weekend"] = df_feat["dayofweek"].isin([5,6]).astype(int) #Hafta sonu mu?


In [9]:
# Cyclieanc Encode
#Saatin döngüsel doğası
df_feat["hour_sin"] = np.sin(2*np.pi*df_feat["hour"]/24)
df_feat["hour_cos"] = np.cos(2*np.pi*df_feat["hour"]/24)
#Yıllık döngü
df_feat["month_sin"] = np.sin(2*np.pi*df_feat["month"]/12)
df_feat["month_cos"] = np.cos(2*np.pi*df_feat["month"]/12)


In [10]:
# Lag Features

df_feat["lag_1h"] = df_feat["consumption"].shift(1)#Bir saat önce ne oldu?
df_feat["lag_24h"] = df_feat["consumption"].shift(24)#Bir gün önce ne oldu?
df_feat["lag_168h"] = df_feat["consumption"].shift(168)#Bir hafta önce ne oldu?


In [11]:
# Rolling Features

df_feat["rolling_mean_24h"] = df_feat["consumption"].rolling(24).mean()#Kısa vadeli davranış(1gün)
df_feat["rolling_mean_7d"] = df_feat["consumption"].rolling(168).mean()#Orta vadeli davranış(1hafta)


In [16]:
# Bu kütüphane otamtaik olarak resmi tatilleri yıllara göre görür
pt_holidays = holidays.country_holidays('PT')

df_feat["is_holiday"] = df_feat.index.map(
    lambda x: 1 if x in pt_holidays else 0
)


In [17]:
df_feat.head()

Unnamed: 0_level_0,consumption,hour,dayofweek,month,is_weekend,hour_sin,hour_cos,month_sin,month_cos,lag_1h,lag_24h,lag_168h,rolling_mean_24h,rolling_mean_7d,is_holiday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2011-01-01 00:00:00,51764.567568,0,5,1,1,0.0,1.0,0.5,0.866025,,,,,,1
2011-01-01 01:00:00,66344.627687,1,5,1,1,0.258819,0.965926,0.5,0.866025,51764.567568,,,,,1
2011-01-01 02:00:00,65981.054883,2,5,1,1,0.5,0.866025,0.5,0.866025,66344.627687,,,,,1
2011-01-01 03:00:00,66576.533566,3,5,1,1,0.707107,0.707107,0.5,0.866025,65981.054883,,,,,1
2011-01-01 04:00:00,64963.552675,4,5,1,1,0.866025,0.5,0.5,0.866025,66576.533566,,,,,1


In [27]:
# Örneğin Portekizde 8 Aralık Günahsız Doğuş Bayramıdır (Milli Bayram)
df_feat.loc["2012-12-08"].head()

Unnamed: 0_level_0,consumption,hour,dayofweek,month,is_weekend,hour_sin,hour_cos,month_sin,month_cos,lag_1h,lag_24h,lag_168h,rolling_mean_24h,rolling_mean_7d,is_holiday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2012-12-08 00:00:00,145033.537448,0,5,12,1,0.0,1.0,-2.449294e-16,1.0,178548.257179,132622.629106,140603.60316,187855.228043,181645.067335,1
2012-12-08 01:00:00,122888.06114,1,5,12,1,0.258819,0.965926,-2.449294e-16,1.0,145033.537448,110593.084828,118288.339005,188367.518723,181672.446634,1
2012-12-08 02:00:00,114701.83738,2,5,12,1,0.5,0.866025,-2.449294e-16,1.0,122888.06114,107772.33464,110228.376501,188656.248004,181699.074377,1
2012-12-08 03:00:00,104385.330208,3,5,12,1,0.707107,0.707107,-2.449294e-16,1.0,114701.83738,103993.531862,101567.236315,188672.572935,181715.848746,1
2012-12-08 04:00:00,100056.16168,4,5,12,1,0.866025,0.5,-2.449294e-16,1.0,104385.330208,105882.972006,99217.902311,188429.789171,181720.838385,1


In [28]:
import requests
#Lisbona göre sıcaklık verisini Open-Meteo API den çekiyoruz
url = (
    "https://archive-api.open-meteo.com/v1/archive?"
    "latitude=38.72&longitude=-9.13"
    "&start_date=2011-01-01&end_date=2014-12-31"
    "&hourly=temperature_2m,relativehumidity_2m"
    "&timezone=Europe/Lisbon"
)

response = requests.get(url)
data = response.json()

weather = pd.DataFrame({
    "Date": pd.to_datetime(data["hourly"]["time"]),
    "temperature": data["hourly"]["temperature_2m"],
    "humidity": data["hourly"]["relativehumidity_2m"]
})

weather = weather.set_index("Date")
weather.head()

Unnamed: 0_level_0,temperature,humidity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-01-01 00:00:00,12.4,93
2011-01-01 01:00:00,12.3,94
2011-01-01 02:00:00,12.2,94
2011-01-01 03:00:00,12.2,94
2011-01-01 04:00:00,12.2,94


In [30]:
weather.loc['2012-08-10'].head(15)

Unnamed: 0_level_0,temperature,humidity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-08-10 00:00:00,22.2,70
2012-08-10 01:00:00,21.4,75
2012-08-10 02:00:00,20.8,80
2012-08-10 03:00:00,20.4,82
2012-08-10 04:00:00,20.1,83
2012-08-10 05:00:00,20.0,83
2012-08-10 06:00:00,20.0,83
2012-08-10 07:00:00,20.3,83
2012-08-10 08:00:00,21.9,72
2012-08-10 09:00:00,23.8,61


In [32]:
df_feat = df_feat.join(weather, how="left")


In [33]:
df_feat.head()

Unnamed: 0_level_0,consumption,hour,dayofweek,month,is_weekend,hour_sin,hour_cos,month_sin,month_cos,lag_1h,lag_24h,lag_168h,rolling_mean_24h,rolling_mean_7d,is_holiday,temperature,humidity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2011-01-01 00:00:00,51764.567568,0,5,1,1,0.0,1.0,0.5,0.866025,,,,,,1,12.4,93.0
2011-01-01 01:00:00,66344.627687,1,5,1,1,0.258819,0.965926,0.5,0.866025,51764.567568,,,,,1,12.3,94.0
2011-01-01 02:00:00,65981.054883,2,5,1,1,0.5,0.866025,0.5,0.866025,66344.627687,,,,,1,12.2,94.0
2011-01-01 03:00:00,66576.533566,3,5,1,1,0.707107,0.707107,0.5,0.866025,65981.054883,,,,,1,12.2,94.0
2011-01-01 04:00:00,64963.552675,4,5,1,1,0.866025,0.5,0.5,0.866025,66576.533566,,,,,1,12.2,94.0


In [34]:
df_feat.isna().sum()

consumption           0
hour                  0
dayofweek             0
month                 0
is_weekend            0
hour_sin              0
hour_cos              0
month_sin             0
month_cos             0
lag_1h                1
lag_24h              24
lag_168h            168
rolling_mean_24h     23
rolling_mean_7d     167
is_holiday            0
temperature           1
humidity              1
dtype: int64