Problem statement:

Situation: Energy is produced in different forms and consumed by the regions in different rates.   

Complication: Energy prices are increasing due to many reasons including but not limited to war, logistics and sanctions.   

Question: Is it possible to see what types of energy have been historically produced and consumed by the different regions? Is it possible to predict how the productions and consumption will look like in the future?   

Answer: ???


In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
sns.set_theme(style='darkgrid', palette='crest')
import matplotlib.pyplot as plt


from sklearn.datasets import load_diabetes  # our diabetes dataset
from sklearn.model_selection import (
    train_test_split,
)  # will allow us to helpfully subdivide our data
from sklearn.linear_model import (
    LinearRegression,
)  # a simple model we can play around with

from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series
from sktime.forecasting.naive import NaiveForecaster

from sktime.forecasting.base import ForecastingHorizon


from sktime.forecasting.theta import ThetaForecaster


# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

In [2]:
leveranser_flytande = pd.read_table('../data/to-use/leveranser-flytande-bransle-region-ar.csv', sep=',')
elproduction_bransleanvandning = pd.read_table('../data/to-use/elproduction-bransleanvandning-region-produktionssatt-bransletyp-ar.csv', sep=',')
fjarrvarmeproduktion = pd.read_table('../data/to-use/fjarrvarmeproduktion-region-produktionssatt-bransletyp-ar.csv', sep=',')
slutanvandning = pd.read_table('../data/to-use/slutanvandning-region-forbrukarkategori-bransletyp-kategori-energityp-ar.csv', sep=',')
bransleforbrukning = pd.read_table('../data/to-use/branslefarbrukning-far-produktion-per-ar.csv', sep=',')
energidata = pd.read_table('../data/to-use/energidata-per-ar.csv', sep=',')

datasets = [leveranser_flytande, elproduction_bransleanvandning, 
            fjarrvarmeproduktion, slutanvandning, bransleforbrukning, energidata]


## FIll NAs
leveranser_flytande.m3 = leveranser_flytande.m3.fillna(method='ffill')
fjarrvarmeproduktion.MWh = fjarrvarmeproduktion.MWh.interpolate()
# slutanvandning = slutanvandning.fillna('fffill')
energidata.MWh = energidata.MWh.fillna(method='ffill')


# Slice dataset for regions
leveranser_flytande = leveranser_flytande[leveranser_flytande.region.isin(['0160 T�by', '0117 �ster�ker', '0180 Stockholm', '0980 Gotland', '01 Stockholms l�n', '22 V�sternorrlands l�n', '09 Gotlands l�n', '00 Riket', '2581 Pite�', '1480 G�teborg'])].reset_index(drop=True)
elproduction_bransleanvandning = elproduction_bransleanvandning[elproduction_bransleanvandning.region.isin(['0160 T�by', '0117 �ster�ker', '0180 Stockholm', '0980 Gotland', '01 Stockholms l�n', '22 V�sternorrlands l�n', '09 Gotlands l�n', '00 Riket', '2581 Pite�', '1480 G�teborg'])].reset_index(drop=True)
fjarrvarmeproduktion = fjarrvarmeproduktion[fjarrvarmeproduktion.region.isin(['0160 T�by', '0117 �ster�ker', '0180 Stockholm', '0980 Gotland', '01 Stockholms l�n', '22 V�sternorrlands l�n', '09 Gotlands l�n', '00 Riket', '2581 Pite�', '1480 G�teborg'])].reset_index(drop=True)
slutanvandning = slutanvandning[slutanvandning.region.isin(['0160 T�by', '0117 �ster�ker', '0180 Stockholm', '0980 Gotland', '01 Stockholms l�n', '22 V�sternorrlands l�n', '09 Gotlands l�n', '00 Riket', '2581 Pite�', '1480 G�teborg'])].reset_index(drop=True)
energidata = energidata[energidata.region.isin(['0160 T�by', '0117 �ster�ker', '0180 Stockholm', '0980 Gotland', '01 Stockholms l�n', '22 V�sternorrlands l�n', '09 Gotlands l�n', '00 Riket', '2581 Pite�', '1480 G�teborg'])].reset_index(drop=True)


## Group datasets
leveranser_flytande = leveranser_flytande.groupby(['region', 'ar', 'fornybar_bransletyp'], as_index=False).agg(Sum=('m3', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)
elproduction_bransleanvandning_grouped = elproduction_bransleanvandning.groupby(['region', 'ar', 'fornybar_bransletyp', 'fornybar_produktionssatt'], as_index=False).agg(Sum=('MWh', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)
fjarrvarmeproduktion = fjarrvarmeproduktion.groupby(['region', 'ar', 'fornybar_bransletyp', 'fornybar_produktionssatt'], as_index=False).agg(Sum=('MWh', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)
slutanvandning = slutanvandning.groupby(['region', 'ar', 'fornybar_bransletyp', 'forbrukarkategori'], as_index=False).agg(Sum=('MWh', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)
bransleforbrukning = bransleforbrukning.groupby(['ar', 'fornybar_bransletyp', 'produktionsslag'], as_index=False).agg( Sum=('branslefarbrukning', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)
energidata = energidata.groupby(['region', 'ar', 'fornybar_energityp', 'fornybar_kategori'], as_index=False).agg(Sum=('MWh', np.sum)).sort_values(by=['Sum']).reset_index(drop=True)


leveranser_flytande.ar = pd.to_datetime(leveranser_flytande.ar, format='%Y-%m-%d').dt.year
elproduction_bransleanvandning_grouped.ar = pd.to_datetime(elproduction_bransleanvandning_grouped.ar, format='%Y-%m-%d').dt.year
fjarrvarmeproduktion.ar = pd.to_datetime(fjarrvarmeproduktion.ar, format='%Y-%m-%d').dt.year
slutanvandning.ar = pd.to_datetime(slutanvandning.ar, format='%Y-%m-%d').dt.year
bransleforbrukning.ar = pd.to_datetime(bransleforbrukning.ar, format='%Y-%m-%d').dt.year
energidata.ar = pd.to_datetime(energidata.ar, format='%Y-%m-%d').dt.year



  slutanvandning = pd.read_table('../data/to-use/slutanvandning-region-forbrukarkategori-bransletyp-kategori-energityp-ar.csv', sep=',')


In [3]:
# Constants

AR = 'ar'
M3 = 'm3'
MWH = 'MWh'

BRANSLETYP = 'bransletyp'
FARBRUKARKATEGORI = 'farbrukarkategori'
PRODUCTIONSSATT = 'produktionssatt'
KATEGORI = 'kategori'
ENERGITYP = 'energityp'
PRODUCTIONSSLAG = 'produktionsslag'
BRANSLEFORBRUKNING = 'bransleforbukning'

REGION = 'region'

FORNYBAR_BRANSLETYP = 'fornybar_bransletyp'
FORNYBAR_PRODUCTIONSSATT = 'fornybar_produktionssatt'
FORNYBAR_PRODUCTIONSSLAG = 'fornybar_produktionssatt'
FORNYBAR_ENERGITYP = 'fornybar_energityp'
FORNYBAR_KATEGORI = 'fornybar_kategori'


In [4]:
encodeList = [REGION, FORNYBAR_BRANSLETYP, FORNYBAR_PRODUCTIONSSATT, FORNYBAR_PRODUCTIONSSLAG, FORNYBAR_ENERGITYP, FORNYBAR_KATEGORI]

In [5]:
for encode_item in encodeList:
    if encode_item in leveranser_flytande.columns:
        leveranser_flytande[encode_item].replace(leveranser_flytande[encode_item].unique(), range(
            0, len(leveranser_flytande[encode_item].unique())), inplace=True)

In [6]:
# split our data - by loading X and y we get a frame of all our patient attributes (age, sex serum etc) and a seperate frame of outcomes
(
    attributes_train,
    attributes_test,
    progression_train,
    progression_test,
) = train_test_split(leveranser_flytande, leveranser_flytande.Sum, test_size=0.33)

# Instantiate
linear_regression = LinearRegression()

# Fit
linear_regression.fit(attributes_train, progression_train)

# Predict
progression_predict = linear_regression.predict(attributes_test)

In [7]:
type(load_airline())

pandas.core.series.Series

In [8]:
leveranser_flytande

Unnamed: 0,region,ar,fornybar_bransletyp,Sum
0,0,1970,0,0.0
1,1,1970,0,0.0
2,1,1970,0,0.0
3,1,1970,0,0.0
4,2,1970,1,0.0
...,...,...,...,...
553,1,1970,2,1456162.3
554,8,1970,2,3062611.2
555,8,1970,2,3553752.7
556,8,1970,2,5088307.9


In [9]:
leveranser_flytande.reset_index(level=['ar', 'region', 'fornybar_bransletyp'])

KeyError: 'Requested level (ar) does not match index name (None)'

In [None]:
# Get our airline data:
y = leveranser_flytande[['ar','Sum']]
plot_series(y, labels=['fornybar_bransletyp'])

# Instantiate:
forecaster = NaiveForecaster()

# Fit:
forecaster.fit(y)

In [None]:

# An sktime ForecastingHorizon object:
sktime_fh = ForecastingHorizon(
    pd.PeriodIndex(pd.date_range("1961-01", periods=36, freq="M")), is_relative=False
)

# numpy arrays are also valid:
simple_fh = np.arange(1, 37)

print(
    f"The sktime Forecasting Horizon:\n\n {sktime_fh}"
    " \n\n The simple numpy based Forecasting Horizon:"
    " \n\n {simple_fh} \n\n Both work!!"
)

In [None]:
# Predict:
y_pred = forecaster.predict(sktime_fh)

# Check it out:
plot_series(y, y_pred, labels=["y", "y_pred"])

In [None]:
# Instantiate:
forecaster = NaiveForecaster(sp=12)

# Fit:
forecaster.fit(y)

# Predict:
y_pred = forecaster.predict(sktime_fh)

# Check it out:
plot_series(y, y_pred, labels=["y", "y_pred"])

In [None]:

# Instantiate:
forecaster = ThetaForecaster(sp=12)

# Fit:
forecaster.fit(y)

# Predict:
y_pred_theta = forecaster.predict(sktime_fh)

# Check it out:
plot_series(y, y_pred, y_pred_theta, labels=["y", "y_pred_naive", "y_pred_theta"])