In [57]:
import os

import numpy as np
import pandas as pd
import polars as pl
import polars.selectors as cs
from pylab import plt, mpl

from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import torch

import tensorflow as tf

from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasRegressor


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '6'

tf.random.set_seed(100)

plt.style.use('seaborn-v0_8')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
pd.set_option('display.precision', 4)
np.set_printoptions(suppress=True, precision=4)

In [None]:
# !wget http://hilpisch.com/aiif_eikon_eod_data.csv

# url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'
# raw = pd.read_csv(url, index_col=0, parse_dates=True)['EUR=']

--2025-12-12 11:16:01--  http://hilpisch.com/aiif_eikon_eod_data.csv
Resolving hilpisch.com (hilpisch.com)... 64.226.93.207
Connecting to hilpisch.com (hilpisch.com)|64.226.93.207|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://hilpisch.com/aiif_eikon_eod_data.csv [following]
--2025-12-12 11:16:01--  https://hilpisch.com/aiif_eikon_eod_data.csv
Connecting to hilpisch.com (hilpisch.com)|64.226.93.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 245030 (239K) [application/octet-stream]
Saving to: ‘aiif_eikon_eod_data.csv’


2025-12-12 11:16:03 (251 KB/s) - ‘aiif_eikon_eod_data.csv’ saved [245030/245030]



# Data (Dane)

In [2]:
raw = pl.read_csv(
    source="./aiif_eikon_eod_data.csv", 
    has_header=True, 
    try_parse_dates=True)[['Date', 'EUR=']]
raw.head()

Date,EUR=
date,f64
2010-01-01,1.4323
2010-01-04,1.4411
2010-01-05,1.4368
2010-01-06,1.4412
2010-01-07,1.4318


In [3]:
rate = raw.sort("Date").group_by_dynamic("Date", every='1mo').agg(pl.col("EUR=").last())
rate.tail()

Date,EUR=
date,f64
2019-09-01,1.0898
2019-10-01,1.115
2019-11-01,1.1015
2019-12-01,1.121
2020-01-01,1.121


In [4]:
px.line(
    x=rate["Date"],
    y=rate["EUR="]
)

In [5]:
arr = rate["EUR="].to_numpy()
arr = arr - arr.mean()
arg = np.linspace(-2, 2, len(arr))
px.scatter(
    x=arg,
    y=arr
)

# Success (Sukces)

In [6]:
def MSE(arr1, arr2):
    return np.mean((arr1 - arr2) ** 2)

## Regression (Regresja)

In [7]:
reg = np.polyfit(arg, arr, deg=5)
reg

array([-0.0191, -0.0147,  0.1099,  0.0601, -0.2083, -0.0328])

In [8]:
p = np.polyval(reg, arg)
MSE(p, arr)

0.003416642295737102

In [9]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=arr, name="przykładowe dane"))
fig.add_trace(go.Scatter(x=arg, y=p, name="regresja"))
fig.show()

In [10]:
%%time
for i in range(10, len(arg) + 1, 20):
    _reg = np.polyfit(arg[:i], arr[:i], deg=3)
    res_reg = np.polyval(_reg, arg)
    mse = MSE(arr, res_reg)
    print(f'{i:3d} | MSE={mse}')

 10 | MSE=248628.10681695852
 30 | MSE=731.9382249304387
 50 | MSE=12.236088505004414
 70 | MSE=0.7410590619743298
 90 | MSE=0.005743061730409332
110 | MSE=0.006492800939555585
CPU times: user 1.75 ms, sys: 0 ns, total: 1.75 ms
Wall time: 1.62 ms


In [11]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=arr, name="przykładowe dane"))
fig.add_trace(go.Scatter(x=arg, y=res_reg, name="regresja"))
fig.add_trace(go.Scatter(x=arg, y=p, name="p"))
fig.show()

## NN (Sieci neuronowe)

### TensorFlow

In [12]:
model_tf = Sequential()  
model_tf.add(Dense(256, activation='relu', input_dim=1))
model_tf.add(Dense(1, activation='linear'))
model_tf.compile(loss='mse', optimizer='rmsprop')

2025-12-13 20:52:05.192280: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-12-13 20:52:05.194973: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [13]:
model_tf.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               512       
                                                                 
 dense_1 (Dense)             (None, 1)                 257       
                                                                 
Total params: 769 (3.00 KB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
%time 
hist = model_tf.fit(arg, arr, epochs=1500, verbose=False)

CPU times: user 3 μs, sys: 2 μs, total: 5 μs
Wall time: 9.06 μs


In [16]:
res_nn_tf = model_tf.predict(arg).flatten()
MSE(arr, res_nn_tf)





0.002150489032068904

In [18]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=arr, name="przykładowe dane"))
fig.add_trace(go.Scatter(x=arg, y=res_reg, name="regresja"))
fig.add_trace(go.Scatter(x=arg, y=res_nn_tf, name="nn tf"))
fig.show()

[0.007991624996066093,
 0.0042166272178292274,
 0.0035649852361530066,
 0.0033198222517967224,
 0.002906306879594922,
 0.002871581818908453,
 0.002796181244775653,
 0.0027708911802619696,
 0.0027815334033221006,
 0.0027876843232661486,
 0.003286968683823943,
 0.00272320955991745,
 0.0030705388635396957,
 0.0030803976114839315,
 0.0026210055220872164,
 0.0026014416944235563,
 0.0025830776430666447,
 0.0026276453863829374,
 0.0034013562835752964,
 0.002652592957019806,
 0.00257584685459733,
 0.002508098492398858,
 0.0026877596974372864,
 0.0033206951338797808,
 0.002631010487675667,
 0.003061337396502495,
 0.0027350380551069975,
 0.0027061065193265676,
 0.0026278668083250523,
 0.002698267810046673,
 0.0024929074570536613,
 0.0027429864276200533,
 0.0026841098442673683,
 0.0026561333797872066,
 0.0026045639533549547,
 0.0029024071991443634,
 0.0029278136789798737,
 0.00244631664827466,
 0.002949655055999756,
 0.002493905834853649,
 0.002969691064208746,
 0.0024755364283919334,
 0.00242034

In [22]:
nn_res = pl.DataFrame(hist.history)
nn_res.tail()

loss
f64
0.001687
0.001851
0.001812
0.001889
0.002092


In [22]:

n = 10
px.line(
    x=np.arange(len(hist.history["loss"]) - n),
    y=hist.history["loss"][n:]
)

### PyTorch

In [23]:
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(1, 256)
        self.fc2 = torch.nn.Linear(256, 1)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)  # linear output
        return x

model_torch = Model()
arr_tr = torch.tensor(arr, dtype=torch.float32).unsqueeze(1)
arg_tr = torch.tensor(arg, dtype=torch.float32).unsqueeze(1)

criterion = torch.nn.MSELoss()
learning_rate = 1e-2
optimizer = torch.optim.RMSprop(model_torch.parameters(), lr=learning_rate)

for t in range(1500):
    optimizer.zero_grad()
    y_pred = model_torch(arg_tr)

    loss = criterion(y_pred, arr_tr)
    if t % 100 == 0:
        print(t, loss.item())

    loss.backward()
    optimizer.step()

0 0.03892764076590538
100 0.005328556522727013
200 0.0037431996315717697
300 0.0032286029309034348
400 0.0030278293415904045
500 0.0029980719555169344
600 0.003040362847968936
700 0.0032488012220710516
800 0.24657826125621796
900 0.032309096306562424
1000 0.002680244855582714
1100 0.003174303565174341
1200 0.0024613207206130028
1300 0.01050848513841629
1400 0.0027722185477614403


In [25]:
value_torch = model_torch(arg_tr)

fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=arr, name="przykładowe dane"))
fig.add_trace(go.Scatter(x=arg, y=res_reg, name="regresja"))
fig.add_trace(go.Scatter(x=arg, y=value_torch.detach().numpy().squeeze(), name="nn torch"))
fig.add_trace(go.Scatter(x=arg, y=res_nn_tf, name="nn tf"))
fig.show()

# Capacity (Pojemność)

In [33]:
reg_dict = {}
values = rate["EUR="].to_numpy()
for d in range(1, 12, 2):
    reg_dict[d] = np.polyfit(arg, values, deg=d)
    p = np.polyval(reg_dict[d], arg)
    mse = MSE(values, p)
    print(f'{d:2d} | MSE={mse}')

 1 | MSE=0.005322474034260403
 3 | MSE=0.004353110724143184
 5 | MSE=0.0034166422957371016
 7 | MSE=0.0027389501772354012
 9 | MSE=0.0014119616263308452
11 | MSE=0.0012651237868752318


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=values,line={"width": 3}, name="dane"))
for d in reg_dict:
    p = np.polyval(reg_dict[d], arg)
    fig.add_trace(go.Scatter(x=arg, y=p, line={"width": 3}, name=f"regresja {d}"))

fig.show()

In [45]:
def create_dnn_model(hl=1, hu=256):
    ''' Function to create Keras DNN model.
    
    Parameters
    ==========
    hl: int
        number of hidden layers
    hu: int
        number of hidden units (per layer)
    '''
    model = Sequential()
    for _ in range(hl):
        model.add(Dense(hu, activation='relu', input_dim=1))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mse', optimizer='rmsprop')
    return model

In [46]:
model_tf2_dict = {}
for hl in range(1, 6):
    model_tf2_dict[hl] = create_dnn_model(hl, 128)
    model_tf2_dict[hl].summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_26 (Dense)            (None, 128)               256       
                                                                 
 dense_27 (Dense)            (None, 1)                 129       
                                                                 
Total params: 385 (1.50 KB)
Trainable params: 385 (1.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 128)               256       
                                                                 
 dense_29 (Dense)            (None, 128)               16512     
                                                                 
 d

In [47]:
%time 
for hl in model_tf2_dict:
    model_tf2_dict[hl].fit(arg, values, epochs=2500, verbose=False)
    p = model_tf2_dict[hl].predict(arg).flatten()
    print(hl, MSE(values, p))

CPU times: user 6 μs, sys: 0 ns, total: 6 μs
Wall time: 10.5 μs
1 0.002464864335125876
2 0.0013641593343381871
3 0.0008857427222490154
4 0.0014922434805919695
5 0.0005861728020879642


In [48]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=values,line={"width": 3}, name="dane"))
for hl in model_tf2_dict:
    p = model_tf2_dict[hl].predict(arg).flatten()
    fig.add_trace(go.Scatter(x=arg, y=p, line={"width": 3}, name=f"model_tf2 {hl}"))

fig.show()



In [52]:

arg_tr = torch.tensor(arg, dtype=torch.float32).unsqueeze(1)
arr_tr = torch.tensor(values, dtype=torch.float32).unsqueeze(1)
criterion = torch.nn.MSELoss()
learning_rate = 3e-3

def get_tr_res(hl: int=3, hu: int=256):
    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = torch.nn.Linear(1, hu)
            self.hidden_layers = {}
            for _hl in range(hl):
                self.hidden_layers[_hl] = torch.nn.Linear(hu, hu)
            self.fc2 = torch.nn.Linear(hu, 1)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            for _hl in range(hl):
                x = self.relu(self.hidden_layers[_hl](x))
            x = self.fc2(x)  # linear output
            return x

    model_torch = Model()

    optimizer = torch.optim.RMSprop(model_torch.parameters(), lr=learning_rate)

    for t in range(1500):

        optimizer.zero_grad()
        y_pred = model_torch(arg_tr)

        loss = criterion(y_pred, arr_tr)
        if t % 100 == 0:
            print(t, loss.item())

        loss.backward()
        optimizer.step()
    return model_torch(arg_tr).detach().numpy().squeeze()

In [53]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=arg, y=values,line={"width": 3}, name="dane"))
for hl in range(0, 6):
    p = get_tr_res(hl=hl)
    fig.add_trace(go.Scatter(x=arg, y=p, line={"width": 3}, name=f"model_torch2 {hl}"))

fig.show()

0 1.7440651655197144
100 0.0027648049872368574
200 0.013381325639784336
300 0.04397282749414444
400 0.03262290731072426
500 0.035369571298360825
600 0.013453006744384766
700 0.006898708641529083
800 0.00934787467122078
900 0.01376061700284481
1000 0.008919800631701946
1100 0.008167962543666363
1200 0.009238382801413536
1300 0.007823734544217587
1400 0.0069000981748104095
0 1.4828107357025146
100 0.0046185762621462345
200 0.0052610221318900585
300 0.004590817727148533
400 0.004566740710288286
500 0.00430466141551733
600 0.004066392779350281
700 0.0039138589054346085
800 0.00381565117277205
900 0.0036315363831818104
1000 0.0034981942735612392
1100 0.0032937501091510057
1200 0.003223394975066185
1300 0.0031189764849841595
1400 0.002897758036851883
0 1.729259967803955
100 0.0025588569696992636
200 0.0013469947734847665
300 0.001804886618629098
400 0.001807402353733778
500 0.0018250781577080488
600 0.0017742931377142668
700 0.0017215980915352702
800 0.0016539643984287977
900 0.0016226245788

# Evaluation (Ocena)

In [55]:
te = int(0.25 * len(arg))
va = int(0.25 * len(arg))
np.random.seed(100)
ind = np.arange(len(arg))
np.random.shuffle(ind)

ind_te = np.sort(ind[:te])
ind_va = np.sort(ind[te:te + va])
ind_tr = np.sort(ind[te + va:])

f_te = arg[ind_te]
f_va = arg[ind_va]
f_tr = arg[ind_tr]

values_te = values[ind_te]
values_va = values[ind_va]
values_tr = values[ind_tr]

In [56]:
reg = {}
mse = {}
for d in range(1, 22, 4):
    reg[d] = np.polyfit(f_tr, values_tr, deg=d)
    p = np.polyval(reg[d], f_tr)
    mse_tr = MSE(values_tr, p)
    p = np.polyval(reg[d], f_va)
    mse_va = MSE(values_va, p)
    mse[d] = (mse_tr, mse_va)
    print(f'{d:2d} | MSE_tr={mse_tr:7.5f} | MSE_va={mse_va:7.5f}')

 1 | MSE_tr=0.00574 | MSE_va=0.00492
 5 | MSE_tr=0.00375 | MSE_va=0.00273
 9 | MSE_tr=0.00132 | MSE_va=0.00243
13 | MSE_tr=0.00094 | MSE_va=0.00183
17 | MSE_tr=0.00060 | MSE_va=0.00153
21 | MSE_tr=0.00046 | MSE_va=0.00837


In [63]:
fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(x=arg, y=values,line={"width": 4}, name="training data"), row=1, col=1)
fig.add_trace(go.Scatter(x=arg, y=values,line={"width": 4}, name="validation data"), row=2, col=1)

for d in reg:
    p = np.polyval(reg[d], f_tr)
    fig.add_trace(go.Scatter(x=f_tr, y=p, line={"width": 3}, name=f'deg={d} (tr)'), row=1, col=1)
    p = np.polyval(reg[d], f_va)
    fig.add_trace(go.Scatter(x=f_va, y=p, line={"width": 3}, name=f'deg={d} (va)'), row=2, col=1)
fig.show()

In [60]:
model = create_dnn_model(2, 256)

In [61]:
callbacks = [EarlyStopping(monitor='loss',
                           patience=100,
                          restore_best_weights=True)]

In [62]:
%%time
hist = model.fit(f_tr, values_tr, epochs=3000, verbose=False,
          validation_data=(f_va, values_va),
          callbacks=callbacks)

CPU times: user 8.97 s, sys: 650 ms, total: 9.62 s
Wall time: 8.34 s


In [None]:
fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(x=f_tr, y=values_tr, line={"width": 3}, name="training data"), row=1, col=1)
fig.add_trace(go.Scatter(x=f_tr, y=values_tr, line={"width": 3}, name="validation data"), row=2, col=1)

for d in reg:
    p = np.polyval(reg[d], f_tr)
    fig.add_trace(go.Scatter(x=f_tr, y=p, line={"width": 3}, name=f'deg={d} (tr)'), row=1, col=1)
    p = np.polyval(reg[d], f_va)
    fig.add_trace(go.Scatter(x=f_va, y=p, line={"width": 3}, name=f'deg={d} (va)'), row=2, col=1)
fig.show()

In [None]:
fig, ax = plt.subplots(2, 1, sharex=True, figsize=(10, 8))
ax[0].plot(f_tr, l_tr, 'ro', label='training data')
p = model.predict(f_tr)
ax[0].plot(f_tr, p, '--', label=f'DNN (tr)')
ax[0].legend()
ax[1].plot(f_va, l_va, 'go', label='validation data')
p = model.predict(f_va)
ax[1].plot(f_va, p, '--', label=f'DNN (va)')
ax[1].legend();

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res.iloc[35::25].plot(figsize=(10, 6))
plt.ylabel('MSE')
plt.xlabel('epochs');

In [None]:
p_ols = np.polyval(reg[5], f_te)
p_dnn = model.predict(f_te).flatten()

In [None]:
MSE(l_te, p_ols)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(f_te, l_te, 'ro', label='test data')
plt.plot(f_te, p_ols, '--', label='OLS prediction')
plt.plot(f_te, p_dnn, '-.', label='DNN prediction');
plt.legend();

# Bias & Variance (Obciązenie i wariancja)

In [None]:
f_tr = f[:20:2]
l_tr = l[:20:2]

In [None]:
f_va = f[1:20:2]
l_va = l[1:20:2]

In [None]:
reg_b = np.polyfit(f_tr, l_tr, deg=1)

In [None]:
reg_v = np.polyfit(f_tr, l_tr, deg=9, full=True)[0]

In [None]:
f_ = np.linspace(f_tr.min(), f_va.max(), 75)

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(f_tr, l_tr, 'ro', label='training data')
plt.plot(f_va, l_va, 'go', label='validation data')
plt.plot(f_, np.polyval(reg_b, f_), '--', label='high bias')
plt.plot(f_, np.polyval(reg_v, f_), '--', label='high variance')
plt.ylim(-0.2)
plt.legend(loc=2);

In [None]:
def evaluate(reg, f, l):
    p = np.polyval(reg, f)
    bias = np.abs(l - p).mean()
    var = p.var()
    msg = f'MSE={MSE(l, p):.4f} | R2={r2_score(l, p):9.4f} | '
    msg += f'bias={bias:.4f} | var={var:.4f}'
    print(msg)

In [None]:
evaluate(reg_b, f_tr, l_tr)

In [None]:
evaluate(reg_b, f_va, l_va)

In [None]:
evaluate(reg_v, f_tr, l_tr)

In [None]:
evaluate(reg_v, f_va, l_va)

# Cross-Validation (Walidacja krzyżowa)

In [None]:
def PolynomialRegression(degree=None, **kwargs):
    return make_pipeline(PolynomialFeatures(degree),
                        LinearRegression(**kwargs))

In [None]:
np.set_printoptions(suppress=True,
        formatter={'float': lambda x: f'{x:12.2f}'})

In [None]:
print('\nCross-validation scores')
print(74 * '=')
for deg in range(0, 10, 1):
    model = PolynomialRegression(deg)
    cvs = cross_val_score(model, f.reshape(-1, 1), l, cv=5)
    print(f'deg={deg} | ' + str(cvs.round(2)))

In [None]:
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
model = KerasRegressor(model=create_dnn_model,
                      verbose=False, epochs=1000,
                      hl=1, hu=36)

In [None]:
%%time 
cross_val_score(model, f.reshape(-1, 1), l, cv=5)

In [None]:
model = KerasRegressor(model=create_dnn_model,
                      verbose=False, epochs=1000,
                      hl=3, hu=256)

In [None]:
%%time 
cross_val_score(model, f.reshape(-1, 1), l, cv=5)