In [1]:
import polars as pl
import numpy as np
import os
import gc

# Toggle for GPU usage. When it is off, CUDA devices are hidden from the
# process by setting CUDA_VISIBLE_DEVICES to "-1".
USE_GPU = False
if not USE_GPU:
    os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

In [2]:
from prj.config import DATA_DIR
from prj.data.data_loader import DataConfig, DataLoader

# Options forwarded verbatim to DataConfig; the resulting config and the
# project data directory are then handed to the DataLoader.
data_args = dict(zero_fill=True)
config = DataConfig(**data_args)
loader = DataLoader(config=config, data_dir=DATA_DIR)

2024-12-18 14:45:15.395998: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-18 14:45:15.396033: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-18 14:45:15.397296: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-18 14:45:15.404089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Split configuration: date_id window passed to the loader, share of the data
# used for validation, and share of the training dates held out for early stopping.
start_dt, end_dt = 1020, 1529
val_ratio = 0.2
es_ratio = 0.1
early_stopping = True


def _rows_and_dates(ds):
    """Return ``(n_rows, n_dates)`` for a lazy frame with a ``date_id`` column.

    ``n_rows`` is the total row count and ``n_dates`` the number of distinct
    non-null ``date_id`` values. Both are computed in a single ``collect`` so
    the lazy query is only executed once per dataset.
    """
    return ds.select(pl.len(), pl.col('date_id').unique().count()).collect().row(0)


train_ds, val_ds = loader.load_train_and_val(start_dt=start_dt, end_dt=end_dt, val_ratio=val_ratio)
es_ds = None
if early_stopping:
    # Carve the early-stopping set out of the *last* training dates so that it
    # stays chronologically after the data the model is fitted on.
    train_dates = train_ds.select('date_id').unique().collect().to_series().sort()
    split_point = int(len(train_dates) * (1 - es_ratio))
    split_date = train_dates[split_point]
    es_ds = train_ds.filter(pl.col('date_id').ge(split_date))
    train_ds = train_ds.filter(pl.col('date_id').lt(split_date))

# Summary statistics for each split (zeros for the ES split when it is disabled).
n_rows_train, n_dates_train = _rows_and_dates(train_ds)
n_rows_es, n_dates_es = _rows_and_dates(es_ds) if es_ds is not None else (0, 0)
n_rows_val, n_dates_val = _rows_and_dates(val_ds)
print(f'N rows train: {n_rows_train}, ES: {n_rows_es}, VAL: {n_rows_val}')
print(f'N dates train: {n_dates_train}, ES: {n_dates_es}, VAL: {n_dates_val}')

N rows train: 13495856, ES: 1457808, VAL: 3692920
N dates train: 366, ES: 41, VAL: 102


In [4]:
from prj.model.keras.mlp import Mlp


# Architecture settings for the project's Mlp wrapper; the input dimension is
# a 1-tuple holding the number of features exposed by the loader.
mlp_params = {
    'input_dim': (len(loader.features),),
    'n_layers': 3,
    'start_units': 256,
    'units_decay': 1.5,
}
model = Mlp(**mlp_params)

In [5]:
# Materialise the training arrays; the fourth element returned by
# _build_splits is not used in this notebook.
X_train, y_train, w_train, _ = loader._build_splits(train_ds)

# The early-stopping arrays only exist when early stopping is enabled;
# otherwise the names are still bound (to None) so later cells can reference them.
if early_stopping:
    X_es, y_es, w_es, _ = loader._build_splits(es_ds)
else:
    X_es = y_es = w_es = None

# Show the shapes of the arrays that actually exist. Accessing `.shape` on the
# None placeholders would raise AttributeError when early stopping is disabled.
tuple(
    arr.shape
    for arr in (X_train, y_train, w_train, X_es, y_es, w_es)
    if arr is not None
)

((13495856, 79),
 (13495856,),
 (13495856,),
 (1457808, 79),
 (1457808,),
 (1457808,))

In [7]:
from keras import optimizers as tfko
from keras import metrics as tfkm
from keras import callbacks as tfkc


# Training hyper-parameters handed to the project's fit wrapper.
optimizer = tfko.Adam(learning_rate=1e-4)
loss = 'mse'
metrics = [tfkm.R2Score(), tfkm.MeanSquaredError()]
batch_size = 1024

# Fit on the training arrays with per-sample weights. The early-stopping split
# is passed as validation data only when early stopping is enabled.
# NOTE: the arguments are passed inline (not stored in a module-level dict) so
# that no extra references to the large arrays outlive this call.
model.fit(
    X_train,
    y_train,
    sample_weight=w_train,
    validation_data=None if not early_stopping else (X_es, y_es, w_es),
    batch_size=batch_size,
    epochs=50,
    loss=loss,
    optimizer=optimizer,
    metrics=metrics,
)

2024-12-18 14:45:30.059869: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-12-18 14:45:30.059899: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: campanas.duckdns.org
2024-12-18 14:45:30.059908: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: campanas.duckdns.org
2024-12-18 14:45:30.060012: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 555.42.6
2024-12-18 14:45:30.060036: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 555.42.6
2024-12-18 14:45:30.060043: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:241] kernel version seems to match DSO: 555.42.6


Training with early stopping patience 5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 79)]              0         
                                                                 
 Dense0 (Dense)              (None, 256)               20480     
                                                                 
 BatchNormalization0 (Batch  (None, 256)               1024      
 Normalization)                                                  
                                                                 
 Activation0 (Activation)    (None, 256)               0         
                                                                 
 Dropout0 (Dropout)          (None, 256)               0         
                                                                 
 Dense1 (Dense)              (None, 170)               43690     
                     

2024-12-18 14:45:31.679712: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 4264690496 exceeds 10% of free system memory.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Fit complete after 14


In [8]:
# Drop the notebook's references to the training and early-stopping arrays,
# then run a garbage-collection pass (its return value is the cell output).
del X_train, y_train, w_train
del X_es, y_es, w_es
gc.collect()

1692

In [9]:
# Build the validation arrays (fourth returned element unused) and predict on them.
X_val, y_val, w_val, _ = loader._build_splits(val_ds)
y_hat = model.predict(X_val)

# Display target and prediction shapes side by side as a sanity check.
tuple(arr.shape for arr in (y_val, y_hat))


    1/14426 [..............................] - ETA: 32:19

2024-12-18 15:14:37.939146: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1166962720 exceeds 10% of free system memory.




((3692920,), (3692920,))

In [10]:
from prj.metrics import weighted_mae, weighted_mse, weighted_r2, weighted_rmse

# Evaluate every weighted metric on the validation predictions, keyed by
# metric name, using the validation sample weights.
{
    metric_name: metric_fn(y_val, y_hat, weights=w_val)
    for metric_name, metric_fn in {
        'r2_w': weighted_r2,
        'mae_w': weighted_mae,
        'mse_w': weighted_mse,
        'rmse_w': weighted_rmse,
    }.items()
}

{'r2_w': 0.007820487022399902,
 'mae_w': 0.50105387,
 'mse_w': 0.60630876,
 'rmse_w': 0.77865833}