In [None]:
import polars as pl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import gc
from prj.config import DATA_DIR


# Root directory of the partitioned training parquet files.
BASE_PATH = DATA_DIR / 'train'


# Only one partition is read here; widen `partition_ids` to load more.
partition = 5
partition_ids = range(partition, partition + 1)
partition_paths = [
    BASE_PATH / f'partition_id={pid}' / 'part-0.parquet'
    for pid in partition_ids
]

# Stack the partitions and put the rows in (date, time, symbol) order.
train_ds = pl.concat(
    [pl.read_parquet(path) for path in partition_paths]
).sort('date_id', 'time_id', 'symbol_id')

# Every column whose name starts with 'feature_' is a candidate model input.
features = [name for name in train_ds.columns if name.startswith('feature_')]
target_feature = 'responder_6'


In [None]:
# Narrow the experiment to a hand-picked subset of the feature columns.
features = ['feature_00', 'feature_01', 'feature_02']
numerical_features = features  # alias: all selected features are numerical
categorical_features = []
time_cols = ['date_id', 'time_id']

# Keep only the columns this notebook uses: time keys, symbol id, sample
# weight, the selected features and the target.
selected_cols = [*time_cols, 'symbol_id', 'weight', *features, target_feature]
train_ds = train_ds.select(selected_cols)
train_ds.head(3)

In [None]:
from prj.utils import build_rolling_stats


# Rolling statistics of the numerical features with a window of 30; the
# result is passed to moving_z_score_norm in the next cell.
# NOTE(review): the window unit (rows, time_ids or dates) is defined inside
# build_rolling_stats -- confirm there.
rolling_stats = build_rolling_stats(
    train_ds,
    cols=numerical_features,
    window=30,
)
rolling_stats.head(3)

In [None]:
from prj.utils import moving_z_score_norm

# Normalise the numerical features using the rolling statistics; clipping is
# disabled (clip_bound=None).
# NOTE(review): this cell rebinds `train_ds` to the helper's output, so
# re-running it on its own would feed the already-transformed frame back into
# the normaliser. Re-run from the load cell instead.
train_ds = moving_z_score_norm(
    train_ds,
    rolling_stats_df=rolling_stats,
    cols=numerical_features,
    clip_bound=None,
)
train_ds.head(3)

In [None]:
from prj.model.nn.mlp import MLP


# Constructor arguments for the MLP. numerical_transform is None here; the
# features were already normalised in the notebook by moving_z_score_norm.
mlp_kwargs = dict(
    categorical_features=categorical_features,
    numerical_features=numerical_features,
    target_feature=target_feature,
    numerical_transform=None,
)
model = MLP(**mlp_kwargs)
# Left disabled by the author:
# model._build()
# model.summary()

In [None]:
# Model inputs as a pandas frame; target and sample weights as numpy arrays.
X = train_ds.select(features).to_pandas()
y = train_ds[target_feature].to_numpy()
w = train_ds['weight'].to_numpy()

# Positional 80/20 split with no shuffling: the first 80% of rows train, the
# remainder validates. X, y and w come from the same frame, so a single cut
# index applies to all three.
# NOTE(review): this is a chronological split only if the row order set by
# the sort in the load cell survives the normalisation step -- confirm.
TRAIN_FRACTION = 0.8
split_idx = int(TRAIN_FRACTION * len(X))

X_train, X_val = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_val = y[:split_idx], y[split_idx:]
w_train, w_val = w[:split_idx], w[split_idx:]

X_train.shape, y_train.shape, w_train.shape, X_val.shape, y_val.shape, w_val.shape

In [None]:
from keras import optimizers as tfko
from keras import utils as keras_utils
from prj.model.nn.losses import WeightedZeroMeanR2Loss

# Seed the Python, NumPy and Keras-backend RNGs so the run is reproducible
# under Restart & Run All.
# NOTE(review): this covers weight initialisation only if the MLP builds its
# network inside fit(); if weights are created in the constructor, move the
# seeding above the `MLP(...)` call.
SEED = 42
keras_utils.set_random_seed(SEED)

# Training configuration: Adam with a small learning rate and the project's
# weighted zero-mean R2 loss; no extra metrics are tracked.
optimizer = tfko.Adam(learning_rate=1e-4)
loss = WeightedZeroMeanR2Loss()
metrics = []

# Fit on the training slice with per-sample weights; the validation slice
# (with its own weights) is passed alongside early_stopping_rounds=5.
model.fit(
    X_train, y_train,
    sample_weight=w_train,
    validation_data=(X_val, y_val, w_val),
    metrics=metrics,
    optimizer=optimizer,
    loss=loss,
    early_stopping_rounds=5,
    epochs=10,
)

In [None]:
# Call the model's own plotting helper after training.
# NOTE(review): presumably this shows the curves recorded by fit() -- confirm
# against prj.model.nn.mlp.MLP.plot.
model.plot()