In [1]:
import sgml, sgutil, sgpp, dproc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import root_mean_squared_error

In [2]:
from proc_v2 import p
from ml_v1 import X_cat, X_num, X_all, target, kf, ss, config
from ml_v1 import xgb_adapter, lgb_adapter, cb_adapter, lr_adapter

In [3]:
sc = sgutil.SGCache('img', 'result', 'model')
df_train = p.fit_transform(['data/train.csv']).assign(
    Calories_Log = lambda x: np.log(x['Calories'] + 1)
)
df_test = p.transform(['data/test.csv'])

# LGB 1

- Duration 대신에 Duration_log를 사용하고 나머지 변수를 넣습니다.

In [26]:
hparams = {
    'model_params': {'n_estimators': 2000, 'colsample_bytree': 0.9, 'num_leaves': 63, 'learning_rate': 0.03},
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
    #'validation_fraction': 0.1, 
}
result = sc.cv_result('lgb1', df_train, kf, hparams, config, lgb_adapter, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.06018180399679065),
 [np.float64(0.060033420123306855),
  np.float64(0.06099143888982595),
  np.float64(0.058924739577219026),
  np.float64(0.06077761739681075)])

# LGB 2

In [22]:
hparams = {
    'model_params': {'n_estimators': 2500, 'colsample_bytree': 0.9, 'num_leaves': 7, 'learning_rate': 0.07},
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
    #'validation_fraction': 0.1, 
}
result = sc.cv_result('lgb2', df_train, kf, hparams, config, lgb_adapter, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.06020315289786632),
 [np.float64(0.060040232592896405),
  np.float64(0.061106934418696204),
  np.float64(0.05899023386480507),
  np.float64(0.0606752107150676)])

In [122]:
root_mean_squared_error(
    df_train[target],
    sc.read_prds(['lgb2', 'lgb1'], index = df_train.index).dot([0.5, 0.5])
)

0.05982246533502571

# XGB1

In [20]:
hparams = {
    'model_params': {'n_estimators': 2000, 'colsample_bytree': 0.85, 'learning_rate': 0.05, 'max_depth': 4},
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp',
              'Heart_Rate_div_Weight_sqrt'], 
    #'validation_fraction': 0.1, 
}
result = sc.cv_result('xgb1', df_train, kf, hparams, config, xgb_adapter, rerun = 0, use_gpu = True)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float32(0.060198575),
 [np.float32(0.05991964),
  np.float32(0.061047498),
  np.float32(0.058998078),
  np.float32(0.060829088)])

# XGB2

In [46]:
hparams = {
    'model_params': {'n_estimators': 5000, 'colsample_bytree': 0.85, 'learning_rate': 0.03, 'max_depth': 3},
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp',
              'Heart_Rate_div_Weight_sqrt'], 
    #'validation_fraction': 0.1, 
}
result = sc.cv_result('xgb2', df_train, kf, hparams, config, xgb_adapter, rerun = 1, use_gpu = True)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float32(0.060188733),
 [np.float32(0.059934042),
  np.float32(0.060982246),
  np.float32(0.05904163),
  np.float32(0.06079701)])

# CB1

In [23]:
hparams = {
    'model_params': {'max_depth': 5, 'n_estimators': 2000, 'learning_rate': 0.1}, 
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
}

result = sc.cv_result('cb1', df_train, kf, hparams, config, cb_adapter, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.05969808474501868),
 [np.float64(0.059346441384857535),
  np.float64(0.06059677405795864),
  np.float64(0.05848842161718217),
  np.float64(0.06036070192007636)])

# CB2

In [24]:
hparams = {
    'model_params': {'max_depth': 6, 'n_estimators': 2000, 'learning_rate': 0.1}, 
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
}

result = sc.cv_result('cb2', df_train, kf, hparams, config, cb_adapter, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.05974115485636679),
 [np.float64(0.05941062477285254),
  np.float64(0.06052264736955689),
  np.float64(0.05859811744844113),
  np.float64(0.0604332298346166)])

# CB3

In [60]:
hparams = {
    'model_params': {'max_depth': 7, 'n_estimators': 2000, 'learning_rate': 0.07, 'colsample_bylevel': 0.8}, 
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
}

result = sc.cv_result('cb3', df_train, kf, hparams, config, cb_adapter, use_gpu = 0, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.05961975651945828),
 [np.float64(0.059529832249262615),
  np.float64(0.06031693645479558),
  np.float64(0.058389015324299125),
  np.float64(0.06024324204947579)])

# CB4

In [69]:
hparams = {
    'model_params': {'max_depth': 8, 'n_estimators': 4000, 'learning_rate': 0.02}, 
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
}

result = sc.cv_result('cb4', df_train, kf, hparams, config, cb_adapter, use_gpu = 1, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

(np.float64(0.05960923051318617),
 [np.float64(0.059468639958028674),
  np.float64(0.06038440595457838),
  np.float64(0.058357414369228436),
  np.float64(0.06022646177090921)])

# 

In [4]:
import sgnn
nn_adapter = sgnn.NNAdapter(sgnn.NNRegressor, progress = 100)

2025-05-16 10:50:32.521486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747392632.532845   12798 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747392632.536381   12798 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747392632.545401   12798 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747392632.545412   12798 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747392632.545414   12798 computation_placer.cc:177] computation placer alr

In [7]:
nn_params = {
    'config':  [
        {'unit': 64, 'activation': 'swish', 'batch_norm': False},
        {'unit': 128, 'activation': 'swish', 'batch_norm': False},
        {'unit': 128, 'activation': 'swish', 'batch_norm': False},
        {'unit': 64, 'activation': 'swish', 'batch_norm': False},
    ]
}

hparams = {
    'model_params': {
        'model_params': nn_params,
        'epochs': 30,
        'optimizer': ('Adam', {'learning_rate': 0.0001}),
        'batch_size': 128, 'shuffle_size': 102400,
        'early_stopping': None, 'reduce_lr_on_plateau': None, 'lr_scheduler': None
    }, 'X_std': ['Age', 'Height', 'Weight', 'Duration', 'Duration_log', 'Heart_Rate', 'Heart_Rate_sqrt_d', 'Body_Temp'], 'X_num': ['Sex']
}

sgml.cv(df_train, ss, hparams, config, nn_adapter)

Fold:   0%|          | 0/1 [00:00<?, ?it/s]

Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

Step:   0%|          | 0/5274 [00:00<?, ?it/s]

{'valid_scores': [np.float32(0.05950092)],
 'valid_prd': id
 24        5.259694
 50        3.451422
 122       5.339328
 127       4.584826
 146       1.720613
             ...   
 749987    4.311688
 749991    5.162928
 749995    5.406791
 749997    5.454646
 749999    4.616103
 Length: 75000, dtype: float32,
 'model_result': [{'history':         loss
   0   0.252236
   1   0.004672
   2   0.004135
   3   0.003979
   4   0.003891
   5   0.003842
   6   0.003803
   7   0.003765
   8   0.003738
   9   0.003716
   10  0.003700
   11  0.003687
   12  0.003675
   13  0.003656
   14  0.003652
   15  0.003644
   16  0.003633
   17  0.003626
   18  0.003620
   19  0.003621
   20  0.003610
   21  0.003610
   22  0.003600
   23  0.003598
   24  0.003594
   25  0.003585
   26  0.003588
   27  0.003583
   28  0.003578
   29  0.003578,
   'variables': array(['std__Age', 'std__Height', 'std__Weight', 'std__Duration',
          'std__Duration_log', 'std__Heart_Rate', 'std__Heart_Rate_sqrt_d',
      

In [None]:
hparams = {
    'model_params': {'max_depth': 8, 'n_estimators': 4000, 'learning_rate': 0.02}, 
    'X_num': ['Sex', 'Age', 'Height', 'Weight', 'Duration_log', 'Heart_Rate', 'Body_Temp'], 
}

result = sc.cv_result('cb4', df_train, kf, hparams, config, cb_adapter, use_gpu = 1, rerun = 0)
np.mean(result['valid_scores']), result['valid_scores']

# Ensemble

In [6]:
df_stk = sc.read_prds(
    ['lgb2', 'lgb1', 'xgb1', 'xgb2', 'cb1', 'cb2', 'cb3', 'cb4'], index = df_train.index
).assign(
    Calories_Log = df_train[target]
)

In [10]:
root_mean_squared_error(
    df_train[target], df_stk.iloc[:, :-1].dot([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2])
)

np.float64(0.05944463194760985)

In [13]:
hparams = {
    'X_num': df_stk.columns[:-1].tolist()
}
result = sgml.cv(df_stk, kf, hparams, config, lr_adapter)
np.mean(result['valid_scores'])

Fold:   0%|          | 0/4 [00:00<?, ?it/s]

np.float64(0.05943634055101435)

In [8]:
df_stk

Unnamed: 0_level_0,lgb2,lgb1,xgb1,xgb2,cb1,cb2,cb3,cb4,Calories_Log
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,4.990682,4.995060,4.993209,4.996872,4.999116,4.997972,4.999042,4.999959,5.017280
1,3.612145,3.608516,3.610460,3.613190,3.608996,3.609393,3.611230,3.611207,3.555348
2,3.388654,3.392682,3.413141,3.385291,3.404134,3.401662,3.398638,3.395333,3.401197
3,4.931136,4.929353,4.933776,4.935688,4.931032,4.934934,4.934037,4.930398,4.948760
4,4.985825,4.990148,4.999208,4.985096,4.987772,4.990553,4.990468,4.987012,4.990433
...,...,...,...,...,...,...,...,...,...
749995,5.397452,5.390379,5.395508,5.395901,5.402192,5.406062,5.404190,5.405901,5.442418
749996,4.552933,4.551194,4.557213,4.551951,4.560265,4.555336,4.556162,4.556599,4.574711
749997,5.462295,5.457239,5.462897,5.472661,5.458836,5.461267,5.451686,5.462544,5.402678
749998,4.691373,4.668897,4.687819,4.685112,4.679910,4.677949,4.680012,4.676657,4.700480
