- base: https://www.kaggle.com/code/vadimkamaev/postprocessin-ensemble
- select feats by importances and 3 ensembles (no postprocess) 
- start pytorch lightning 
- define pytorch lightning custom data module 
- add test predictions *

# common class

In [1]:
import sys
import optuna
import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoost
from catboost import Pool
from lightgbm.sklearn import LGBMClassifier
sys.path.append('../input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import StratifiedKFold, KFold

class TreeModelling:
    def __init__(self, config):
        self.config = config 

    def lgb_train_and_pred(self, x_tr, y_tr, x_val, y_val, test, feats, split):
    
        feat_imp_df = pd.DataFrame(feats, columns = ["feature"])
        model = lgb.train(self.config["lgb_params"], lgb.Dataset(x_tr, y_tr), self.config["lgb_num_round"], valid_sets=[lgb.Dataset(x_val, y_val)], 
                      callbacks = [lgb.early_stopping(stopping_rounds=self.config["lgb_es_round"], verbose=False)]) 
        feat_imp_df["importance"+str(split)] = model.feature_importance()
        valid_pred = model.predict(x_val)
        test_pred = model.predict(test) 
    
        return test_pred, valid_pred, feat_imp_df
    
    def lgbcat_train_and_pred(self, x_tr, y_tr, x_val, y_val, test, feats, split):
    
        feat_imp_df = pd.DataFrame(feats, columns = ["feature"])
        model = LGBMClassifier(**self.config["lgbcat_params"], n_estimators = 1000)
        model.fit(x_tr, y_tr, eval_set=[(x_val, y_val)], callbacks=[lgb.early_stopping(stopping_rounds=self.config["lgb_es_round"], verbose=0), lgb.log_evaluation(period=0)])
        feat_imp_df["importance"+str(split)] = model.feature_importances_
        valid_pred = model.predict_proba(x_val)[:,1]
        test_pred = model.predict_proba(test)[:,1]
    
        return test_pred, valid_pred, feat_imp_df

    def xgb_train_and_pred(self, x_tr, y_tr, x_val, y_val, test, feats, split):

        xgb_eval = xgb.DMatrix(x_val, label = y_val)
        model = xgb.train(self.config["xgb_params"], xgb.DMatrix(x_tr, label = y_tr), self.config["xgb_num_round"], evals = [(xgb_eval, "eval")], 
                          early_stopping_rounds=self.config["xgb_es_round"], verbose_eval = 100)
        feat_imp_df = pd.DataFrame(model.get_score(importance_type="total_gain").items(), columns =["feature", "importance"+str(split)])
        valid_pred = model.predict(xgb_eval, iteration_range=(0, model.best_ntree_limit))
        dtest = xgb.DMatrix(test)
        test_pred = model.predict(dtest, iteration_range=(0, model.best_ntree_limit))
    
        return test_pred, valid_pred, feat_imp_df

    def cb_train_and_pred(self, x_tr, y_tr, x_val, y_val, test, feats, split):

        feat_imp_df = pd.DataFrame(feats, columns = ["feature"])        
        tr_pool = Pool(x_tr, label=y_tr)
        val_pool = Pool(x_val, label=y_val)
        model = CatBoost(self.config["cb_params"])
        model.fit(tr_pool, eval_set=[val_pool], use_best_model=True, early_stopping_rounds = self.config["cb_es_round"], verbose_eval = 100)
        valid_pred = model.predict(val_pool, prediction_type='Probability')[:,1] #
        test_pred = model.predict(test, prediction_type='Probability')[:,1] #
        feat_imp_df["importance"+str(split)] = model.get_feature_importance()
    
        return test_pred, valid_pred, feat_imp_df
        
    def cv_execute(self, model_name):
    
        if model_name == "lgb":
            func = self.lgb_train_and_pred
            feats = self.config["lgb_feats"]
            
        elif model_name == "lgbcat":
            func = self.lgbcat_train_and_pred
            feats = self.config["lgbcat_feats"]

        elif model_name == "xgb":
            func = self.xgb_train_and_pred
            feats = self.config["xgb_feats"]
        
        elif model_name == "cb":
            func = self.cb_train_and_pred
            feats = self.config["cb_feats"]
    
        valid_output = np.zeros(len(self.config["target"]))
        cv = self.config["cv"]
    
        for split, (tr_idx, val_idx) in enumerate(cv.split(self.config["train_data"], self.config["stratify"])): 
            x = self.config["train_data"][feats]
            y = self.config["target"]
            test = self.config["test_data"][feats]
            x_tr, x_val = x.iloc[tr_idx], x.iloc[val_idx]
            y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]
            if split == 0:
                test_output, valid_pred, feat_imp_df = func(x_tr, y_tr, x_val, y_val, test, feats, split)
            else:
                tmp_output, valid_pred, tmp_imp_df = func(x_tr, y_tr, x_val, y_val, test, feats, split)
                test_output += tmp_output
                feat_imp_df = pd.merge(feat_imp_df, tmp_imp_df, on = "feature", how = "inner")
            valid_output[val_idx] = valid_pred
    
        test_output /= (split + 1)
        feat_imp_df["sum"] = feat_imp_df.iloc[:,1:].sum(axis=1)
        
        val_score = self.config["metric_function"](self.config["target"], valid_output)
        print(val_score)
    
        return test_output, valid_output, feat_imp_df

    def lgb_objective(self, trial):
        # https://lightgbm.readthedocs.io/en/latest/Parameters.html
        check_params = {
            'num_leaves': trial.suggest_int('num_leaves', 10, 100),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
            'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
            'min_chil1_samples': trial.suggest_int('min_child_samples', 1, 10),
            'lambda_l2': trial.suggest_float('lambda_l2', 0, 1.0),
            'lambda_l1': trial.suggest_float('lambda_l1', 0, 1.0),
        }
    
        params = self.config["lgb_params"].copy()
        params.update(check_params)
        _, val_output, _ = self.cv_execute("lgb")
        score = self.config["metric_function"](self.config["target"], val_output)
        return score
    

    def lgbcat_objective(self, trial):
        # https://lightgbm.readthedocs.io/en/latest/Parameters.html
        check_params = {
            'num_leaves': trial.suggest_int('num_leaves', 10, 100),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
            'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
            'min_child_samples': trial.suggest_int('min_child_samples', 1, 10),
            'lambda_l2': trial.suggest_float('lambda_l2', 0, 0.1),
            'lambda_l1': trial.suggest_float('lambda_l1', 0, 0.1),
        }
    
        params = self.config["lgbcat_params"].copy()
        params.update(check_params)
        _, val_output, _ = self.cv_execute("lgbcat")
        score = self.config["metric_function"](self.config["target"], val_output)
        return score

    
    def xgb_objective(self, trial):
        # https://xgboost.readthedocs.io/en/stable/parameter.html
        check_params = {
            'gamma': trial.suggest_float('gamma', 0, 1),
            'max_depth': trial.suggest_int('max_depth', 1, 10),
            'num_leaves': trial.suggest_int('num_leaves', 10, 100),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'max_delta_step': trial.suggest_int('max_delta_step', 0, 10),
            'subsample': trial.suggest_float('subsample', 0, 1),
            'lambda': trial.suggest_float('lambda', 0, 1),
            'alpha': trial.suggest_float('alpha', 0, 1),
        }
    
        params = self.config["xgb_params"].copy()
        params.update(check_params)
        _, val_output, _ = self.cv_execute("xgb")
        score = self.config["metric_function"](self.config["target"], val_output)
        return score

    
    def cb_objective(self, trial):
        # https://catboost.ai/en/docs/references/training-parameters/
        check_params = {
            'max_depth': trial.suggest_int('max_depth', 1, 10),
            #'num_leaves': trial.suggest_int('num_leaves', 10, 100),
            'reg_lambda': trial.suggest_float('reg_lambda', 0, 1.0),
            'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.5),
            'min_child_samples': trial.suggest_int('min_child_samples', 1, 10),
            'colsample_bylevel': trial.suggest_float('reg_lambda', 0, 1.0),
        }
    
        params = self.config["cb_params"].copy()
        params.update(check_params)
        _, val_output, _ = self.cv_execute("cb")
        score = self.config["metric_function"](self.config["target"], val_output) 
        return score

    
    def param_tuning(self, model_name, option = "minimize"):

        if model_name == "lgb":
            objective = self.lgb_objective
        elif model_name == "lgbcat":
            objective = self.lgbcat_objective
        elif model_name == "xgb":
            objective = self.xgb_objective
        elif model_name == "cb":
            objective = self.cb_objective
            
        study = optuna.create_study(direction=option) 
        study.optimize(objective, n_trials=self.config["optuna_trial_num"])
        trial = study.best_trial
        print('Value: ', trial.value)
        return trial.params



# preprocess

In [2]:
train = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/train.csv')
test = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/test.csv')
sample = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv')
greeks = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/greeks.csv')

def add_feats(cols, tr, te, i, j):
    tr = tr.copy()
    te = te.copy()
    tr[i+"/"+j] = tr[i] / (tr[j] + 1)
    te[i+"/"+j] = te[i] / (te[j] + 1)
    cols.append(i + "/" + j)
    
    tr[i+"-"+j] = tr[i] - tr[j]
    te[i+"-"+j] = te[i] - te[j]
    cols.append(i + "-" + j)
    return tr, te, cols

import itertools
from datetime import datetime
from sklearn.decomposition import PCA

train['EJ'] = train['EJ'].map({'A': 0, 'B': 1})
test['EJ']  = test['EJ'].map({'A': 0, 'B': 1})

# process epsilon
train = pd.merge(train, greeks, on = "Id", how = "inner")
train_stratify = train[["Class", "Beta", "Delta", "Gamma"]] 
train["Epsilon_ordinal"] = train["Epsilon"].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal() if x != "Unknown" else np.nan)

org_features = [n for n in train.columns if n not in ['Class', 'Id', 'Alpha', "Beta", "Gamma", "Delta", "Epsilon"]]
test_times = pd.DataFrame([train.Epsilon_ordinal.max() + 1] * len(test), columns = ["Epsilon_ordinal"])
final_test = pd.concat((test, test_times), axis=1)

# fill missing value
train.fillna(-999, inplace=True)
final_test.fillna(-999, inplace=True)

# make divide feats
divide_cols = []
comb_feats = list(itertools.combinations([i for i in org_features if i!= "Epsilon_ordinal"], 2))
for i in comb_feats:
    train, final_test, divide_cols = add_feats(divide_cols, train, final_test, i[0], i[1])

# add pca columns
pca_feat_num = 15
pca_cols = ["pca"+str(i+1) for i in range(pca_feat_num)]
pca = PCA(n_components=pca_feat_num,random_state=42)
pca_train = pca.fit_transform(train[org_features])
pca_test = pca.transform(final_test[org_features])
pca_train = pd.DataFrame(pca_train, columns=pca_cols)
pca_test = pd.DataFrame(pca_test, columns=pca_cols)
train = pd.concat([train, pca_train],axis=1)
final_test = pd.concat([final_test, pca_test],axis=1)

print(train.shape, final_test.shape)

(617, 3159) (5, 3153)


# config

In [3]:
from sklearn.metrics import log_loss
def balanced_log_loss(y_true, y_pred):
    nc = np.bincount(y_true)
    return log_loss(y_true, y_pred, sample_weight = 1/nc[y_true], eps=1e-15)

lgb_feats = ['DA/DU', 'DH/DU', 'Epsilon_ordinal', 'AB/EE', 'DU-EE', 'AB-CR',
       'AB-FD ', 'pca1', 'CR/EH', 'BC-CC', 'AB-DU', 'BQ/CB', 'BQ/DE',
       'BQ-GH', 'DU-FI', 'DU/FI', 'CC/CD ', 'CU/DU', 'CR/DU', 'AB/DU',
       'DU/EP', 'CR/DV', 'DA-DU', 'AB/CR', 'CR-EH', 'AB/CS', 'DH/DI',
       'BQ-GE', 'BQ-FI', 'BC-DA', 'AB/CU', 'BC/CC', 'BQ-DA', 'AM/DU',
       'CD /DL', 'AB/DN', 'AB/FL', 'BQ-EP', 'BQ/FI', 'EP/FL', 'CR/FR',
       'CD -DL', 'AF/EP', 'AF/DL', 'CC/DU', 'AF/CR', 'DL/DU', 'DU-EP',
       'CD /DE', 'CD /FD ', 'CD /DN', 'AM/BQ', 'DU/FD ', 'BQ/EE', 'BC/DE',
       'AY/DU', 'BQ/GF', 'DL-EB', 'FL/GF', 'CR/FE', 'DL-FL', 'AB/DE',
       'AB-EH', 'CR-DV', 'FI-FR', 'CH/DI', 'DH/DV', 'BQ-FC', 'BQ-DN',
       'BQ/CS', 'EJ/FL', 'DA-FL', 'DI/GF', 'DU/EG', 'EL/GF', 'FL/GE',
       'AF/GE', 'AF/DN', 'DL/EB', 'DL/DY', 'AX/FL', 'BQ-DE', 'AF/CU',
       'BD /CU', 'AX/DU', 'GH-GL', 'DU/EB', 'AR/CC', 'BQ/GE', 'CR/DI']

config = {
    "cv": MultilabelStratifiedKFold(5,shuffle=True,random_state=42),
    "xgb_feats": org_features + pca_cols,
    "lgb_feats": org_features + pca_cols,
    "lgbcat_feats": lgb_feats,
    "cb_feats": org_features + pca_cols,
    "lgb_num_round" : 1000,
    "lgb_es_round" : 20,
    "xgb_num_round" : 1000,
    "xgb_es_round" : 50,
    "cb_es_round" : 40,
    
    "lgb_params" : {
    'objective': 'binary', 
    'metric': 'binary_logloss', 
    'boosting': 'goss',
    'verbose': -1,
    'seed': 42},
    
    "lgbcat_params" : {
    'objective': 'binary', 
    'metric': 'binary_logloss', 
    'boosting': 'goss',
    'verbose': -1,
    'seed': 42,
    'importance_type' : "gain",
    'is_unbalance': True
     },
    
    "xgb_params" : {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'verbosity': 0,
    'seed': 42,},       
    
    "cb_params" : {
    'loss_function': 'Logloss',
    'num_boost_round': 1000,  
    'random_seed': 42,},
    
    "train_data": train,
    "stratify": train_stratify,
    "test_data": final_test,
    "target": train.Class,
    "metric_function" : balanced_log_loss,
    "optuna_trial_num": 5,
}

# tree model

In [4]:
treemodel = TreeModelling(config)

In [5]:
_, lgb_val_output, lgb_imp_df = treemodel.cv_execute("lgb")
lgb_best_params = treemodel.param_tuning("lgb")
lgb_best_params

[32m[I 2023-08-11 01:54:37,021][0m A new study created in memory with name: no-name-62e49ceb-2e3f-49e8-bde9-0b3f18e224ed[0m


0.2903756860829748


[32m[I 2023-08-11 01:54:38,580][0m Trial 0 finished with value: 0.2903756860829748 and parameters: {'num_leaves': 96, 'learning_rate': 0.061996633417748595, 'feature_fraction': 0.3821113454959134, 'bagging_fraction': 0.9162109592991454, 'min_child_samples': 3, 'lambda_l2': 0.05253297714037808, 'lambda_l1': 0.9244694643526455}. Best is trial 0 with value: 0.2903756860829748.[0m


0.2903756860829748


[32m[I 2023-08-11 01:54:39,967][0m Trial 1 finished with value: 0.2903756860829748 and parameters: {'num_leaves': 47, 'learning_rate': 0.28307218636014503, 'feature_fraction': 0.5827928903645333, 'bagging_fraction': 0.33567062946092086, 'min_child_samples': 1, 'lambda_l2': 0.5370821443011249, 'lambda_l1': 0.5634927774426004}. Best is trial 0 with value: 0.2903756860829748.[0m


0.2903756860829748


[32m[I 2023-08-11 01:54:41,324][0m Trial 2 finished with value: 0.2903756860829748 and parameters: {'num_leaves': 67, 'learning_rate': 0.30886578929291664, 'feature_fraction': 0.5823008079319248, 'bagging_fraction': 0.8467433694450515, 'min_child_samples': 3, 'lambda_l2': 0.8560162626191398, 'lambda_l1': 0.9751179454816934}. Best is trial 0 with value: 0.2903756860829748.[0m


0.2903756860829748


[32m[I 2023-08-11 01:54:42,687][0m Trial 3 finished with value: 0.2903756860829748 and parameters: {'num_leaves': 42, 'learning_rate': 0.21834860383985655, 'feature_fraction': 0.9943034345906194, 'bagging_fraction': 0.8269410048332082, 'min_child_samples': 7, 'lambda_l2': 0.19127147979920445, 'lambda_l1': 0.23622457540483022}. Best is trial 0 with value: 0.2903756860829748.[0m


0.2903756860829748


[32m[I 2023-08-11 01:54:44,022][0m Trial 4 finished with value: 0.2903756860829748 and parameters: {'num_leaves': 62, 'learning_rate': 0.14564986070688293, 'feature_fraction': 0.2389156823433704, 'bagging_fraction': 0.20327163533335174, 'min_child_samples': 8, 'lambda_l2': 0.6019657313435858, 'lambda_l1': 0.058884149502885896}. Best is trial 0 with value: 0.2903756860829748.[0m


0.2903756860829748
Value:  0.2903756860829748


{'num_leaves': 96,
 'learning_rate': 0.061996633417748595,
 'feature_fraction': 0.3821113454959134,
 'bagging_fraction': 0.9162109592991454,
 'min_child_samples': 3,
 'lambda_l2': 0.05253297714037808,
 'lambda_l1': 0.9244694643526455}

In [6]:
_, lgbcat_val_output, lgbcat_imp_df = treemodel.cv_execute("lgbcat")
lgbcat_best_params = treemodel.param_tuning("lgbcat")
lgbcat_best_params



[32m[I 2023-08-11 01:54:45,601][0m A new study created in memory with name: no-name-f55dc064-7c06-49c5-9fb9-e6c23179ba7d[0m


0.1789250597971124


[32m[I 2023-08-11 01:54:47,288][0m Trial 0 finished with value: 0.1789250597971124 and parameters: {'num_leaves': 53, 'learning_rate': 0.40685932242393086, 'feature_fraction': 0.9393203506019769, 'bagging_fraction': 0.6980567468745632, 'min_child_samples': 5, 'lambda_l2': 0.06390388286269853, 'lambda_l1': 0.06900065038942628}. Best is trial 0 with value: 0.1789250597971124.[0m


0.1789250597971124


[32m[I 2023-08-11 01:54:48,828][0m Trial 1 finished with value: 0.1789250597971124 and parameters: {'num_leaves': 95, 'learning_rate': 0.07968645922834551, 'feature_fraction': 0.5211003663337501, 'bagging_fraction': 0.6502246676387438, 'min_child_samples': 2, 'lambda_l2': 0.09185701045102088, 'lambda_l1': 0.008402400318213233}. Best is trial 0 with value: 0.1789250597971124.[0m


0.1789250597971124


[32m[I 2023-08-11 01:54:51,475][0m Trial 2 finished with value: 0.1789250597971124 and parameters: {'num_leaves': 70, 'learning_rate': 0.465290822527012, 'feature_fraction': 0.6872658466906432, 'bagging_fraction': 0.8665510325191972, 'min_child_samples': 10, 'lambda_l2': 0.03273407708576011, 'lambda_l1': 0.0335140375900183}. Best is trial 0 with value: 0.1789250597971124.[0m


0.1789250597971124


[32m[I 2023-08-11 01:54:53,095][0m Trial 3 finished with value: 0.1789250597971124 and parameters: {'num_leaves': 30, 'learning_rate': 0.34888085651931156, 'feature_fraction': 0.5237464487854974, 'bagging_fraction': 0.6357020867739488, 'min_child_samples': 3, 'lambda_l2': 0.04209607201222455, 'lambda_l1': 0.0034092660270257924}. Best is trial 0 with value: 0.1789250597971124.[0m


0.1789250597971124


[32m[I 2023-08-11 01:54:54,750][0m Trial 4 finished with value: 0.1789250597971124 and parameters: {'num_leaves': 31, 'learning_rate': 0.4296562687284909, 'feature_fraction': 0.8035629424620956, 'bagging_fraction': 0.9658795372006814, 'min_child_samples': 4, 'lambda_l2': 0.07636304983784588, 'lambda_l1': 0.09709771828982003}. Best is trial 0 with value: 0.1789250597971124.[0m


0.1789250597971124
Value:  0.1789250597971124


{'num_leaves': 53,
 'learning_rate': 0.40685932242393086,
 'feature_fraction': 0.9393203506019769,
 'bagging_fraction': 0.6980567468745632,
 'min_child_samples': 5,
 'lambda_l2': 0.06390388286269853,
 'lambda_l1': 0.06900065038942628}

In [7]:
_, xgb_val_output, xgb_imp_df = treemodel.cv_execute("xgb")
xgb_best_params = treemodel.param_tuning("xgb")
xgb_best_params

[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[293]	eval-logloss:0.07732
[0]	eval-logloss:0.52655
[79]	eval-logloss:0.15150
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[145]	eval-logloss:0.15881
[0]	eval-logloss:0.52212
[73]	eval-logloss:0.20678


[32m[I 2023-08-11 01:54:56,971][0m A new study created in memory with name: no-name-df0b343b-66a9-4e57-b656-953154b8fd17[0m


0.31216516835643554
[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[293]	eval-logloss:0.07732
[0]	eval-logloss:0.52655
[79]	eval-logloss:0.15150
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[144]	eval-logloss:0.15872
[0]	eval-logloss:0.52212
[72]	eval-logloss:0.20594


[32m[I 2023-08-11 01:54:59,134][0m Trial 0 finished with value: 0.31216516835643554 and parameters: {'gamma': 0.97666776962272, 'max_depth': 5, 'num_leaves': 46, 'learning_rate': 0.3135075827995886, 'min_child_weight': 1, 'max_delta_step': 4, 'subsample': 0.38346658972405734, 'lambda': 0.1177813198651324, 'alpha': 0.5643996122524646}. Best is trial 0 with value: 0.31216516835643554.[0m


0.31216516835643554
[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[294]	eval-logloss:0.07731
[0]	eval-logloss:0.52655
[79]	eval-logloss:0.15150
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[144]	eval-logloss:0.15872
[0]	eval-logloss:0.52212
[73]	eval-logloss:0.20678


[32m[I 2023-08-11 01:55:01,256][0m Trial 1 finished with value: 0.31216516835643554 and parameters: {'gamma': 0.8982202373918696, 'max_depth': 6, 'num_leaves': 38, 'learning_rate': 0.3773058415038481, 'min_child_weight': 2, 'max_delta_step': 8, 'subsample': 0.04557892817398701, 'lambda': 0.9696337884526688, 'alpha': 0.27728271304715235}. Best is trial 0 with value: 0.31216516835643554.[0m


0.31216516835643554
[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[294]	eval-logloss:0.07731
[0]	eval-logloss:0.52655
[78]	eval-logloss:0.15152
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[144]	eval-logloss:0.15872
[0]	eval-logloss:0.52212
[72]	eval-logloss:0.20594


[32m[I 2023-08-11 01:55:03,414][0m Trial 2 finished with value: 0.31216516835643554 and parameters: {'gamma': 0.043452093194405794, 'max_depth': 8, 'num_leaves': 63, 'learning_rate': 0.4283789116730501, 'min_child_weight': 6, 'max_delta_step': 3, 'subsample': 0.3324587467972243, 'lambda': 0.4590186238761027, 'alpha': 0.04099611387950386}. Best is trial 0 with value: 0.31216516835643554.[0m


0.31216516835643554
[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[294]	eval-logloss:0.07731
[0]	eval-logloss:0.52655
[78]	eval-logloss:0.15152
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[144]	eval-logloss:0.15872
[0]	eval-logloss:0.52212
[72]	eval-logloss:0.20594


[32m[I 2023-08-11 01:55:05,541][0m Trial 3 finished with value: 0.31216516835643554 and parameters: {'gamma': 0.8064294306606424, 'max_depth': 9, 'num_leaves': 73, 'learning_rate': 0.34609115446647154, 'min_child_weight': 10, 'max_delta_step': 6, 'subsample': 0.06874661359308909, 'lambda': 0.7531315297542616, 'alpha': 0.9552491611718354}. Best is trial 0 with value: 0.31216516835643554.[0m


0.31216516835643554
[0]	eval-logloss:0.50062
[100]	eval-logloss:0.07995
[200]	eval-logloss:0.07652
[294]	eval-logloss:0.07731
[0]	eval-logloss:0.52655
[79]	eval-logloss:0.15150
[0]	eval-logloss:0.50637
[74]	eval-logloss:0.16255
[0]	eval-logloss:0.52571
[100]	eval-logloss:0.15744
[144]	eval-logloss:0.15872
[0]	eval-logloss:0.52212
[73]	eval-logloss:0.20678


[32m[I 2023-08-11 01:55:07,763][0m Trial 4 finished with value: 0.31216516835643554 and parameters: {'gamma': 0.3148347416964572, 'max_depth': 8, 'num_leaves': 46, 'learning_rate': 0.4493936747799238, 'min_child_weight': 7, 'max_delta_step': 8, 'subsample': 0.42932988721107834, 'lambda': 0.6153820085673013, 'alpha': 0.9887168131548862}. Best is trial 0 with value: 0.31216516835643554.[0m


0.31216516835643554
Value:  0.31216516835643554


{'gamma': 0.97666776962272,
 'max_depth': 5,
 'num_leaves': 46,
 'learning_rate': 0.3135075827995886,
 'min_child_weight': 1,
 'max_delta_step': 4,
 'subsample': 0.38346658972405734,
 'lambda': 0.1177813198651324,
 'alpha': 0.5643996122524646}

In [8]:
_, cb_val_output, cb_imp_df = treemodel.cv_execute("cb")
cb_best_params = treemodel.param_tuning("cb")
cb_best_params

Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 69.8ms	remaining: 1m 9s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 866ms	remaining: 7.71s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.66s	remaining: 6.6s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.46s	remaining: 5.72s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.26s	remaining: 4.88s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 4.05s	remaining: 4.03s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 4.83s	remaining: 3.21s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 5.61s	remaining: 2.39s
800:	learn: 0.0042617	test: 0.0891180	best: 0.0891180 (800)	total: 6.41s	remaining: 1.59s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.08903141659
bestIteration = 803

Shrink model to first 804 iterations.
Learning rate set to 0.02

[32m[I 2023-08-11 01:55:28,823][0m A new study created in memory with name: no-name-da484ebd-d767-48d6-9e56-25c8d103f422[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.38s	remaining: 5.52s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 9.36ms	remaining: 9.35s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 803ms	remaining: 7.15s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.61s	remaining: 6.4s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.4s	remaining: 5.57s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.17s	remaining: 4.74s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 3.96s	remaining: 3.94s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 4.74s	remaining: 3.15s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 5.52s	remaining: 2.35s
800:	lea

[32m[I 2023-08-11 01:55:49,180][0m Trial 0 finished with value: 0.3645940857907282 and parameters: {'max_depth': 6, 'reg_lambda': 0.24299886003053794, 'learning_rate': 0.27044726704536026, 'min_child_samples': 5}. Best is trial 0 with value: 0.3645940857907282.[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.42s	remaining: 5.63s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 8.46ms	remaining: 8.46s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 821ms	remaining: 7.3s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.61s	remaining: 6.39s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.43s	remaining: 5.63s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.43s	remaining: 5.13s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 4.43s	remaining: 4.42s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 5.24s	remaining: 3.48s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 6.04s	remaining: 2.58s
800:	le

[32m[I 2023-08-11 01:56:10,180][0m Trial 1 finished with value: 0.3645940857907282 and parameters: {'max_depth': 3, 'reg_lambda': 0.06688477207034971, 'learning_rate': 0.4475620495042772, 'min_child_samples': 10}. Best is trial 0 with value: 0.3645940857907282.[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.4s	remaining: 5.58s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 7.95ms	remaining: 7.94s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 814ms	remaining: 7.25s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.6s	remaining: 6.38s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.4s	remaining: 5.58s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.18s	remaining: 4.75s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 3.97s	remaining: 3.95s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 4.75s	remaining: 3.15s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 5.56s	remaining: 2.37s
800:	lear

[32m[I 2023-08-11 01:56:30,943][0m Trial 2 finished with value: 0.3645940857907282 and parameters: {'max_depth': 4, 'reg_lambda': 0.7761158876769911, 'learning_rate': 0.08997796275022865, 'min_child_samples': 3}. Best is trial 0 with value: 0.3645940857907282.[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.36s	remaining: 5.48s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 8.76ms	remaining: 8.75s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 814ms	remaining: 7.24s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.63s	remaining: 6.46s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.42s	remaining: 5.62s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.21s	remaining: 4.79s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 4s	remaining: 3.98s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 4.84s	remaining: 3.21s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 5.7s	remaining: 2.43s
800:	learn

[32m[I 2023-08-11 01:56:51,585][0m Trial 3 finished with value: 0.3645940857907282 and parameters: {'max_depth': 4, 'reg_lambda': 0.05547927309217571, 'learning_rate': 0.08044582232836761, 'min_child_samples': 7}. Best is trial 0 with value: 0.3645940857907282.[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.4s	remaining: 5.58s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Learning rate set to 0.026623
0:	learn: 0.6658456	test: 0.6657945	best: 0.6657945 (0)	total: 8.12ms	remaining: 8.12s
100:	learn: 0.1123480	test: 0.1832610	best: 0.1832610 (100)	total: 812ms	remaining: 7.23s
200:	learn: 0.0492788	test: 0.1381917	best: 0.1381917 (200)	total: 1.63s	remaining: 6.49s
300:	learn: 0.0253975	test: 0.1166001	best: 0.1166001 (300)	total: 2.43s	remaining: 5.64s
400:	learn: 0.0143491	test: 0.1034707	best: 0.1034707 (400)	total: 3.21s	remaining: 4.8s
500:	learn: 0.0094645	test: 0.0988917	best: 0.0988917 (500)	total: 4.17s	remaining: 4.16s
600:	learn: 0.0068158	test: 0.0956670	best: 0.0952825 (577)	total: 5.24s	remaining: 3.48s
700:	learn: 0.0052091	test: 0.0923589	best: 0.0923589 (700)	total: 6.04s	remaining: 2.57s
800:	lea

[32m[I 2023-08-11 01:57:12,567][0m Trial 4 finished with value: 0.3645940857907282 and parameters: {'max_depth': 8, 'reg_lambda': 0.3438242487753146, 'learning_rate': 0.3580798471146119, 'min_child_samples': 1}. Best is trial 0 with value: 0.3645940857907282.[0m


300:	learn: 0.0218487	test: 0.1931048	best: 0.1908997 (267)	total: 2.42s	remaining: 5.62s
Stopped by overfitting detector  (40 iterations wait)

bestTest = 0.1908997058
bestIteration = 267

Shrink model to first 268 iterations.
0.3645940857907282
Value:  0.3645940857907282


{'max_depth': 6,
 'reg_lambda': 0.24299886003053794,
 'learning_rate': 0.27044726704536026,
 'min_child_samples': 5}

# NN

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning import LightningModule, LightningDataModule, Trainer, seed_everything

class MyDataset(Dataset):
    def __init__(self, df, target, feats, phase = "train"):
        self.phase = phase 
        self.feats = feats
        self.data = df[feats]
        self.target = target
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        if self.phase in ['train', "valid"]:
            return self.data.values[index].astype(float), self.target.values[index].astype(float)
        elif self.phase == 'test':
            return self.data.values[index].astype(float), 0
        
class MyModel(nn.Module):
    def __init__(self, num_columns, last_num):
        super(MyModel, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.1)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 256))
        self.relu1 = nn.LeakyReLU()
        
        self.batch_norm2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.1)
        self.dense2 = nn.utils.weight_norm(nn.Linear(256, 256))
        self.relu2 = nn.LeakyReLU()
        
        self.batch_norm3 = nn.BatchNorm1d(256)
        self.dropout3 = nn.Dropout(0.1)
        self.dense3 = nn.utils.weight_norm(nn.Linear(256, last_num))
    
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.relu1(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.relu2(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)
        x = torch.squeeze(x)
        x = nn.Sigmoid()(x)
        
        return x
             
class MyModule(LightningModule):
    def __init__(self, lr, num_columns, last_num = 1):
        super(MyModule, self).__init__()
        self.lr = lr
        self.num_columns = num_columns
        self.last_num = last_num
        self.loss_fn = nn.BCELoss()
        self.model = MyModel(num_columns = self.num_columns, last_num = self.last_num)
        self.log_outputs = {}
        self.validation_step_outputs = []
        self.train_step_outputs = []
        
    def forward(self, x):
        x = self.model(x)
        return x
    
    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        preds = self.forward(inputs)        
        loss = self.loss_fn(preds, targets)        
        self.train_step_outputs.append(loss)

        return {"loss": loss}
    
    def validation_step(self, batch, batch_idx):
        inputs, targets = batch
        preds = self.forward(inputs)
        loss = self.loss_fn(preds, targets)
        output = {"targets": targets.detach(), "preds": preds.detach(), "loss": loss.detach()}
        self.validation_step_outputs.append(output)
                
        return output
    
    def predict_step(self, batch, batch_idx):
        inputs, targets = batch
        preds = self.forward(inputs)
                
        return preds
    
    def on_train_start(self) -> None:
        self.print(f"Train start")
        return super().on_train_start()
    
    def on_train_epoch_end(self) -> None:
        train_loss = torch.stack([x for x in self.train_step_outputs]).mean()
        self.log_dict({"loss": train_loss})
        self.log_outputs["loss"] = train_loss
        
        train_loss     = self.log_outputs["loss"]
        valid_loss     = self.log_outputs["valid_loss"]
        self.print(f"loss: {train_loss:.3f} - val_loss: {valid_loss:.3f}")
        
        return super().on_train_epoch_end()
        
    def on_validation_epoch_end(self) -> None:
        valid_loss = torch.stack([x["loss"] for x in self.validation_step_outputs]).mean()
        self.log_dict({"valid_loss": valid_loss})
        self.log_outputs["valid_loss"] = valid_loss
        return super().on_validation_epoch_end()
        
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr = self.lr)
        return optimizer
    
class MyDataModule(LightningModule):
    def __init__(self, train, test, target, feats, fold, batch_size = 32):
        super(MyDataModule, self).__init__()
        self.batch_size = batch_size
        self.train = train
        self.target = target
        self.input_test = test
        self.feats = feats
        self.fold = fold
        self.x_train = None
        self.x_valid = None
        self.y_train = None
        self.y_valid = None
        
    def split_train_valid_df(self):
        skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
        for n, (tr_index, val_index) in enumerate(skf.split(self.train, self.target)):
            if n == self.fold: 
                x_train = self.train.loc[tr_index].reset_index(drop=True)
                x_valid = self.train.loc[val_index].reset_index(drop=True)
                y_train = self.target.loc[tr_index].reset_index(drop=True)
                y_valid = self.target.loc[val_index].reset_index(drop=True)
        
        return x_train, x_valid, y_train, y_valid
    
    def setup(self, stage):
        x_tr, x_va, y_tr, y_va = self.split_train_valid_df()
        self.x_train = x_tr
        self.x_valid = x_va
        self.y_train = y_tr 
        self.y_valid = y_va
        self.test = self.input_test
        
    def get_dataframe(self, phase):
        assert phase in ["train", "valid", "test"]
        if phase == "train":
            return self.x_train, self.y_train
        elif phase == "valid":
            return self.x_valid, self.y_valid
        elif phase == "test":
            return self.test, None
        
    def get_ds(self, phase):
        x, y = self.get_dataframe(phase)
        return MyDataset(df = x, target = y, feats = self.feats, phase = phase)
        
    def get_loader(self, phase):
        assert phase in ["train", "valid", "test"]
        ds = self.get_ds(phase = phase)
        return DataLoader(ds, batch_size = self.batch_size, num_workers = 4,
                        shuffle = True if phase == "train" else False,
                        drop_last = True if phase == "train" else False)
        
    def train_dataloader(self):
        return self.get_loader("train")
    
    def val_dataloader(self):
        return self.get_loader("valid")
    
    def predict_dataloader(self):
        return self.get_loader("test")

In [10]:
scalar = MinMaxScaler()
normalize_train = scalar.fit_transform(train[config["lgb_feats"]])
normalize_train = pd.DataFrame(normalize_train, columns = config["lgb_feats"])
normalize_test = scalar.transform(final_test[config["lgb_feats"]])
normalize_test = pd.DataFrame(normalize_test, columns = config["lgb_feats"])
seed_everything(42, workers=True)
    
for fold in range(5):
    callbacks = []
    es_callback = EarlyStopping(monitor='valid_loss', patience=3)
    checkpoint_callback = ModelCheckpoint(monitor="valid_loss", dirpath="./checkpoints-{fold:02d}", filename="model-{fold:02d}", save_top_k=1, mode="min",)
    callbacks.append(es_callback)
    callbacks.append(checkpoint_callback)

    # train and valid
    trainer = Trainer(max_epochs = 5, callbacks=callbacks,)
    model = MyModule(lr = 0.01, num_columns = len(config["lgb_feats"]), last_num = 1).to("cpu", dtype=float)
    data_module = MyDataModule(train = normalize_train, test = normalize_test, feats = config["lgb_feats"], 
                           fold = fold, target = train.Class, batch_size = 8)
    trainer.fit(model, datamodule = data_module)
    preds = trainer.predict(model, datamodule = data_module, ckpt_path = "best")
    print(preds)

Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Train start


Validation: 0it [00:00, ?it/s]

loss: 0.436 - val_loss: 0.402


Validation: 0it [00:00, ?it/s]

loss: 0.347 - val_loss: 0.408


Validation: 0it [00:00, ?it/s]

loss: 0.311 - val_loss: 0.473


Validation: 0it [00:00, ?it/s]

loss: 0.296 - val_loss: 0.459




Predicting: 0it [00:00, ?it/s]

[tensor([0.1359, 0.1359, 0.1359, 0.1359, 0.1359], dtype=torch.float64)]


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Train start


Validation: 0it [00:00, ?it/s]

loss: 0.455 - val_loss: 0.394


Validation: 0it [00:00, ?it/s]

loss: 0.363 - val_loss: 0.520


Validation: 0it [00:00, ?it/s]

loss: 0.329 - val_loss: 0.458


Validation: 0it [00:00, ?it/s]

loss: 0.297 - val_loss: 0.435




Predicting: 0it [00:00, ?it/s]

[tensor([0.0835, 0.0835, 0.0835, 0.0835, 0.0835], dtype=torch.float64)]


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Train start


Validation: 0it [00:00, ?it/s]

loss: 0.497 - val_loss: 0.405


Validation: 0it [00:00, ?it/s]

loss: 0.390 - val_loss: 0.347


Validation: 0it [00:00, ?it/s]

loss: 0.344 - val_loss: 0.366


Validation: 0it [00:00, ?it/s]

loss: 0.312 - val_loss: 0.384


Validation: 0it [00:00, ?it/s]

loss: 0.295 - val_loss: 0.396




Predicting: 0it [00:00, ?it/s]

[tensor([0.0933, 0.0933, 0.0933, 0.0933, 0.0933], dtype=torch.float64)]


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Train start


Validation: 0it [00:00, ?it/s]

loss: 0.430 - val_loss: 0.295


Validation: 0it [00:00, ?it/s]

loss: 0.338 - val_loss: 0.283


Validation: 0it [00:00, ?it/s]

loss: 0.301 - val_loss: 0.280


Validation: 0it [00:00, ?it/s]

loss: 0.274 - val_loss: 0.269


Validation: 0it [00:00, ?it/s]

loss: 0.264 - val_loss: 0.278




Predicting: 0it [00:00, ?it/s]

[tensor([0.0444, 0.0444, 0.0444, 0.0444, 0.0444], dtype=torch.float64)]


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Train start


Validation: 0it [00:00, ?it/s]

loss: 0.483 - val_loss: 0.319


Validation: 0it [00:00, ?it/s]

loss: 0.371 - val_loss: 0.340


Validation: 0it [00:00, ?it/s]

loss: 0.325 - val_loss: 0.314


Validation: 0it [00:00, ?it/s]

loss: 0.296 - val_loss: 0.312


Validation: 0it [00:00, ?it/s]

loss: 0.281 - val_loss: 0.308




Predicting: 0it [00:00, ?it/s]

[tensor([0.1750, 0.1750, 0.1750, 0.1750, 0.1750], dtype=torch.float64)]
