## LightGBM + FT_transformer

In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import gc
import lightgbm as lgb
import os
import warnings
import torch
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score

warnings.filterwarnings("ignore")

In [2]:
def get_feature(df):

    X, Y = df.iloc[:,:-6], df.iloc[:, -6:]
    X = X.fillna(-1)

    S = X*np.log(X)
    S.fillna(0, inplace=True)
    s = S.sum(axis = 1).to_frame(name = 'mixing entropy')

    poly = PolynomialFeatures(include_bias=False, interaction_only =True)
    H = poly.fit_transform(X)
    new_col_names = poly.get_feature_names(X.columns.tolist())[12:]

    h = pd.DataFrame(data = H[:, 12:], columns = new_col_names, index= X.index)
    phs = h.multiply(s.values, axis=0).add_prefix('phs_')
    final_df = pd.concat([X, s, h, phs, Y], axis=1)

    return final_df

def cal_score(true, pred):
    mae = mean_absolute_error(true, pred)
    r2 = r2_score(true, pred)
    rmse = np.sqrt(((true - pred) ** 2).mean())
    pccs = pearsonr(true, pred)[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)

    return mae, r2, rmse, pccs

def eva_all(true, train, val, true_test, test):
    print('train')
    mae, r2, rmse, pccs = cal_score(true, train)
    print(f'mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')
    print('valid')
    mae, r2, rmse, pccs = cal_score(true, val)
    print(f'mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')
    print('test')
    mae, r2, rmse, pccs = cal_score(true_test, test)
    print(f'mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

In [3]:
train = pd.read_csv('missing_data/mis_train.csv')
test = pd.read_csv('missing_data/mis_test.csv')

df_train = get_feature(train)
df_test = get_feature(test)

In [4]:
Y = train.iloc[:,-6:]
Y

Unnamed: 0,property 1,property 2,property 3,property 4,property 5,property 6
0,0.317895,0.589088,0.752391,0.328302,-0.206129,0.442830
1,-0.108256,-0.574822,0.009474,0.009474,0.022768,-0.682320
2,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.442830
3,,,-0.568540,-0.930921,0.023050,
4,,-0.833978,,,0.242608,0.217800
...,...,...,...,...,...,...
1367,0.386099,-0.501252,-0.243823,-0.904183,0.123940,-0.524799
1368,0.542143,0.166928,0.654849,-0.361795,-1.115771,1.943030
1369,-0.108256,1.237477,0.836668,1.384191,0.026685,
1370,-0.108256,,-0.034687,-0.929547,,0.442830


In [5]:
Y_test = test.iloc[:, -6:]

In [6]:
X = df_train.iloc[:,:-6]
X_test = df_test.iloc[:,:-6]

In [7]:
X.shape

(1372, 145)

In [8]:
models = os.listdir('p2_final_lgb_models_3fold')
lgb_X = []
lgb_X_test = []
gbm_leaf_value = {}
columns = []
for model in models:
    gbm = lgb.Booster(model_file='final_lgb_models_3fold/'+ model)
    model_name =model.split('.')[0]
    gbm_leaf_value[model_name] = {}
    for i in range(gbm.num_trees()):
        gbm_leaf_value[model_name][i] = {}
        columns.append(model_name + '_' + str(i))
        for j in range(31):
            try:
                gbm_leaf_value[model_name][i][j] = gbm.get_leaf_output(i, j)
            except:
                continue
    lgb_X.append(gbm.predict(X, pred_leaf = True))
    lgb_X_test.append(gbm.predict(X_test, pred_leaf = True))

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: leaf_idx >= 0 && leaf_idx < models_[tree_idx]->num_leaves() at /__w/1/s/python-package/compile/src/boosting/gbdt.h, line 380 .

[LightGBM] [Fatal] Check failed: l

In [9]:
lgb_X = [pd.DataFrame(i) for i in lgb_X]
lgb_X_test = [pd.DataFrame(i) for i in lgb_X_test]

all_lgb_X = pd.concat(lgb_X, axis=1)
all_lgb_X.columns = columns

all_lgb_X_test = pd.concat(lgb_X_test, axis=1)
all_lgb_X_test.columns = columns

In [10]:
all_lgb_X

Unnamed: 0,property_1_fold3_model_0,property_1_fold3_model_1,property_1_fold3_model_2,property_1_fold3_model_3,property_1_fold3_model_4,property_1_fold3_model_5,property_1_fold3_model_6,property_1_fold3_model_7,property_1_fold3_model_8,property_1_fold3_model_9,...,property_6_fold3_model_100,property_6_fold3_model_101,property_6_fold3_model_102,property_6_fold3_model_103,property_6_fold3_model_104,property_6_fold3_model_105,property_6_fold3_model_106,property_6_fold3_model_107,property_6_fold3_model_108,property_6_fold3_model_109
0,3,3,3,0,2,2,1,0,2,2,...,5,0,6,4,6,2,10,4,4,3
1,0,0,0,3,4,0,6,3,0,0,...,7,5,6,0,5,2,12,3,4,4
2,0,0,0,3,4,0,6,4,0,0,...,2,0,7,2,4,2,12,3,4,4
3,0,0,0,3,4,0,6,4,0,0,...,6,4,3,5,4,2,12,3,4,4
4,4,4,4,4,0,1,0,4,1,1,...,8,0,3,5,3,0,0,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1367,2,2,2,2,2,2,5,2,2,2,...,2,5,7,2,3,2,6,3,4,4
1368,2,2,2,2,3,2,2,2,2,2,...,7,0,4,2,5,2,6,3,4,4
1369,0,0,0,3,4,0,6,4,0,0,...,2,0,7,2,4,3,1,3,4,4
1370,0,0,0,3,4,0,6,4,0,0,...,8,0,2,2,3,2,1,3,4,4


In [11]:
new_df = {}
for name in columns:
    col_leaf_index = all_lgb_X[name].values.tolist()
    model_name, tree_name = name[:22], int(name[23:])
    col_leaf_value = [gbm_leaf_value[model_name][tree_name][i] for i in col_leaf_index]
    new_df[name] = col_leaf_value

new_df = pd.DataFrame(new_df)

In [12]:
new_df_test = {}
for name in columns:
    col_leaf_index = all_lgb_X_test[name].values.tolist()
    model_name, tree_name = name[:22], int(name[23:])
    col_leaf_value = [gbm_leaf_value[model_name][tree_name][i] for i in col_leaf_index]
    new_df_test[name] = col_leaf_value
    
new_df_test = pd.DataFrame(new_df_test)

In [13]:
new_df_test

Unnamed: 0,property_1_fold3_model_0,property_1_fold3_model_1,property_1_fold3_model_2,property_1_fold3_model_3,property_1_fold3_model_4,property_1_fold3_model_5,property_1_fold3_model_6,property_1_fold3_model_7,property_1_fold3_model_8,property_1_fold3_model_9,...,property_6_fold3_model_100,property_6_fold3_model_101,property_6_fold3_model_102,property_6_fold3_model_103,property_6_fold3_model_104,property_6_fold3_model_105,property_6_fold3_model_106,property_6_fold3_model_107,property_6_fold3_model_108,property_6_fold3_model_109
0,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-0.002446,-0.001780,0.002205,-0.002193,-0.002299,-0.005707,0.011184,-0.001616,0.000218,-0.000774
1,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,0.004162,-0.001780,0.001191,0.000080,0.003549,0.000429,0.001026,-0.001616,0.000218,-0.000774
2,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-0.002446,-0.001780,0.001191,0.000080,-0.002299,0.000429,-0.016409,-0.001616,0.000218,-0.000774
3,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,0.016253,-0.002236,-0.002213,...,-0.005358,0.005257,0.003612,-0.002193,-0.018872,0.010917,-0.006647,-0.011322,0.002053,-0.000774
4,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-0.005358,-0.001780,0.009833,-0.002193,0.004693,0.000429,0.000524,-0.001616,0.000218,-0.000774
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,0.140288,-0.000098,-0.000097,0.000014,0.000372,0.002106,-0.000716,-0.001043,0.002021,0.002072,...,-0.005358,-0.001780,-0.006510,0.000080,-0.018872,0.000429,0.014105,-0.001616,0.000218,-0.000774
584,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,0.009524,-0.001780,0.002205,-0.002193,0.009012,0.000429,-0.006647,0.002403,0.000218,0.009167
585,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-0.002446,-0.001780,0.002205,0.000080,-0.002299,0.000429,0.001026,0.002403,0.000218,-0.000774
586,0.190397,0.049510,0.049015,0.048525,0.015339,0.024317,0.007339,0.016253,0.023756,0.023519,...,0.009524,0.000994,0.002205,0.008863,0.009012,-0.005707,0.006196,0.008150,0.002053,-0.000774


In [14]:

ori_train = df_train.iloc[:,:-6]
ori_test = df_test.iloc[:,:-6]
ori_train

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,...,phs_element 8 element 9,phs_element 8 element 10,phs_element 8 element 11,phs_element 8 element 12,phs_element 9 element 10,phs_element 9 element 11,phs_element 9 element 12,phs_element 10 element 11,phs_element 10 element 12,phs_element 11 element 12
0,0.673956,0.174380,0.018852,0.122537,0.009426,0.000000,0.000424,0.000283,0.000141,0.000000,...,-3.811991e-08,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000e+00,-0.000000,-0.000000,-0.000000e+00
1,0.010422,0.000000,0.009474,0.312648,0.001421,0.661298,0.000000,0.000000,0.000000,0.000000,...,-0.000000e+00,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000e+00,-0.000000,-0.000000,-0.000000e+00
2,-1.000000,0.004798,0.016314,0.009596,0.004798,0.002879,0.000192,-1.000000,0.000048,0.000000,...,8.729698e-06,0.000000,0.000000,-0.181936,-0.000000,-0.000000,8.729698e-06,-0.000000,0.000000,0.000000e+00
3,0.002357,0.000000,-1.000000,0.009428,-1.000000,0.867352,0.000283,0.000000,0.000754,0.113133,...,-0.000000e+00,-0.000000,-0.000000,-0.000000,-0.000040,-0.000002,-1.669355e-08,-0.000351,-0.000003,-1.460686e-07
4,0.004180,0.000000,-1.000000,0.010450,0.000052,0.856934,0.015676,-1.000000,-1.000000,0.104504,...,-5.410412e-01,0.056541,0.003958,0.000028,0.056541,0.003958,2.827052e-05,-0.000414,-0.000003,-2.068070e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1367,0.009466,-1.000000,-1.000000,0.482772,-1.000000,-1.000000,0.000189,0.000947,0.000189,0.000000,...,-7.267310e-08,-0.000000,-0.000000,0.000384,-0.000000,-0.000000,7.677186e-05,-0.000000,0.000000,0.000000e+00
1368,0.443867,0.250337,-1.000000,0.259965,0.006740,0.019257,0.000289,0.000193,0.000096,0.000000,...,-2.173199e-08,-0.000000,0.000226,0.000226,-0.000000,0.000113,1.128542e-04,0.000000,0.000000,-1.172104e+00
1369,-1.000000,0.000000,0.007925,0.000000,0.019812,0.000000,0.000248,-1.000000,0.000248,0.000000,...,2.975288e-05,0.000000,0.000000,0.000000,-0.000000,-0.000000,-0.000000e+00,-0.000000,-0.000000,-0.000000e+00
1370,0.023177,0.000000,-1.000000,0.000000,0.000000,0.845002,0.003380,0.000000,0.000000,0.123612,...,-0.000000e+00,-0.000000,0.000000,-0.000000,-0.000000,0.000000,-0.000000e+00,0.062698,-0.000000,0.000000e+00


In [15]:
#concatenate origin feature 

X = new_df.join(ori_train)
X_test = new_df_test.join(ori_test)
X.shape, X_test.shape

((1372, 1709), (588, 1709))

In [16]:
os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '2'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [17]:
TRUE_Y = pd.read_csv('missing_data/p10_train.csv')
TRUE_Y_TEST = pd.read_csv('missing_data/p10_test.csv')
TRUE_Y = TRUE_Y.iloc[:,-6:]
TRUE_Y_TEST = TRUE_Y_TEST.iloc[:, -6:]


In [18]:
TRUE_Y

Unnamed: 0,property 1,property 2,property 3,property 4,property 5,property 6
0,0.317895,0.589088,0.752391,0.328302,-0.206129,0.442830
1,-0.108256,-0.574822,-0.221364,-0.926633,0.022768,-0.682320
2,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.442830
3,-0.106166,-0.858744,-0.568540,-0.930921,0.023050,1.042910
4,1.642237,-0.833978,-0.434720,-0.930877,0.242608,0.217800
...,...,...,...,...,...,...
1367,0.386099,-0.501252,-0.243823,-0.904183,0.123940,-0.524799
1368,0.542143,0.166928,0.654849,-0.361795,-1.115771,1.943030
1369,-0.108256,1.237477,0.836668,1.384191,0.026685,-1.207390
1370,-0.108256,-0.813868,-0.034687,-0.929547,0.022773,0.442830


In [24]:
X_test

Unnamed: 0,property_1_fold3_model_0,property_1_fold3_model_1,property_1_fold3_model_2,property_1_fold3_model_3,property_1_fold3_model_4,property_1_fold3_model_5,property_1_fold3_model_6,property_1_fold3_model_7,property_1_fold3_model_8,property_1_fold3_model_9,...,phs_element 8 element 9,phs_element 8 element 10,phs_element 8 element 11,phs_element 8 element 12,phs_element 9 element 10,phs_element 9 element 11,phs_element 9 element 12,phs_element 10 element 11,phs_element 10 element 12,phs_element 11 element 12
0,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-1.708719e-07,-0.000000,-0.000000,-0.000000e+00,-0.000000,-0.000000,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000
1,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-4.849766e-08,-0.000000,-0.000000,-9.699532e-08,-0.000000,-0.000000,-8.082944e-09,-0.000000e+00,-0.000000e+00,-0.000000
2,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-1.239750e-07,-0.000000,-0.000000,-2.066249e-07,-0.000000,-0.000000,-2.695108e-08,-0.000000e+00,-0.000000e+00,-0.000000
3,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,0.016253,-0.002236,-0.002213,...,5.973356e-04,-0.533222,0.000000,-5.332216e-01,0.000597,-0.000000,5.973356e-04,0.000000e+00,-5.332216e-01,0.000000
4,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,-0.000000e+00,-0.000000,-0.000000,0.000000e+00,-0.000000,-0.000000,0.000000e+00,-1.647951e-09,8.272712e-06,0.000006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,0.140288,-0.000098,-0.000097,0.000014,0.000372,0.002106,-0.000716,-0.001043,0.002021,0.002072,...,-9.019736e-09,-0.000000,-0.000000,9.061227e-05,-0.000000,-0.000000,4.530613e-05,-0.000000e+00,0.000000e+00,0.000000
584,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,0.000000e+00,0.002204,0.000000,4.408972e-05,-0.000000,-0.000000,-0.000000e+00,-0.000000e+00,-2.121534e-07,-0.000000
585,0.137971,-0.002392,-0.002368,-0.002321,-0.002257,-0.002307,-0.002301,-0.002258,-0.002236,-0.002213,...,1.697137e-03,-0.000000,-0.000000,-0.000000e+00,0.000000,0.000000,0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000
586,0.190397,0.049510,0.049015,0.048525,0.015339,0.024317,0.007339,0.016253,0.023756,0.023519,...,-5.189978e-07,-0.000000,-0.000009,-0.000000e+00,-0.000000,-0.000002,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000


In [26]:
from sklearn.model_selection import KFold
from loss_utils.data import MyDataset
from torch.utils.data import DataLoader
from loss_utils.trainer import trainer
from models.ft_transformer import FT_Transformer



kf = KFold(n_splits=5, random_state=2022, shuffle=True)
oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False
pred = torch.zeros((Y_test.shape))


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    TRUEY_train, TRUEY_val = np.array(TRUE_Y.iloc[trn_idx]), np.array(TRUE_Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    TRUEY_train, TRUEY_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train, TRUEY_train)
    valid_dataset = MyDataset(X_val, y_val, TRUEY_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    

    model = FT_Transformer(in_dim=X.shape[1], d_numerical=256, categories=None, token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int()).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        pred += final_model(torch.Tensor(np.array(X_test)).float(), None).detach() /5
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4

Fold: 0
epoch:    1 | train_loss: 8.3929 | train_mae: 0.5675 | train_r2: -0.1644 | train_rmse: 0.9491 | train_pccs: -0.0156
epoch:    1 | valid_loss: 7.6521 | valid_mae: 0.5093 | valid_r2: 0.0025 | valid_rmse: 1.5094 | valid_pccs: 0.1072
epoch:    2 | train_loss: 7.0767 | train_mae: 0.4851 | train_r2: -0.0032 | train_rmse: 0.8841 | train_pccs: 0.0743
epoch:    2 | valid_loss: 7.4845 | valid_mae: 0.5069 | valid_r2: 0.0173 | valid_rmse: 1.5019 | valid_pccs: 0.1297
epoch:    3 | train_loss: 7.3689 | train_mae: 0.4835 | train_r2: 0.0081 | train_rmse: 0.8794 | train_pccs: 0.0806
epoch:    3 | valid_loss: 7.6923 | valid_mae: 0.5098 | valid_r2: 0.0129 | valid_rmse: 1.5040 | valid_pccs: 0.1311
epoch:    4 | train_loss: 6.9677 | train_mae: 0.4882 | train_r2: 0.0100 | train_rmse: 0.8784 | train_pccs: 0.0953
epoch:    4 | valid_loss: 7.4804 | valid_mae: 0.5240 | valid_r2: 0.0025 | valid_rmse: 1.5085 | valid_pccs: 0.1210
epoch:    5 | train_loss: 7.3426 | train_mae: 0.4880 | train_r2: 0.0103 | tra

In [29]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

'''
for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')
'''

print("test")
for i in range(TRUE_Y_TEST.shape[1]):
    mae = mean_absolute_error(TRUE_Y_TEST.iloc[:,i], pred[:,i])
    r2 = r2_score(TRUE_Y_TEST.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((TRUE_Y_TEST.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(TRUE_Y_TEST.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
test
property1: mae:0.2549, rmse:0.3624, r2:-0.1263, pccs:0.1702
property2: mae:0.8248, rmse:0.913, r2:0.0034, pccs:0.6534
property3: mae:0.6062, rmse:0.7525, r2:0.0035, pccs:0.5796
property4: mae:0.8829, rmse:0.9604, r2:-0.0015, pccs:0.6074
property5: mae:0.1062, rmse:0.5411, r2:-0.0007, pccs:0.0241
property6: mae:0.7733, rmse:0.9567, r2:-0.0254, pccs:0.3864
