In [8]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2023.3.3-cp38-cp38-win_amd64.whl (20.6 MB)
Installing collected packages: rdkit
Successfully installed rdkit-2023.3.3


In [291]:
import os
import pandas as pd
import random
import numpy as np
from rdkit import DataStructs
from rdkit import rdBase, Chem
from rdkit.Chem import AllChem, PandasTools, Descriptors
from rdkit.Chem.Draw import IPythonConsole

from autogluon.tabular import TabularDataset, TabularPredictor
import autogluon.core as ag

import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds both Python's
    ``random`` module and NumPy's legacy global RNG with the same value.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Stored as a string because environment variables are text.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(42)

In [292]:
# Directory that holds the competition CSV files. The default is the original
# author's local folder; set the DACON_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get("DACON_DATA_DIR", "C:/Users/user/Desktop/데이콘 신약대회/open")

# Raw training and test tables.
train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")


In [293]:
# Count missing values per column in the training frame.
train.isna().sum()

id                            0
SMILES                        0
MLM                           0
HLM                           0
AlogP                         2
Molecular_Weight              0
Num_H_Acceptors               0
Num_H_Donors                  0
Num_RotatableBonds            0
LogD                          0
Molecular_PolarSurfaceArea    0
dtype: int64

In [294]:
# Count missing values per column in the test frame.
test.isna().sum()

id                            0
SMILES                        0
AlogP                         1
Molecular_Weight              0
Num_H_Acceptors               0
Num_H_Donors                  0
Num_RotatableBonds            0
LogD                          0
Molecular_PolarSurfaceArea    0
dtype: int64

In [295]:
# Print column dtypes and non-null counts for the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3498 entries, 0 to 3497
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   id                          3498 non-null   object 
 1   SMILES                      3498 non-null   object 
 2   MLM                         3498 non-null   float64
 3   HLM                         3498 non-null   float64
 4   AlogP                       3496 non-null   float64
 5   Molecular_Weight            3498 non-null   float64
 6   Num_H_Acceptors             3498 non-null   int64  
 7   Num_H_Donors                3498 non-null   int64  
 8   Num_RotatableBonds          3498 non-null   int64  
 9   LogD                        3498 non-null   float64
 10  Molecular_PolarSurfaceArea  3498 non-null   float64
dtypes: float64(6), int64(3), object(2)
memory usage: 300.7+ KB


In [296]:
# Impute missing AlogP values.
# Each row is an independent molecule, so filling a gap from the neighbouring
# rows (what Series.interpolate does) depends on row order and leaves a NaN in
# the first row unfilled. Use the training-set median instead, and reuse that
# same value for the test set so both frames are imputed consistently.
alogp_median = train['AlogP'].median()
train['AlogP'] = train['AlogP'].fillna(alogp_median)
test['AlogP'] = test['AlogP'].fillna(alogp_median)

In [297]:
# Add an RDKit molecule column built from the 'SMILES' column to both frames
# (the frames are modified in place).
for frame in (train, test):
    PandasTools.AddMoleculeColumnToFrame(frame, smilesCol='SMILES')

In [298]:
# Report (rows, columns) for the training frame, then the test frame.
for frame in (train, test):
    print(frame.shape)

(3498, 12)
(483, 10)


In [299]:
# Print the rows whose ROMol entry is null, first for train and then for test;
# an empty frame means every row has a molecule object.
for frame in (train, test):
    print(frame.loc[frame['ROMol'].isnull()])


Empty DataFrame
Columns: [id, SMILES, MLM, HLM, AlogP, Molecular_Weight, Num_H_Acceptors, Num_H_Donors, Num_RotatableBonds, LogD, Molecular_PolarSurfaceArea, ROMol]
Index: []
Empty DataFrame
Columns: [id, SMILES, AlogP, Molecular_Weight, Num_H_Acceptors, Num_H_Donors, Num_RotatableBonds, LogD, Molecular_PolarSurfaceArea, ROMol]
Index: []


In [300]:
# Add one column per entry of Descriptors.descList (name, function) to the
# training frame by applying each descriptor function to the ROMol column.
for desc_name, desc_fn in Descriptors.descList:
    train[desc_name] = train['ROMol'].map(desc_fn)

In [301]:
# Add one column per entry of Descriptors.descList (name, function) to the
# test frame by applying each descriptor function to the ROMol column.
for desc_name, desc_fn in Descriptors.descList:
    test[desc_name] = test['ROMol'].map(desc_fn)

In [302]:
# Report (rows, columns) for the training frame, then the test frame.
for frame in (train, test):
    print(frame.shape)

(3498, 221)
(483, 219)


In [303]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,id,SMILES,MLM,HLM,AlogP,Molecular_Weight,Num_H_Acceptors,Num_H_Donors,Num_RotatableBonds,LogD,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,TRAIN_0000,CCOc1ccc(CNC(=O)c2cc(-c3sc(C)nc3C)n[nH]2)cc1OCC,26.01,50.68,3.259,400.495,5,2,8,3.259,...,0,0,0,0,0,1,0,0,0,0
1,TRAIN_0001,Cc1nc(C)c(CN2CC(C)C(=O)Nc3ccccc32)s1,29.27,50.59,2.169,301.407,2,1,2,2.172,...,0,0,0,0,0,1,0,0,0,0
2,TRAIN_0002,CCCN1CCN(c2nn3nnnc3c3ccccc23)CC1,5.586,80.892,1.593,297.358,5,0,3,1.585,...,0,0,0,0,1,0,0,0,0,0
3,TRAIN_0003,Cc1ccc(-c2ccc(-n3nc(C)c(S(=O)(=O)N4CCN(C5CCCCC5)CC4)c3C)nn2)cc1,5.71,2.0,4.771,494.652,6,0,5,3.475,...,0,1,0,0,0,0,0,0,0,0
4,TRAIN_0004,Cc1ccc2c(c1)N(C(=O)c1ccncc1)CC(C)O2,93.27,99.99,2.335,268.31,3,0,1,2.337,...,0,0,0,0,0,0,0,0,0,0


In [304]:
# Store a Morgan fingerprint bit vector (radius 2, 1024 bits) per molecule in
# an 'FP' column of each frame.
train['FP'] = train['ROMol'].map(
    lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)
)
test['FP'] = test['ROMol'].map(
    lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)
)


In [305]:
def _morgan_bits(mols):
    """Return a list with one Morgan bit vector (radius 2, 1024 bits) per molecule."""
    return [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024) for mol in mols]


# Fingerprints for every training / test molecule.
FP_1 = _morgan_bits(train.ROMol)
FP_2 = _morgan_bits(test.ROMol)

# One row per molecule, indexed like the source frame so the column-wise
# concat below lines up row for row.
df_FP_1 = pd.DataFrame(np.array(FP_1), index=train.index)
df_FP_2 = pd.DataFrame(np.array(FP_2), index=test.index)

# Append the fingerprint columns; they keep the default integer labels of the
# new frames.
train = pd.concat([train, df_FP_1], axis=1)
test = pd.concat([test, df_FP_2], axis=1)


In [306]:
# Print the index range, column count, dtypes and memory usage of the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3498 entries, 0 to 3497
Columns: 1246 entries, id to 1023
dtypes: float64(111), int32(1024), int64(107), object(4)
memory usage: 19.6+ MB


In [307]:
# Print the index range, column count, dtypes and memory usage of the test frame.
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 483 entries, 0 to 482
Columns: 1244 entries, id to 1023
dtypes: float64(109), int32(1024), int64(107), object(4)
memory usage: 2.7+ MB


In [308]:
# Show only the object-dtype columns of the training frame.
train.select_dtypes(include='object')


Unnamed: 0,id,SMILES,ROMol,FP
0,TRAIN_0000,CCOc1ccc(CNC(=O)c2cc(-c3sc(C)nc3C)n[nH]2)cc1OCC,<rdkit.Chem.rdchem.Mol object at 0x00000271C82B75F0>,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, ...]"
1,TRAIN_0001,Cc1nc(C)c(CN2CC(C)C(=O)Nc3ccccc32)s1,<rdkit.Chem.rdchem.Mol object at 0x00000271C82B7430>,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...]"
2,TRAIN_0002,CCCN1CCN(c2nn3nnnc3c3ccccc23)CC1,<rdkit.Chem.rdchem.Mol object at 0x00000271C82B7900>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
3,TRAIN_0003,Cc1ccc(-c2ccc(-n3nc(C)c(S(=O)(=O)N4CCN(C5CCCCC5)CC4)c3C)nn2)cc1,<rdkit.Chem.rdchem.Mol object at 0x00000271C82B7970>,"[0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...]"
4,TRAIN_0004,Cc1ccc2c(c1)N(C(=O)c1ccncc1)CC(C)O2,<rdkit.Chem.rdchem.Mol object at 0x00000271C82B79E0>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
...,...,...,...,...
3493,TRAIN_3493,Cn1nc(CNC(=O)Cn2nc(C(F)(F)F)c3c2CCC3)c(Cl)c1Cl,<rdkit.Chem.rdchem.Mol object at 0x00000271C7A167B0>,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
3494,TRAIN_3494,CCn1[nH]cc/c1=N\C(=O)c1nn(-c2ccccc2)c(=O)c2ccccc12,<rdkit.Chem.rdchem.Mol object at 0x00000271C7A16820>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
3495,TRAIN_3495,CCOC(=O)CCCc1nc2cc(N)ccc2n1C,<rdkit.Chem.rdchem.Mol object at 0x00000271C7A16890>,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
3496,TRAIN_3496,Nc1cc(C(=O)OCCC2CCOC2=O)cnc1Cl,<rdkit.Chem.rdchem.Mol object at 0x00000271C7A16900>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"


In [309]:
# Remove the identifier, SMILES string, molecule object and fingerprint object
# columns from both frames.
non_feature_cols = ['id', 'SMILES', 'ROMol', 'FP']
train = train.drop(columns=non_feature_cols)
test = test.drop(columns=non_feature_cols)

In [310]:
# Display the training frame.
train

Unnamed: 0,MLM,HLM,AlogP,Molecular_Weight,Num_H_Acceptors,Num_H_Donors,Num_RotatableBonds,LogD,Molecular_PolarSurfaceArea,MaxAbsEStateIndex,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,26.010,50.680,3.259,400.495,5,2,8,3.259,117.37,12.482330,...,0,0,0,0,0,0,0,0,0,0
1,29.270,50.590,2.169,301.407,2,1,2,2.172,73.47,12.098337,...,0,0,0,0,0,1,0,0,0,0
2,5.586,80.892,1.593,297.358,5,0,3,1.585,62.45,4.646025,...,0,0,0,0,0,0,0,0,0,0
3,5.710,2.000,4.771,494.652,6,0,5,3.475,92.60,13.641142,...,0,0,0,0,0,1,0,1,0,0
4,93.270,99.990,2.335,268.310,3,0,1,2.337,42.43,12.661906,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3493,1.556,3.079,3.409,396.195,3,1,5,3.409,64.74,13.033764,...,0,0,0,0,0,0,0,0,0,0
3494,35.560,47.630,1.912,359.381,4,1,3,1.844,77.37,12.917253,...,0,0,0,0,0,0,0,0,0,0
3495,56.150,1.790,1.941,261.320,3,1,6,2.124,70.14,11.269030,...,0,0,0,0,0,0,0,0,0,0
3496,0.030,2.770,0.989,284.696,5,1,5,0.989,91.51,11.686457,...,0,0,0,0,0,1,0,0,0,0


In [311]:
# Training frame without the HLM column. drop() returns a new frame, so
# `train` itself is left unchanged.
train_ = train.drop(columns='HLM')


In [278]:
# Settings for the MLM predictor.
label = 'MLM'
eval_metric = 'root_mean_squared_error'
time_limit = 60 * 60  # training budget passed to fit()

# Wrap the frames for AutoGluon.
train_data1 = TabularDataset(train_)
test_data = TabularDataset(test)

In [279]:
# Build a predictor for `label` scored with `eval_metric`, then fit it on
# train_data1 with the 'best_quality' preset, the configured time limit and
# three stack levels.
predictor = TabularPredictor(label=label, eval_metric=eval_metric)
predictor = predictor.fit(
    train_data1,
    presets='best_quality',
    time_limit=time_limit,
    num_stack_levels=3,
)

No path specified. Models will be saved in: "AutogluonModels\ag-20230910_115534\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 3600s
AutoGluon will save models to "AutogluonModels\ag-20230910_115534\"
AutoGluon Version:  0.8.2
Python Version:     3.8.8
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   112.99 GB / 237.11 GB (47.7%)
Train Data Rows:    3498
Train Data Columns: 1242
Label Column: MLM
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (131.72, 0.0, 37.38474, 35.69599)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multic

Fitting model: KNeighborsDist_BAG_L1 ... Training model for up to 1196.59s of the 3591.41s of remaining time.
		To set the same value for all models, do the following when calling predictor.fit: `predictor.fit(..., ag_args_fit={"ag.max_memory_usage_ratio": VALUE})`
		'NoneType' object has no attribute 'split'
Detailed Traceback:
Traceback (most recent call last):
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\trainer\abstract_trainer.py", line 1733, in _train_and_save
    model = self._train_single(X, y, model, X_val, y_val, total_resources=total_resources, **model_fit_kwargs)
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\trainer\abstract_trainer.py", line 1684, in _train_single
    model = model.fit(X=X, y=y, X_val=X_val, y_val=y_val, total_resources=total_resources, **model_fit_kwargs)
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\models\abstract\abstract_model.py", line 829,

	-30.3957	 = Validation score   (-root_mean_squared_error)
	114.08s	 = Training   runtime
	2.46s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 1041.52s of the 2560.79s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-30.0088	 = Validation score   (-root_mean_squared_error)
	96.37s	 = Training   runtime
	0.42s	 = Validation runtime
Fitting model: ExtraTreesMSE_BAG_L2 ... Training model for up to 934.32s of the 2453.63s of remaining time.
	-30.3242	 = Validation score   (-root_mean_squared_error)
	76.77s	 = Training   runtime
	2.53s	 = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L2 ... Training model for up to 854.46s of the 2373.75s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-11537730.2544	 = Validation score   (-root_mean_squared_error)
	37.17s	 = Training   runtime
	0.59s	 = Validation runtime
Fitting model: XGBoost_BAG_

In [280]:
# Show the leaderboard of the models trained by `predictor`.
predictor.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-29.92956,16.976287,1113.251006,0.000996,0.401389,3,True,20
1,WeightedEnsemble_L2,-29.94883,8.725274,737.121148,0.000997,0.366491,2,True,10
2,CatBoost_BAG_L2,-30.00877,9.899927,871.808294,0.417876,96.372906,2,True,14
3,WeightedEnsemble_L4,-30.06113,23.608909,1635.91521,0.0,0.422915,4,True,30
4,CatBoost_BAG_L3,-30.13729,18.897458,1419.917083,0.385293,96.691997,3,True,24
5,LightGBMXT_BAG_L2,-30.21232,9.782707,791.799095,0.300657,16.363707,2,True,11
6,WeightedEnsemble_L5,-30.25312,35.769815,2452.548143,0.0,0.322819,5,True,40
7,CatBoost_BAG_L4,-30.28377,28.103187,2006.710318,0.439432,101.476405,4,True,34
8,LightGBMXT_BAG_L1,-30.28589,0.322187,21.273682,0.322187,21.273682,1,True,1
9,LightGBMXT_BAG_L3,-30.30219,18.797245,1340.730571,0.285079,17.505485,3,True,21


In [281]:
# Ask the predictor which model it considers best and use that model
# explicitly to predict on the test data.
model_to_use = predictor.get_model_best()
model_pred= predictor.predict(test_data, model=model_to_use)

In [282]:
# Display the predictions stored in model_pred.
model_pred

0      23.822208
1      73.787849
2      42.253929
3      48.650108
4      58.813797
         ...    
478    14.309298
479    72.339966
480    35.346153
481    57.559196
482    19.285055
Name: MLM, Length: 483, dtype: float32

In [283]:
# Training frame without the MLM column. drop() returns a new frame, so
# `train` itself is left unchanged.
train_2 = train.drop(columns='MLM')

In [284]:
# Settings for the HLM predictor.
label = 'HLM'
eval_metric = 'root_mean_squared_error'
time_limit = 60 * 60  # training budget passed to fit()

# Wrap the frames for AutoGluon.
train_data2 = TabularDataset(train_2)
test_data = TabularDataset(test)

In [285]:
# Build a predictor for `label` scored with `eval_metric`, then fit it on
# train_data2 with the 'best_quality' preset, the configured time limit and
# three stack levels. This rebinds `predictor`.
predictor = TabularPredictor(label=label, eval_metric=eval_metric)
predictor = predictor.fit(
    train_data2,
    presets='best_quality',
    time_limit=time_limit,
    num_stack_levels=3,
)

No path specified. Models will be saved in: "AutogluonModels\ag-20230910_124311\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 3600s
AutoGluon will save models to "AutogluonModels\ag-20230910_124311\"
AutoGluon Version:  0.8.2
Python Version:     3.8.8
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   108.89 GB / 237.11 GB (45.9%)
Train Data Rows:    3498
Train Data Columns: 1242
Label Column: HLM
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (135.336, 0.0, 53.09021, 36.08008)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multi

Fitting model: KNeighborsDist_BAG_L1 ... Training model for up to 1196.54s of the 3591.23s of remaining time.
		'NoneType' object has no attribute 'split'
Detailed Traceback:
Traceback (most recent call last):
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\trainer\abstract_trainer.py", line 1733, in _train_and_save
    model = self._train_single(X, y, model, X_val, y_val, total_resources=total_resources, **model_fit_kwargs)
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\trainer\abstract_trainer.py", line 1684, in _train_single
    model = model.fit(X=X, y=y, X_val=X_val, y_val=y_val, total_resources=total_resources, **model_fit_kwargs)
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\models\abstract\abstract_model.py", line 829, in fit
    out = self._fit(**kwargs)
  File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\autogluon\core\models\ensemble\stacker_ensemble_mo

Fitting model: ExtraTreesMSE_BAG_L2 ... Training model for up to 1038.36s of the 2670.96s of remaining time.
	-31.2371	 = Validation score   (-root_mean_squared_error)
	68.04s	 = Training   runtime
	2.5s	 = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L2 ... Training model for up to 967.24s of the 2599.84s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-615920501.822	 = Validation score   (-root_mean_squared_error)
	34.64s	 = Training   runtime
	0.65s	 = Validation runtime
Fitting model: XGBoost_BAG_L2 ... Training model for up to 921.32s of the 2553.9s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-31.7036	 = Validation score   (-root_mean_squared_error)
	46.67s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: NeuralNetTorch_BAG_L2 ... Training model for up to 863.3s of the 2495.88s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | F

In [286]:
# Show the leaderboard of the models trained by `predictor`.
predictor.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,-30.7938,5.486246,427.477603,0.001993,0.40785,2,True,10
1,WeightedEnsemble_L3,-30.89444,13.795841,835.486931,0.000997,0.502891,3,True,20
2,WeightedEnsemble_L4,-30.99495,25.518313,1402.194689,0.001003,0.416661,4,True,30
3,CatBoost_BAG_L2,-31.02377,9.210119,667.613365,0.408563,96.848867,2,True,14
4,CatBoost_BAG_L3,-31.0544,18.207913,1150.951809,0.477597,100.494096,3,True,24
5,ExtraTreesMSE_BAG_L1,-31.09904,2.487624,61.22936,2.487624,61.22936,1,True,5
6,WeightedEnsemble_L5,-31.13157,36.765188,2242.171789,0.001073,0.303904,5,True,40
7,CatBoost_BAG_L4,-31.21871,28.288748,1852.170768,0.425475,102.269989,4,True,34
8,LightGBMXT_BAG_L1,-31.22923,0.249075,17.588624,0.249075,17.588624,1,True,1
9,ExtraTreesMSE_BAG_L2,-31.23715,11.302874,638.805577,2.501318,68.041079,2,True,15


In [287]:
# Ask the predictor which model it considers best and use that model
# explicitly to predict on the test data.
model_to_use2 = predictor.get_model_best()
model_pred2= predictor.predict(test_data, model=model_to_use2)

In [288]:
# Display the predictions stored in model_pred2.
model_pred2

0      50.388046
1      83.939453
2      50.138241
3      73.569153
4      75.254433
         ...    
478    30.239071
479    77.045631
480    63.704082
481    68.682076
482    63.780975
Name: HLM, Length: 483, dtype: float32

In [289]:
# Load the submission template and fill its MLM / HLM columns with the two
# prediction series (pandas aligns them to the template by index label).
sample = pd.read_csv("C:/Users/user/Desktop/데이콘 신약대회/open/sample_submission.csv")
sample = sample.assign(MLM=model_pred, HLM=model_pred2)
sample

Unnamed: 0,id,MLM,HLM
0,TEST_000,23.822208,50.388046
1,TEST_001,73.787849,83.939453
2,TEST_002,42.253929,50.138241
3,TEST_003,48.650108,73.569153
4,TEST_004,58.813797,75.254433
...,...,...,...
478,TEST_478,14.309298,30.239071
479,TEST_479,72.339966,77.045631
480,TEST_480,35.346153,63.704082
481,TEST_481,57.559196,68.682076


In [290]:
# Write the submission frame to CSV without the index column.
sample.to_csv("C:/Users/user/Desktop/데이콘 신약대회/open/파생1hourstack3.csv",index=False)

In [117]:
# NOTE(review): this cell carries an older execution count than the cell that
# builds `sample`, so it looks like a leftover export from an earlier
# experiment. Run top to bottom, it writes the current `sample` under this name.
sample.to_csv("C:/Users/user/Desktop/데이콘 신약대회/open/4housstack3.csv",index=False)

In [104]:
# NOTE(review): this cell carries an older execution count than the cell that
# builds `sample`, so it looks like a leftover export from an earlier
# experiment. Run top to bottom, it writes the current `sample` under this name.
sample.to_csv("C:/Users/user/Desktop/데이콘 신약대회/open/20minstack3.csv",index=False)

In [90]:
# NOTE(review): this cell carries an older execution count than the cell that
# builds `sample`, so it looks like a leftover export from an earlier
# experiment. Run top to bottom, it writes the current `sample` under this name.
sample.to_csv("C:/Users/user/Desktop/데이콘 신약대회/open/20minstack5.csv",index=False)

In [111]:
# Target column and evaluation metric (root mean squared error) handed to
# TabularPredictor.
label = 'MLM'
eval_metric = 'root_mean_squared_error'

In [112]:
from sklearn.model_selection import train_test_split, KFold

# Number of cross-validation folds. One predictor is trained per fold and the
# test-set predictions of all folds are averaged with equal weight.
N_SPLITS = 15

test_data = TabularDataset(test)
# A fixed random_state makes the shuffled fold assignment reproducible across
# runs, independent of how much of the global NumPy RNG was consumed before.
kfold = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
pres = np.zeros(len(test))

# Work on a single copy of the training frame; the per-fold slices below are
# new frames, so there is no need to copy again inside the loop.
train_data1 = train_.copy()

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_)):
    print(f'=========={fold+1}fold/{N_SPLITS} folds==========')
    train_x = TabularDataset(train_data1.iloc[train_idx])
    val_x = TabularDataset(train_data1.iloc[val_idx])

    # The held-out fold is given to AutoGluon as tuning (validation) data.
    # NOTE(review): no `path` is set, so every fold saves its models to a new
    # AutogluonModels directory; watch disk usage when running all folds.
    predictor = TabularPredictor(
        label=label, eval_metric=eval_metric
    ).fit(train_x, tuning_data=val_x)
    pre = predictor.predict(test_data)
    # Accumulate the running mean by position on a plain NumPy array.
    pres += pre.to_numpy() / N_SPLITS
    


No path specified. Models will be saved in: "AutogluonModels\ag-20230829_115445\"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20230829_115445\"
AutoGluon Version:  0.8.2
Python Version:     3.8.8
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   21.13 GB / 237.11 GB (8.9%)
Train Data Rows:    3264
Train Data Columns: 7
Tuning Data Rows:    234
Tuning Data Columns: 7
Label Column: MLM
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (131.72, 0.0, 37.22306, 35.62323)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureG



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.1902	 = Validation score   (-root_mean_squared_error)
	0.42s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.8397	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-34.1789	 = Validation score   (-root_mean_squared_error)
	1.55s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-32.9673	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-34.2625	 = Validation score   (-root_mean_squared_error)
	0.62s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 2: early stopping
	-32.8829	 = Validation score   (-root_mean_squared_error)
	2.96s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting mo



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-30.8522	 = Validation score   (-root_mean_squared_error)
	0.4s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.9446	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.8744	 = Validation score   (-root_mean_squared_error)
	1.64s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-30.7525	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.2266	 = Validation score   (-root_mean_squared_error)
	0.7s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-29.9652	 = Validation score   (-root_mean_squared_error)
	3.64s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-31.3205	 = Validation score 



	-37.6016	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.1525	 = Validation score   (-root_mean_squared_error)
	0.41s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.3983	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-34.4115	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: CatBoost ...
	-33.0377	 = Validation score   (-root_mean_squared_error)
	1.5s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.9604	 = Validation score   (-root_mean_squared_error)
	0.69s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.8651	 = Validation score   (-root_mean_squared_error)
	3.48s	 = Training   runtime
	0.01s	 = Validation runt



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...


[1000]	valid_set's rmse: 31.3118


	-31.2767	 = Validation score   (-root_mean_squared_error)
	0.79s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: LightGBM ...
	-31.9044	 = Validation score   (-root_mean_squared_error)
	0.38s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.347	 = Validation score   (-root_mean_squared_error)
	1.82s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-31.5753	 = Validation score   (-root_mean_squared_error)
	2.49s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.5915	 = Validation score   (-root_mean_squared_error)
	0.68s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-31.442	 = Validation score   (-root_mean_squared_error)
	3.52s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-32.3181	 = Validation score   (-root_mean_squared_error)
	0.49s	 = Training   runtime
	0.0s	 = Validation runtime




	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.303	 = Validation score   (-root_mean_squared_error)
	0.53s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.6875	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.8582	 = Validation score   (-root_mean_squared_error)
	1.88s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost ...
	-32.2321	 = Validation score   (-root_mean_squared_error)
	1.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.6171	 = Validation score   (-root_mean_squared_error)
	0.75s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-31.8149	 = Validation score   (-root_mean_squared_error)
	3.58s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-33.1759	 = Validation scor



	-36.2806	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.7742	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.9949	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.6216	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost ...
	-32.7823	 = Validation score   (-root_mean_squared_error)
	1.07s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.7806	 = Validation score   (-root_mean_squared_error)
	0.71s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.8497	 = Validation score   (-root_mean_squared_error)
	3.45s	 = Training   runtime
	0.01s	 = Validation run



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-33.9728	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-29.8207	 = Validation score   (-root_mean_squared_error)
	0.36s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.6757	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-30.8913	 = Validation score   (-root_mean_squared_error)
	1.8s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: CatBoost ...
	-29.7228	 = Validation score   (-root_mean_squared_error)
	0.97s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-30.601	 = Validation score   (-root_mean_squared_error)
	0.68s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since e



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-30.6362	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.7236	 = Validation score   (-root_mean_squared_error)
	0.42s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.718	 = Validation score   (-root_mean_squared_error)
	1.85s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-30.4368	 = Validation score   (-root_mean_squared_error)
	2.57s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-30.9532	 = Validation score   (-root_mean_squared_error)
	0.81s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-30.1434	 = Validation score   (-root_mean_squared_error)
	3.79s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-30.9614	 = Validation score



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.2692	 = Validation score   (-root_mean_squared_error)
	0.41s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.1274	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.1882	 = Validation score   (-root_mean_squared_error)
	1.77s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-31.1423	 = Validation score   (-root_mean_squared_error)
	1.4s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.2516	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-30.6658	 = Validation score   (-root_mean_squared_error)
	3.44s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-31.2798	 = Validation scor



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.314	 = Validation score   (-root_mean_squared_error)
	0.52s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.4155	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.7676	 = Validation score   (-root_mean_squared_error)
	1.79s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: CatBoost ...
	-31.5042	 = Validation score   (-root_mean_squared_error)
	1.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.1472	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-30.3502	 = Validation score   (-root_mean_squared_error)
	3.45s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-31.7934	 = Validation scor



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-30.3603	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.9076	 = Validation score   (-root_mean_squared_error)
	0.33s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.0748	 = Validation score   (-root_mean_squared_error)
	1.82s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost ...
	-30.4001	 = Validation score   (-root_mean_squared_error)
	1.23s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-31.1829	 = Validation score   (-root_mean_squared_error)
	0.7s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 9: early stopping
	-29.8061	 = Validation score   (-root_mean_squared_error)
	3.4s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting mod



	-38.0976	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-37.8375	 = Validation score   (-root_mean_squared_error)
	0.02s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.6276	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.5385	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.8281	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-33.4842	 = Validation score   (-root_mean_squared_error)
	1.18s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-34.0376	 = Validation score   (-root_mean_squared_error)
	0.68s	 = Training   runtime
	0.06s	 = Validation runt



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-29.8051	 = Validation score   (-root_mean_squared_error)
	0.33s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.5453	 = Validation score   (-root_mean_squared_error)
	0.36s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.6999	 = Validation score   (-root_mean_squared_error)
	1.8s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-29.7425	 = Validation score   (-root_mean_squared_error)
	1.11s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-31.4566	 = Validation score   (-root_mean_squared_error)
	0.75s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-29.3675	 = Validation score   (-root_mean_squared_error)
	3.56s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-30.7944	 = Validation score



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.1767	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.5999	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.1087	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-30.9387	 = Validation score   (-root_mean_squared_error)
	1.18s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.0157	 = Validation score   (-root_mean_squared_error)
	0.74s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 7: early stopping
	-31.0521	 = Validation score   (-root_mean_squared_error)
	3.38s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting m



	-36.0989	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.5526	 = Validation score   (-root_mean_squared_error)
	0.46s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.3366	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.8865	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-31.4265	 = Validation score   (-root_mean_squared_error)
	1.64s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.1019	 = Validation score   (-root_mean_squared_error)
	0.74s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-30.8254	 = Validation score   (-root_mean_squared_error)
	3.74s	 = Training   runtime
	0.01s	 = Validation runt

In [113]:
# Configuration for the second target: the column AutoGluon should predict
# and the metric it should report/optimise while fitting.
label, eval_metric = 'HLM', 'root_mean_squared_error'

In [114]:
from sklearn.model_selection import train_test_split, KFold

# Number of cross-validation folds. One predictor is trained per fold and the
# test predictions of all of them are averaged, so the same constant drives the
# splitter, the progress banner and the averaging divisor.
N_SPLITS = 15

test_data = TabularDataset(test)

# random_state pins the shuffled fold assignment (same seed value the notebook
# passes to seed_everything). Without it the split depends on whatever state the
# global NumPy RNG is in when this cell happens to be executed.
kfold = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Running mean of the per-fold predictions, one slot per test row.
pres2 = np.zeros(len(test))

# The copy does not depend on the fold, so take it once instead of once per fold.
train_data2 = train_2.copy()

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_data2)):
    print(f'=========={fold+1}fold/{N_SPLITS} folds==========')
    train_x = TabularDataset(train_data2.iloc[train_idx])
    val_x = TabularDataset(train_data2.iloc[val_idx])

    # The held-out fold is handed to AutoGluon as tuning (validation) data;
    # it is what the "Validation score" lines in the log are computed on.
    predictor = TabularPredictor(
        label=label, eval_metric=eval_metric
    ).fit(train_x, tuning_data=val_x)

    pre2 = predictor.predict(test_data)
    # Convert to a plain array so the accumulation is strictly positional and
    # pres2 stays an ndarray regardless of the index carried by the prediction.
    pres2 += np.asarray(pre2) / N_SPLITS

No path specified. Models will be saved in: "AutogluonModels\ag-20230829_115912\"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20230829_115912\"
AutoGluon Version:  0.8.2
Python Version:     3.8.8
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   18.71 GB / 237.11 GB (7.9%)
Train Data Rows:    3264
Train Data Columns: 7
Tuning Data Rows:    234
Tuning Data Columns: 7
Label Column: HLM
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (135.336, 0.0, 53.04057, 36.06449)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeature



	-37.8938	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.5529	 = Validation score   (-root_mean_squared_error)
	0.47s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.1836	 = Validation score   (-root_mean_squared_error)
	0.38s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.776	 = Validation score   (-root_mean_squared_error)
	1.82s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: CatBoost ...
	-32.399	 = Validation score   (-root_mean_squared_error)
	2.57s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.5257	 = Validation score   (-root_mean_squared_error)
	0.7s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.6599	 = Validation score   (-root_mean_squared_error)
	3.54s	 = Training   runtime
	0.01s	 = Validation runtim



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.6446	 = Validation score   (-root_mean_squared_error)
	0.38s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.8547	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.9693	 = Validation score   (-root_mean_squared_error)
	1.75s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-32.6539	 = Validation score   (-root_mean_squared_error)
	1.21s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.7746	 = Validation score   (-root_mean_squared_error)
	0.73s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.6782	 = Validation score   (-root_mean_squared_error)
	3.57s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-32.4845	 = Validation sco



	-36.2763	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.4437	 = Validation score   (-root_mean_squared_error)
	0.49s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.4611	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.8458	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-31.0421	 = Validation score   (-root_mean_squared_error)
	1.8s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.3428	 = Validation score   (-root_mean_squared_error)
	0.69s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
	-31.4903	 = Validation score   (-root_mean_squared_error)
	2.8s	 = T



	-35.1208	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.1057	 = Validation score   (-root_mean_squared_error)
	0.36s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.1407	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.1411	 = Validation score   (-root_mean_squared_error)
	1.82s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost ...
	-31.9317	 = Validation score   (-root_mean_squared_error)
	1.67s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.8525	 = Validation score   (-root_mean_squared_error)
	0.71s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 8: early stopping
	-31.2674	 = Validation score   (-root_mean_squared_error)
	3.25s	 



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.5544	 = Validation score   (-root_mean_squared_error)
	0.41s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.9972	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-35.5683	 = Validation score   (-root_mean_squared_error)
	1.77s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-33.4751	 = Validation score   (-root_mean_squared_error)
	1.09s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-35.0762	 = Validation score   (-root_mean_squared_error)
	0.73s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 8: early stopping
	-32.8437	 = Validation score   (-root_mean_squared_error)
	3.43s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting m



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.0901	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.4402	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-34.2637	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: CatBoost ...
	-32.8487	 = Validation score   (-root_mean_squared_error)
	1.0s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-34.3077	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-33.1151	 = Validation score   (-root_mean_squared_error)
	3.5s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-33.8166	 = Validation score



	-36.2122	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.2899	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.5807	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.7623	 = Validation score   (-root_mean_squared_error)
	1.78s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-32.3242	 = Validation score   (-root_mean_squared_error)
	1.04s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.0045	 = Validation score   (-root_mean_squared_error)
	0.7s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.591	 = Validation score   (-root_mean_squared_error)
	3.56s	 = Training   runtime
	0.01s	 = Validation runti



	-36.4793	 = Validation score   (-root_mean_squared_error)
	0.02s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.5019	 = Validation score   (-root_mean_squared_error)
	0.41s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.8894	 = Validation score   (-root_mean_squared_error)
	0.4s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-33.2912	 = Validation score   (-root_mean_squared_error)
	1.81s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: CatBoost ...
	-32.085	 = Validation score   (-root_mean_squared_error)
	1.41s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.147	 = Validation score   (-root_mean_squared_error)
	0.74s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.2407	 = Validation score   (-root_mean_squared_error)
	3.5s	 = Training   runtime
	0.01s	 = Validation runtime



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.0528	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.158	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.1388	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: CatBoost ...
	-30.8474	 = Validation score   (-root_mean_squared_error)
	1.38s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.8825	 = Validation score   (-root_mean_squared_error)
	0.78s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 7: early stopping
	-30.8544	 = Validation score   (-root_mean_squared_error)
	3.21s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting mo



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-31.0518	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-31.1043	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-31.7321	 = Validation score   (-root_mean_squared_error)
	1.85s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: CatBoost ...
	-30.9559	 = Validation score   (-root_mean_squared_error)
	1.35s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.049	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 7: early stopping
	-30.7413	 = Validation score   (-root_mean_squared_error)
	3.16s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting mo



	-30.2382	 = Validation score   (-root_mean_squared_error)
	0.4s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-30.8841	 = Validation score   (-root_mean_squared_error)
	0.29s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-30.7012	 = Validation score   (-root_mean_squared_error)
	1.84s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-30.3811	 = Validation score   (-root_mean_squared_error)
	1.51s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-30.5883	 = Validation score   (-root_mean_squared_error)
	0.67s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 8: early stopping
	-30.1828	 = Validation score   (-root_mean_squared_error)
	3.26s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-30.5253	 = Validation score   (-root_mean_squared_error)
	0.38s	 = Tra



	-37.8614	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-37.6509	 = Validation score   (-root_mean_squared_error)
	0.02s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.777	 = Validation score   (-root_mean_squared_error)
	0.56s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.912	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.521	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: CatBoost ...
	-32.6729	 = Validation score   (-root_mean_squared_error)
	0.76s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-32.331	 = Validation score   (-root_mean_squared_error)
	0.75s	 = Training   runtime
	0.1s	 = Validation runtime
F



	-35.3425	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-35.4477	 = Validation score   (-root_mean_squared_error)
	0.02s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.6924	 = Validation score   (-root_mean_squared_error)
	0.3s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-32.9933	 = Validation score   (-root_mean_squared_error)
	0.27s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-34.1337	 = Validation score   (-root_mean_squared_error)
	1.81s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-32.7042	 = Validation score   (-root_mean_squared_error)
	1.52s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-34.119	 = Validation score   (-root_mean_squared_error)
	0.74s	 = Training   runtime
	0.1s	 = Validation runtime




	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT ...
	-32.465	 = Validation score   (-root_mean_squared_error)
	0.66s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: LightGBM ...
	-32.4481	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-32.7066	 = Validation score   (-root_mean_squared_error)
	1.87s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-32.3768	 = Validation score   (-root_mean_squared_error)
	1.87s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-33.2641	 = Validation score   (-root_mean_squared_error)
	0.7s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-32.9661	 = Validation score   (-root_mean_squared_error)
	3.48s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ...
	-33.1084	 = Validation score



	0.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-37.859	 = Validation score   (-root_mean_squared_error)
	0.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBMXT ...
	-33.5317	 = Validation score   (-root_mean_squared_error)
	0.37s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ...
	-33.6986	 = Validation score   (-root_mean_squared_error)
	0.34s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-34.9812	 = Validation score   (-root_mean_squared_error)
	1.83s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost ...
	-33.4797	 = Validation score   (-root_mean_squared_error)
	1.51s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-34.9504	 = Validation score   (-root_mean_squared_error)
	0.75s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-33.6317	 = Validatio

In [115]:
# Write the fold-averaged predictions for both targets into the submission
# frame, save it as CSV (without the index column), and display the result.
submission_path = "C:/Users/user/Desktop/데이콘 신약대회/open/15fold.csv"
for target_column, averaged_prediction in (('MLM', pres), ('HLM', pres2)):
    sample[target_column] = averaged_prediction
sample.to_csv(submission_path, index=False)
sample

Unnamed: 0,id,MLM,HLM
0,TEST_000,26.040536,46.416742
1,TEST_001,64.226667,78.869173
2,TEST_002,30.357535,54.715568
3,TEST_003,46.323999,65.524218
4,TEST_004,60.384765,76.493461
...,...,...,...
478,TEST_478,5.697068,24.946282
479,TEST_479,80.455739,86.040176
480,TEST_480,45.865274,69.261222
481,TEST_481,67.379379,79.656795
