In [2]:
from pycaret.regression import *
import pandas as pd

# 1. Load the training and test tables from their CSV files.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('train_mol2vec.csv', 'test_mol2vec.csv')
)

In [None]:
# 2. Configure the PyCaret regression experiment for the HLM target.
# NOTE(review): 'SMILES' is neither ignored nor declared as a text feature
# here, whereas the MLM setup passes text_features=['SMILES'] — confirm
# that the difference is intended.
hlm_setup_options = dict(
    data=train_data,
    target='HLM',
    # 'MLM' is the target of the second experiment; 'id' is presumably a row
    # identifier. Neither is used as an HLM feature.
    ignore_features=['id', 'MLM'],
    session_id=123,
    # Preprocessing: normalize, select features, and apply PCA with
    # pca_components=30.
    pca=True,
    pca_components=30,
    normalize=True,
    feature_selection=True,
    use_gpu=True,
    verbose=True,
)
setup_data_HLM = setup(**hlm_setup_options)

In [None]:
# 3. Train and compare several models for HLM; keep the top three when
# ranked by RMSE.
best_HLM = compare_models(
    sort='RMSE',
    n_select=3,
)

In [None]:
# Tune the hyperparameters of every selected HLM model, optimizing RMSE
# over 100 search iterations each.
tuned_HLM = [
    tune_model(candidate, optimize='RMSE', n_iter=100)
    for candidate in best_HLM
]

In [None]:
# 4. Blend the HLM models into a single ensemble.
# The tuned estimators are blended rather than the untuned `best_HLM` list;
# otherwise the hyperparameter search stored in `tuned_HLM` would be computed
# and then never used.
blended_HLM = blend_models(estimator_list=tuned_HLM)

In [None]:
# 5. Predict HLM values for the test data with the blended HLM model.
predictions_HLM = predict_model(
    blended_HLM,
    data=test_data,
)

In [None]:
# 6. Add the predicted HLM values as a feature for the MLM stage.
# NOTE(review): the training-set values come from scoring the same frame the
# HLM experiment was set up on, so they are in-sample predictions — consider
# out-of-fold predictions to limit leakage.
train_data['HLM_predicted'] = predict_model(blended_HLM, data=train_data)['prediction_label']

# The MLM experiment is set up on train_data, which now holds both 'HLM' and
# 'HLM_predicted'. The test frame must expose the same two columns or the
# MLM model cannot score it; test has no measured HLM, so the prediction
# fills both.
hlm_test_prediction = predictions_HLM['prediction_label']
test_data['HLM_predicted'] = hlm_test_prediction
test_data['HLM'] = hlm_test_prediction

In [None]:
# 7. Configure the PyCaret regression experiment for the MLM target.
# use_gpu is not passed here, so it stays at PyCaret's default.
mlm_setup_options = dict(
    data=train_data,
    target='MLM',
    # The SMILES strings are handled as a text feature in this experiment.
    text_features=['SMILES'],
    # 'id' is presumably a row identifier; it is not used as a feature.
    ignore_features=['id'],
    session_id=123,
    # Preprocessing: normalize, select features, and apply PCA with
    # pca_components=30.
    pca=True,
    pca_components=30,
    normalize=True,
    feature_selection=True,
    verbose=True,
)
setup_data_MLM = setup(**mlm_setup_options)

In [None]:
# 8. Train and compare several models for MLM; keep the top three.
# Rank by RMSE explicitly: the HLM comparison sorts by RMSE and the MLM
# models are tuned for RMSE afterwards, so leaving `sort` at its default
# would select candidates on a different metric than the rest of the
# pipeline.
best_MLM = compare_models(n_select=3, sort='RMSE')

In [None]:
# Tune the hyperparameters of every selected MLM model, optimizing RMSE
# over 50 search iterations each.
tuned_MLM = [
    tune_model(candidate, optimize='RMSE', n_iter=50)
    for candidate in best_MLM
]

In [None]:
# 9. Blend the MLM models into a single ensemble.
# The tuned estimators are blended rather than the untuned `best_MLM` list;
# otherwise the hyperparameter search stored in `tuned_MLM` would be computed
# and then never used.
blended_MLM = blend_models(estimator_list=tuned_MLM)

In [None]:
# Make sure the test frame carries the HLM-derived columns before MLM
# prediction. train_data was given an 'HLM_predicted' column before the MLM
# setup, so the test frame needs that column as well as 'HLM'; both are
# filled with the blended HLM model's predictions.
test_data['HLM'] = predictions_HLM['prediction_label']
test_data['HLM_predicted'] = predictions_HLM['prediction_label']

In [None]:
# 10. Predict MLM values for the test data with the blended MLM model.
predictions_MLM = predict_model(
    blended_MLM,
    data=test_data,
)

In [None]:
# 11. Save the final predictions to CSV.
# `assign` returns a new DataFrame, so the prediction columns are not written
# into a slice of test_data (which triggers pandas' SettingWithCopyWarning).
# Column order stays id, MLM, HLM, and values align on the index exactly as
# item assignment would.
submission = test_data[['id']].assign(
    MLM=predictions_MLM['prediction_label'],
    HLM=predictions_HLM['prediction_label'],
)
submission.to_csv('submission_mol2vec.csv', index=False)