In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem, Draw, DataStructs
import pandas as pd
from pathlib import Path
import numpy as np
from xgboost import XGBClassifier, plot_importance
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split, cross_val_score
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
import matplotlib.pyplot as plt
from hyperopt import fmin, STATUS_OK, tpe, Trials, hp, space_eval

In [14]:
# Dataset folder, resolved against the directory the kernel was started in.
base_path = Path.cwd() / 'content/datasets/'

# Tabular molecular data for the training and the test molecules.
train_df = pd.read_csv(base_path / 'train_molecular_data.csv')
test_df = pd.read_csv(base_path / 'test_molecular_data.csv')

In [16]:
# Remove the silicon-based molecules (row labels refer to the raw training CSV,
# as recorded by the original author of this cell).
#
# The frame is rebuilt from the file instead of being dropped in place: after
# `reset_index` the same labels point at different rows, so re-running an
# in-place version of this cell would silently delete six more, unrelated rows.
SILICON_ROW_LABELS = [10135, 26306, 42332, 47225, 62942, 72002]
train_df = (
    pd.read_csv(base_path.joinpath('train_molecular_data.csv'))
    .drop(index=SILICON_ROW_LABELS)
    .reset_index(drop=True)
)

In [4]:
# Target column on its own; every remaining column is used as a feature.
train_y = train_df['Expected']
train_X = train_df.drop(columns=['Expected'])

In [5]:
# Shared 5-fold stratified splitter with a fixed seed, reused by every CV call below.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)

In [6]:
# Folder that receives the generated submission CSV files.
submission_path = Path.cwd() / 'content/submissions/'

In [17]:
# Encode the raw 'Expected' labels as integer class indices; `lab` is kept so
# predictions can be mapped back to the original labels later on.
lab = LabelEncoder().fit(train_y)
train_yT = lab.transform(train_y)

# Stratified 75/25 hold-out split; the validation part is used as the
# early-stopping eval_set during hyper-parameter search.
X_train, X_val, y_train, y_val = train_test_split(
    train_X,
    train_yT,
    stratify=train_yT,
    test_size=0.25,
    random_state=10,
)

In [33]:
# XGBoost settings shared by every model in this notebook; they are merged with
# the tuned hyper-parameters when a classifier is constructed.
standard_params = dict(
    n_jobs=4,
    tree_method='hist',
    booster='gbtree',
    max_bin=256,
)

In [19]:
def objective(space):
    """Hyperopt objective: negative mean cross-validated F1 of an XGBClassifier.

    Parameters
    ----------
    space : dict
        Keyword arguments for ``XGBClassifier`` sampled by hyperopt; they are
        merged with the module-level ``standard_params``.

    Returns
    -------
    dict
        ``{'loss': -mean_f1, 'status': STATUS_OK}``. The score is negated
        because hyperopt minimises the loss.

    Notes
    -----
    Every fold is fitted on a part of ``X_train``/``y_train`` (split by ``skf``)
    and uses the same hold-out pair ``X_val``/``y_val`` as ``eval_set`` for
    early stopping (metric ``"error"``, 10 rounds).
    """
    # Forwarded unchanged to XGBClassifier.fit for each CV fold.
    fit_kwargs = {'verbose': False, 'eval_set': [[X_val, y_val]]}

    model = XGBClassifier(**space, **standard_params, eval_metric="error", early_stopping_rounds=10)

    fold_scores = cross_val_score(
        estimator=model,
        X=X_train,
        y=y_train,
        cv=skf,
        fit_params=fit_kwargs,
        scoring='f1',
    )

    return {'loss': -fold_scores.mean(), 'status': STATUS_OK}

In [20]:
# First search space handed to hyperopt; every key is an XGBClassifier keyword
# argument and every label matches its key.
hyper_space = dict(
    # Ensemble size, step size and tree depth.
    n_estimators=hp.randint('n_estimators', 100, 1000),
    learning_rate=hp.uniform('learning_rate', 0.01, 0.3),
    max_depth=hp.randint('max_depth', 3, 18),
    # Row / column sub-sampling fractions.
    subsample=hp.uniform('subsample', 0.5, 1),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
    # Split penalty and regularisation terms.
    gamma=hp.uniform('gamma', 1, 9),
    reg_alpha=hp.quniform('reg_alpha', 40, 180, 1),
    reg_lambda=hp.uniform('reg_lambda', 0, 1),
    min_child_weight=hp.quniform('min_child_weight', 0, 10, 1),
)

In [21]:
# Fresh trial store so this search's history is kept separate from any other run.
trials = Trials()

# Run 100 TPE evaluations of `objective` over `hyper_space`.
# `rstate` seeds hyperopt's sampler so that Restart & Run All proposes the same
# sequence of candidates instead of a different one on every execution.
# NOTE(review): hyperopt >= 0.2.7 expects a numpy Generator here; older releases
# expect np.random.RandomState(10) instead - confirm against the installed version.
best_params = fmin(
    fn=objective,
    space=hyper_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
    rstate=np.random.default_rng(10),
)

100%|██████| 100/100 [07:15<00:00,  4.35s/trial, best loss: -0.9363275303177486]


In [22]:
# Show the hyper-parameter assignment returned by fmin for the `hyper_space` search.
best_params

{'colsample_bytree': 0.998188934290663,
 'gamma': 1.3894568608014124,
 'learning_rate': 0.2774913094891554,
 'max_depth': 17,
 'min_child_weight': 6.0,
 'n_estimators': 869,
 'reg_alpha': 43.0,
 'reg_lambda': 0.8102486358276241,
 'subsample': 0.9202999279561747}

In [24]:
# Model built from the tuned values of the first search plus the shared settings.
xgb1 = XGBClassifier(**best_params, **standard_params)

# 5-fold stratified F1 on the complete training set (no early stopping here).
scores = cross_validate(
    estimator=xgb1,
    X=train_X,
    y=train_yT,
    cv=skf,
    scoring='f1',
    n_jobs=4,
)
print(scores['test_score'].mean())

0.9393784167215191


In [25]:
def pred_and_sub(classifier, sub_no):
    """Fit ``classifier`` on the full training set and write a submission CSV.

    Parameters
    ----------
    classifier
        Estimator exposing ``fit(X=..., y=...)`` and ``predict(...)``; it is
        fitted in place on the module-level ``train_X`` / ``train_yT``.
    sub_no
        Identifier interpolated into the output file name
        ``submission{sub_no}.csv``.

    Side effects
    ------------
    Prints the encoded and the decoded predictions, reads ``test_II.csv`` from
    ``base_path`` and writes the submission file into ``submission_path``
    (the folder is created when it does not exist yet).
    """
    classifier.fit(X=train_X, y=train_yT)
    pred_vals = classifier.predict(test_df)
    print(pred_vals)

    # Map the encoded class indices back to the original 'Expected' labels.
    new_pred = lab.inverse_transform(pred_vals)
    print(new_pred)

    final_df = pd.read_csv(base_path.joinpath('test_II.csv'))
    final_df['Predicted'] = new_pred
    # Renaming to two columns assumes test_II.csv holds exactly one column
    # (the id) - TODO confirm against the file.
    final_df.columns = ['Id', 'Predicted']

    # Make sure the target folder exists; to_csv does not create missing
    # directories and would otherwise fail after the expensive fit/predict.
    submission_path.mkdir(parents=True, exist_ok=True)
    final_df.to_csv(submission_path.joinpath(f'submission{sub_no}.csv'), index=False)

In [26]:
# Refit the first tuned model on all training data and write submission38.csv.
pred_and_sub(classifier=xgb1, sub_no=38)

[1 1 1 ... 1 1 1]
[2 2 2 ... 2 2 2]


In [27]:
# Second search space: same ranges as `hyper_space`, but without the
# `reg_alpha` / `reg_lambda` entries, so those two keep XGBoost's defaults.
hyper_space_2 = {
    'n_estimators': hp.randint('n_estimators', 100, 1000),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'max_depth': hp.randint('max_depth', 3, 18),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'gamma': hp.uniform('gamma', 1, 9),
    'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
}

# NOTE: `trials` and `best_params` are re-bound here, replacing the results of
# the first search; cells that read `best_params` afterwards see these values.
trials = Trials()

# `rstate` seeds hyperopt's sampler so the search proposes the same candidates
# on every Restart & Run All.
# NOTE(review): hyperopt >= 0.2.7 expects a numpy Generator here; older releases
# expect np.random.RandomState(10) instead - confirm against the installed version.
best_params = fmin(
    fn=objective,
    space=hyper_space_2,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
    rstate=np.random.default_rng(10),
)

100%|██████| 100/100 [19:54<00:00, 11.95s/trial, best loss: -0.9468337094649553]


In [28]:
# Show the hyper-parameter assignment returned by fmin for the `hyper_space_2` search.
best_params

{'colsample_bytree': 0.7750734841491662,
 'gamma': 2.2184283162604066,
 'learning_rate': 0.060082928981384884,
 'max_depth': 15,
 'min_child_weight': 1.0,
 'n_estimators': 332,
 'subsample': 0.8190652140923665}

In [32]:
# Model built from the tuned values of the second search plus the shared settings.
# NOTE(review): the traceback recorded under this cell reports "XGBoost version
# not compiled with GPU support", while `standard_params` as shown requests
# tree_method 'hist' - presumably the output is stale from an earlier GPU
# configuration; re-run on a fresh kernel to confirm.
xgb2 = XGBClassifier(**best_params, **standard_params)

# 5-fold stratified F1 on the complete training set (no early stopping here).
scores = cross_validate(
    estimator=xgb2,
    X=train_X,
    y=train_yT,
    cv=skf,
    scoring='f1',
    n_jobs=4,
)
print(scores['test_score'].mean())

ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/training.py", line 185, in train
    bst.update(dtrain, i, obj)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 1918, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [11:22:01] /Users/runner/miniforge3/conda-bld/xgboost-split_1679035139432/work/src/gbm/../common/common.h:239: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x000000012a3ee9d8 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x000000012a4ddbb0 xgboost::gbm::GBTree::ConfigureUpdaters() + 528
  [bt] (2) 3   libxgboost.dylib                    0x000000012a4d8d40 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&) + 912
  [bt] (3) 4   libxgboost.dylib                    0x000000012a4f882c xgboost::LearnerConfiguration::Configure() + 1040
  [bt] (4) 5   libxgboost.dylib                    0x000000012a4f8ab0 xgboost::LearnerImpl::UpdateOneIter(int, std::__1::shared_ptr<xgboost::DMatrix>) + 144
  [bt] (5) 6   libxgboost.dylib                    0x000000012a408744 XGBoosterUpdateOneIter + 160
  [bt] (6) 7   libffi.8.dylib                      0x0000000101b5804c ffi_call_SYSV + 76
  [bt] (7) 8   libffi.8.dylib                      0x0000000101b5574c ffi_call_int + 1208
  [bt] (8) 9   _ctypes.cpython-310-darwin.so       0x0000000101b90c58 _ctypes_callproc + 1236



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/training.py", line 185, in train
    bst.update(dtrain, i, obj)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 1918, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [11:22:01] /Users/runner/miniforge3/conda-bld/xgboost-split_1679035139432/work/src/gbm/../common/common.h:239: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x000000011ddc69d8 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x000000011deb5bb0 xgboost::gbm::GBTree::ConfigureUpdaters() + 528
  [bt] (2) 3   libxgboost.dylib                    0x000000011deb0d40 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&) + 912
  [bt] (3) 4   libxgboost.dylib                    0x000000011ded082c xgboost::LearnerConfiguration::Configure() + 1040
  [bt] (4) 5   libxgboost.dylib                    0x000000011ded0ab0 xgboost::LearnerImpl::UpdateOneIter(int, std::__1::shared_ptr<xgboost::DMatrix>) + 144
  [bt] (5) 6   libxgboost.dylib                    0x000000011dde0744 XGBoosterUpdateOneIter + 160
  [bt] (6) 7   libffi.8.dylib                      0x00000001061b004c ffi_call_SYSV + 76
  [bt] (7) 8   libffi.8.dylib                      0x00000001061ad74c ffi_call_int + 1208
  [bt] (8) 9   _ctypes.cpython-310-darwin.so       0x00000001061e8c58 _ctypes_callproc + 1236



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/training.py", line 185, in train
    bst.update(dtrain, i, obj)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 1918, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [11:22:01] /Users/runner/miniforge3/conda-bld/xgboost-split_1679035139432/work/src/gbm/../common/common.h:239: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x000000011f8ca9d8 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x000000011f9b9bb0 xgboost::gbm::GBTree::ConfigureUpdaters() + 528
  [bt] (2) 3   libxgboost.dylib                    0x000000011f9b4d40 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&) + 912
  [bt] (3) 4   libxgboost.dylib                    0x000000011f9d482c xgboost::LearnerConfiguration::Configure() + 1040
  [bt] (4) 5   libxgboost.dylib                    0x000000011f9d4ab0 xgboost::LearnerImpl::UpdateOneIter(int, std::__1::shared_ptr<xgboost::DMatrix>) + 144
  [bt] (5) 6   libxgboost.dylib                    0x000000011f8e4744 XGBoosterUpdateOneIter + 160
  [bt] (6) 7   libffi.8.dylib                      0x00000001060b404c ffi_call_SYSV + 76
  [bt] (7) 8   libffi.8.dylib                      0x00000001060b174c ffi_call_int + 1208
  [bt] (8) 9   _ctypes.cpython-310-darwin.so       0x00000001060ecc58 _ctypes_callproc + 1236



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/training.py", line 185, in train
    bst.update(dtrain, i, obj)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 1918, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [11:22:01] /Users/runner/miniforge3/conda-bld/xgboost-split_1679035139432/work/src/gbm/../common/common.h:239: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x000000012259e9d8 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x000000012268dbb0 xgboost::gbm::GBTree::ConfigureUpdaters() + 528
  [bt] (2) 3   libxgboost.dylib                    0x0000000122688d40 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&) + 912
  [bt] (3) 4   libxgboost.dylib                    0x00000001226a882c xgboost::LearnerConfiguration::Configure() + 1040
  [bt] (4) 5   libxgboost.dylib                    0x00000001226a8ab0 xgboost::LearnerImpl::UpdateOneIter(int, std::__1::shared_ptr<xgboost::DMatrix>) + 144
  [bt] (5) 6   libxgboost.dylib                    0x00000001225b8744 XGBoosterUpdateOneIter + 160
  [bt] (6) 7   libffi.8.dylib                      0x0000000101fd804c ffi_call_SYSV + 76
  [bt] (7) 8   libffi.8.dylib                      0x0000000101fd574c ffi_call_int + 1208
  [bt] (8) 9   _ctypes.cpython-310-darwin.so       0x0000000102010c58 _ctypes_callproc + 1236



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/training.py", line 185, in train
    bst.update(dtrain, i, obj)
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 1918, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "/Users/rahuldev/miniforge3/envs/toxic/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [11:22:02] /Users/runner/miniforge3/conda-bld/xgboost-split_1679035139432/work/src/gbm/../common/common.h:239: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x000000011f8ca9d8 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x000000011f9b9bb0 xgboost::gbm::GBTree::ConfigureUpdaters() + 528
  [bt] (2) 3   libxgboost.dylib                    0x000000011f9b4d40 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&) + 912
  [bt] (3) 4   libxgboost.dylib                    0x000000011f9d482c xgboost::LearnerConfiguration::Configure() + 1040
  [bt] (4) 5   libxgboost.dylib                    0x000000011f9d4ab0 xgboost::LearnerImpl::UpdateOneIter(int, std::__1::shared_ptr<xgboost::DMatrix>) + 144
  [bt] (5) 6   libxgboost.dylib                    0x000000011f8e4744 XGBoosterUpdateOneIter + 160
  [bt] (6) 7   libffi.8.dylib                      0x00000001060b404c ffi_call_SYSV + 76
  [bt] (7) 8   libffi.8.dylib                      0x00000001060b174c ffi_call_int + 1208
  [bt] (8) 9   _ctypes.cpython-310-darwin.so       0x00000001060ecc58 _ctypes_callproc + 1236




In [30]:
# Refit the second tuned model on all training data and write submission39.csv.
pred_and_sub(classifier=xgb2, sub_no=39)

[1 1 1 ... 1 1 1]
[2 2 2 ... 2 2 2]
