In [328]:
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, RandomizedSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
import warnings 
warnings.filterwarnings("ignore")
from sklearn import ensemble 
from sklearn.svm import SVC

In [286]:
# After downloading the X_train/X_test/Y_train .csv files in your working directory:

X_train = pd.read_csv('X_train.csv')
Y_train = pd.read_csv('Y_train.csv')
X_test = pd.read_csv('X_test.csv')

In [287]:

# Feature Engineering
# Example: Creating a new feature as the product of gas and coal prices
X_train['GAS_COAL_PRODUCT'] = X_train['GAS_RET'] * X_train['COAL_RET']
X_test['GAS_COAL_PRODUCT'] = X_test['GAS_RET'] * X_test['COAL_RET']


In [288]:
# Fill nan with 0 as benchmark
X_train_clean = X_train.drop(['COUNTRY'], axis=1).fillna(0)
X_test_clean = X_test.drop(['COUNTRY'], axis=1).fillna(0)
Y_train_clean = Y_train['TARGET']

In [289]:
# Split into training and validation set
X_train_split, X_val_split, Y_train_split, Y_val_split = train_test_split(X_train_clean, Y_train_clean, test_size=0.2, random_state=42)

## xgb

In [290]:
# XGBoost
xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train_split, Y_train_split)             

In [291]:
# XGBoost   
output_train_xgb = xgb_model.predict(X_train_split)
output_val_xgb = xgb_model.predict(X_val_split)

correlation_train_xgb = spearmanr(output_train_xgb, Y_train_split).correlation
correlation_val_xgb = spearmanr(output_val_xgb, Y_val_split).correlation

print(f"Spearman's correlation for XGBoost on training set: {correlation_train_xgb}")
print(f"Spearman's correlation for XGBoost on validation set: {correlation_val_xgb}")


Spearman's correlation for XGBoost on training set: 0.9902764852720449
Spearman's correlation for XGBoost on validation set: 0.10634374088123724


In [292]:
# XGBoost
param_grid_xgb = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 10],
    'min_child_weight': [1, 3, 5]
}

### grid search

In [293]:
# Initialize k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [294]:
grid_search_xgb = GridSearchCV(xgb_model, param_grid_xgb, cv=kf, scoring='neg_mean_squared_error')
grid_search_xgb.fit(X_train_clean, Y_train_clean)
best_grid_xgb_model = grid_search_xgb.best_estimator_

In [295]:
# XGBoost
output_train_best_xgb = best_grid_xgb_model.predict(X_train_split)
output_val_best_xgb = best_grid_xgb_model.predict(X_val_split)

correlation_train_best_xgb = spearmanr(output_train_best_xgb, Y_train_split).correlation
correlation_val_best_xgb = spearmanr(output_val_best_xgb, Y_val_split).correlation

print(f"Spearman's correlation for tuned (grid search) XGBoost on training set: {correlation_train_best_xgb}")
print(f"Spearman's correlation for tuned (grid search) XGBoost on validation set: {correlation_val_best_xgb}")

Spearman's correlation for tuned (grid search) XGBoost on training set: 0.3861589734839395
Spearman's correlation for tuned (grid search) XGBoost on validation set: 0.376128136647876


### randomised search kf

In [296]:
folds = 3
param_comb = 5

# Initialize k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)


In [297]:
random_search_xgb = RandomizedSearchCV(xgb_model, param_distributions=param_grid_xgb, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=kf.split(X_train_clean, Y_train_clean), verbose=3, random_state=1001 )
random_search_xgb.fit(X_train_clean, Y_train_clean)
best_random_xgb_model = random_search_xgb.best_estimator_

Fitting 5 folds for each of 5 candidates, totalling 25 fits


  if is_sparse(dtype):
  if is_sparse(dtype):
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = s

[CV 3/5] END learning_rate=0.01, max_depth=10, min_child_weight=3, n_estimators=200;, score=nan total time=   1.2s
[CV 1/5] END learning_rate=0.01, max_depth=10, min_child_weight=3, n_estimators=200;, score=nan total time=   1.2s
[CV 4/5] END learning_rate=0.01, max_depth=10, min_child_weight=3, n_estimators=200;, score=nan total time=   1.2s
[CV 2/5] END learning_rate=0.01, max_depth=10, min_child_weight=3, n_estimators=200;, score=nan total time=   1.2s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Traceback (most recent call last

[CV 1/5] END learning_rate=0.01, max_depth=10, min_child_weight=5, n_estimators=200;, score=nan total time=   1.1s
[CV 2/5] END learning_rate=0.01, max_depth=10, min_child_weight=5, n_estimators=200;, score=nan total time=   1.2s
[CV 5/5] END learning_rate=0.01, max_depth=10, min_child_weight=3, n_estimators=200;, score=nan total time=   1.2s
[CV 3/5] END learning_rate=0.01, max_depth=10, min_child_weight=5, n_estimators=200;, score=nan total time=   1.1s
[CV 1/5] END learning_rate=0.01, max_depth=5, min_child_weight=5, n_estimators=50;, score=nan total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=5, min_child_weight=5, n_estimators=50;, score=nan total time=   0.2s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Traceback (most recent call last

[CV 3/5] END learning_rate=0.01, max_depth=5, min_child_weight=5, n_estimators=50;, score=nan total time=   0.1s
[CV 4/5] END learning_rate=0.01, max_depth=5, min_child_weight=5, n_estimators=50;, score=nan total time=   0.2s
[CV 5/5] END learning_rate=0.01, max_depth=5, min_child_weight=5, n_estimators=50;, score=nan total time=   0.2s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Traceback (most recent call last

[CV 1/5] END learning_rate=0.2, max_depth=5, min_child_weight=5, n_estimators=200;, score=nan total time=   0.6s
[CV 2/5] END learning_rate=0.2, max_depth=5, min_child_weight=5, n_estimators=200;, score=nan total time=   0.7s
[CV 4/5] END learning_rate=0.01, max_depth=10, min_child_weight=5, n_estimators=200;, score=nan total time=   1.2s
[CV 5/5] END learning_rate=0.01, max_depth=10, min_child_weight=5, n_estimators=200;, score=nan total time=   1.2s
[CV 1/5] END learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=50;, score=nan total time=   0.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 2/5] END learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=50;, score=nan total time=   0.1s
[CV 3/5] END learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=50;, score=nan total time=   0.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Traceback (most recent call last

[CV 4/5] END learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=50;, score=nan total time=   0.1s
[CV 3/5] END learning_rate=0.2, max_depth=5, min_child_weight=5, n_estimators=200;, score=nan total time=   0.7s
[CV 5/5] END learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=50;, score=nan total time=   0.1s
[CV 4/5] END learning_rate=0.2, max_depth=5, min_child_weight=5, n_estimators=200;, score=nan total time=   0.6s
[CV 5/5] END learning_rate=0.2, max_depth=5, min_child_weight=5, n_estimators=200;, score=nan total time=   0.6s


In [298]:
# XGBoost
output_train_best_random_xgb = best_random_xgb_model.predict(X_train_split)
output_val_best_random_xgb = best_random_xgb_model.predict(X_val_split)

correlation_train_best_random_xgb = spearmanr(output_train_best_random_xgb, Y_train_split).correlation
correlation_val_best_random_xgb = spearmanr(output_val_best_random_xgb, Y_val_split).correlation

print(f"Spearman's correlation for tuned using random search XGBoost on training set: {correlation_train_best_random_xgb}")
print(f"Spearman's correlation for tuned using random search XGBoost on validation set: {correlation_val_best_random_xgb}")

Spearman's correlation for tuned using random search XGBoost on training set: 0.791389854246226
Spearman's correlation for tuned using random search XGBoost on validation set: 0.7667473232924065


## random forest

In [260]:
# Random Forest
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train_split, Y_train_split)

In [261]:
output_train_rf = rf_model.predict(X_train_split)
output_val_rf = rf_model.predict(X_val_split)

correlation_train_rf = spearmanr(output_train_rf, Y_train_split).correlation
correlation_val_rf = spearmanr(output_val_rf, Y_val_split).correlation

print(f"Spearman's correlation for Random Forest on training set: {correlation_train_rf}")
print(f"Spearman's correlation for Random Forest on validation set: {correlation_val_rf}")

Spearman's correlation for Random Forest on training set: 0.9048025691429354
Spearman's correlation for Random Forest on validation set: 0.1734941976611075


In [262]:
# Random Forest
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [263]:
# Initialize k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

### grid search

In [264]:
grid_search_rf = GridSearchCV(rf_model, param_grid_rf, cv=kf, scoring='neg_mean_squared_error')
grid_search_rf.fit(X_train_clean, Y_train_clean)
best_rf_model = grid_search_rf.best_estimator_

In [265]:
# Random forest
output_train_best_rf = best_rf_model.predict(X_train_split)
output_val_best_rf = best_rf_model.predict(X_val_split)

correlation_train_best_rf = spearmanr(output_train_best_rf, Y_train_split).correlation
correlation_val_best_rf = spearmanr(output_val_best_rf, Y_val_split).correlation

print(f"Spearman's correlation for tuned Random Forest on training set: {correlation_train_best_rf}")
print(f"Spearman's correlation for tuned Random Forest on validation set: {correlation_val_best_rf}")

Spearman's correlation for tuned Random Forest on training set: 0.5803923406595644
Spearman's correlation for tuned Random Forest on validation set: 0.560739826266526


In [266]:
# Fill nan with 0 as benchmark
X_train_clean = X_train.drop(['COUNTRY'], axis=1).fillna(0)
X_test_clean = X_test.drop(['COUNTRY'], axis=1).fillna(0)
Y_train_clean = Y_train['TARGET']

In [267]:
Y_test_submission = X_test[['ID']].copy()
Y_test_submission['TARGET'] = best_rf_model.predict(X_test_clean)
Y_test_submission.to_csv('sharmin_rf.csv', index=False)

### randomised search

In [268]:
folds = 3
param_comb = 5

In [269]:
random_search_rf = RandomizedSearchCV(rf_model, param_distributions=param_grid_rf, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=kf.split(X_train_clean, Y_train_clean), verbose=3, random_state=1001 )

random_search_rf.fit(X_train_clean, Y_train_clean)

best_random_rf_model = random_search_rf.best_estimator_

Fitting 5 folds for each of 5 candidates, totalling 25 fits


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   5.0s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   5.1s
[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   5.1s
[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   5.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   4.9s
[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   4.9s
[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   4.9s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   5.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.6s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.7s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   5.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   5.7s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   5.6s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   5.4s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   5.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   5.6s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.5s
[CV 5/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.6s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 5/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   5.7s


In [270]:
# XGBoost
output_train_best_random_rf = best_random_rf_model.predict(X_train_split)
output_val_best_random_rf = best_random_rf_model.predict(X_val_split)

correlation_train_best_random_rf = spearmanr(output_train_best_random_rf, Y_train_split).correlation
correlation_val_best_random_rf = spearmanr(output_val_best_random_rf, Y_val_split).correlation

print(f"Spearman's correlation for tuned using random search XGBoost on training set: {correlation_train_best_random_rf}")
print(f"Spearman's correlation for tuned using random search XGBoost on validation set: {correlation_val_best_random_rf}")

Spearman's correlation for tuned using random search XGBoost on training set: 0.8282721816793942
Spearman's correlation for tuned using random search XGBoost on validation set: 0.8284509887544612


In [271]:
Y_test_submission = X_test[['ID']].copy()
Y_test_submission['TARGET'] = best_random_rf_model.predict(X_test_clean)
Y_test_submission.to_csv('sharmin_rf_random.csv', index=False)

## gradient boosting regressor

In [273]:
params = {
    "n_estimators": 1000,
    "max_depth": 20,
    "min_samples_split": 5,
    "learning_rate": 0.001,
    "loss": "squared_error",
    }


In [274]:
# Gradient Boosting Regressor
gbr_model = ensemble.GradientBoostingRegressor(**params)
gbr_model.fit(X_train_split, Y_train_split)

In [275]:
output_train_gbr = gbr_model.predict(X_train_split)
output_val_gbr = gbr_model.predict(X_val_split)

correlation_train_gbr = spearmanr(output_train_gbr, Y_train_split).correlation
correlation_val_gbr = spearmanr(output_val_gbr, Y_val_split).correlation

print(f"Spearman's correlation for Random Forest on training set: {correlation_train_gbr}")
print(f"Spearman's correlation for Random Forest on validation set: {correlation_val_gbr}")

Spearman's correlation for Random Forest on training set: 0.9437870099656085
Spearman's correlation for Random Forest on validation set: 0.15557810150164977


In [276]:
# Random Forest
param_grid_gbr = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [277]:
# Initialize k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

### grid search

In [278]:
grid_search_gbr = GridSearchCV(gbr_model, param_grid_gbr, cv=kf, scoring='neg_mean_squared_error')
grid_search_gbr.fit(X_train_clean, Y_train_clean)
best_gbr_model = grid_search_gbr.best_estimator_

In [279]:
# Gradient Boost
output_train_best_gbr = best_gbr_model.predict(X_train_split)
output_val_best_gbr = best_gbr_model.predict(X_val_split)

correlation_train_best_gbr = spearmanr(output_train_best_gbr, Y_train_split).correlation
correlation_val_best_gbr = spearmanr(output_val_best_gbr, Y_val_split).correlation

print(f"Spearman's correlation for tuned Random Forest on training set: {correlation_train_best_gbr}")
print(f"Spearman's correlation for tuned Random Forest on validation set: {correlation_val_best_gbr}")

Spearman's correlation for tuned Random Forest on training set: 0.5622354494365267
Spearman's correlation for tuned Random Forest on validation set: 0.49389255271073584


### random search

In [280]:
folds = 3
param_comb = 5

In [281]:
random_search_gbr = RandomizedSearchCV(gbr_model, param_distributions=param_grid_gbr, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=kf.split(X_train_clean, Y_train_clean), verbose=3, random_state=1001 )

random_search_gbr.fit(X_train_clean, Y_train_clean)

best_random_gbr_model = random_search_gbr.best_estimator_

Fitting 5 folds for each of 5 candidates, totalling 25 fits


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   8.4s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   8.4s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   9.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=  11.0s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   8.9s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   9.6s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=  11.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=  11.7s
[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   3.2s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   3.0s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   3.4s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=   9.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   2.8s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   3.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=200;, score=nan total time=  11.1s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=  10.4s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=  10.0s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported

Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^

[CV 3/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=  11.1s
[CV 4/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=  10.5s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 1/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   2.9s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 2/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   2.7s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 3/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   3.0s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 4/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   3.0s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   2.8s


Traceback (most recent call last):
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sharmin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 452, in _score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: continuous format is not supported



[CV 5/5] END max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=  10.6s


In [283]:
# XGBoost
output_train_best_random_gbr = best_random_gbr_model.predict(X_train_split)
output_val_best_random_gbr = best_random_gbr_model.predict(X_val_split)

correlation_train_best_random_gbr = spearmanr(output_train_best_random_gbr, Y_train_split).correlation
correlation_val_best_random_gbr = spearmanr(output_val_best_random_gbr, Y_val_split).correlation

print(f"Spearman's correlation for tuned using random search GBR on training set: {correlation_train_best_random_gbr}")
print(f"Spearman's correlation for tuned using random search GBR on validation set: {correlation_val_best_random_gbr}")

Spearman's correlation for tuned using random search GBR on training set: 0.8959391937285467
Spearman's correlation for tuned using random search GBR on validation set: 0.877290142967708


In [284]:
Y_test_submission = X_test[['ID']].copy()
Y_test_submission['TARGET'] = best_random_gbr_model.predict(X_test_clean)
Y_test_submission.to_csv('sharmin_gbr_random.csv', index=False)