In [1]:
import warnings

# Install a blanket "ignore" filter: every warning category is silenced for
# the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the libraries
# used further down.
warnings.simplefilter('ignore')

In [2]:
import sys
# Install optuna with pip, invoked through the interpreter that runs this
# kernel (sys.executable) so the package lands in the kernel's environment.
# NOTE(review): no version is pinned, so a rerun may pull a different release.
!{sys.executable} -m pip install optuna



In [3]:
import sys
# Upgrade scikit-learn in the kernel's environment (pip is run through
# sys.executable). The recorded output shows 1.2.2 was already installed.
# NOTE(review): no version is pinned, so a rerun may pull a different release.
!{sys.executable} -m pip install scikit-learn --upgrade

Requirement already up-to-date: scikit-learn in /home/idies/miniconda3/envs/py38/lib/python3.8/site-packages (1.2.2)


In [4]:
import sys
# Install LightGBM into the kernel's environment (pip is run through
# sys.executable).
# NOTE(review): no version is pinned, so a rerun may pull a different release.
!{sys.executable} -m pip install lightgbm



In [5]:
import sys
# Install the Intel Extension for Scikit-learn (imported below as `sklearnex`)
# into the kernel's environment (pip is run through sys.executable).
# NOTE(review): no version is pinned, so a rerun may pull a different release.
!{sys.executable} -m pip install scikit-learn-intelex



In [6]:
from sklearnex import patch_sklearn
# Enable the Intel Extension for Scikit-learn. This runs before any
# `sklearn` import in the notebook, so later sklearn imports see the patch.
# NOTE(review): global_patch=True requests a *global* patch (the recorded
# output says scikit-learn was "globally patched"); presumably this persists
# beyond the current kernel session - confirm against the sklearnex docs.
patch_sklearn(global_patch=True)

Scikit-learn was successfully globally patched by Intel(R) Extension for Scikit-learn


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [7]:
import pandas as pd
import numpy as np
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb

In [8]:
# Load the dataset from the project's GitLab repository and order the rows by
# their index labels (ascending, the sort_index default).
df = (
    pd.read_csv('https://gitlab.com/mirsakhawathossain/exodata/-/raw/main/dataset/exodata.csv')
    .sort_index()
)

In [9]:
# Column means are computed once and reused for both masks.
column_means = df.mean()

# Columns whose mean is exactly 1, and columns whose mean is exactly 0.
# NOTE(review): presumably these are constant, uninformative columns - confirm
# against the dataset.
columns_1 = df.columns[column_means == 1]
columns_2 = df.columns[column_means == 0]

# Remove both groups in a single drop.
df = df.drop(columns=columns_1.append(columns_2))

In [13]:
# Feature matrix: every column except the label.
X = df.drop(columns=['exoplanet'])
# Target, selected with a list of labels so it stays a one-column DataFrame
# rather than a Series.
y = df.loc[:, ['exoplanet']]

In [14]:
# Hold out 40% of the rows for testing; the split is stratified on the label
# and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=43,
    stratify=y,
)

In [15]:
# Feature scaler with default settings; it is fitted on the training split in
# the next cell.
scaler = StandardScaler()

In [16]:
# Fit the scaler on the training split only, then apply the same fitted
# transformation to both splits so no test-set statistics leak into training.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [26]:
# Convert the targets to NumPy column vectors of shape (n_samples, 1).
# np.asarray accepts both the one-column DataFrame produced by the split and
# an array produced by an earlier run of this cell, so the cell can be re-run
# safely (calling `.values` on an ndarray would raise AttributeError).
y_train = np.asarray(y_train).reshape(-1, 1)
y_test = np.asarray(y_test).reshape(-1, 1)

In [17]:
import optuna
from sklearn.model_selection import cross_val_score, RepeatedKFold

In [18]:
# Define objective function for hyperparameter tuning
def objective(trial):
    """Return the cross-validated error rate of a LightGBM classifier for one trial.

    Hyperparameters are sampled from ``trial``. The model is evaluated on the
    module-level ``X_train`` / ``y_train`` with repeated 5-fold
    cross-validation (10 repeats, 50 fits in total) using the classifier's
    own ``score`` method (accuracy), which is what ``cross_val_score`` uses
    when no ``scoring`` is given.

    After every fold the running error is reported to the trial, so a pruner
    attached to the study can stop unpromising trials early.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters and report progress.

    Returns
    -------
    float
        ``1 - mean accuracy`` over all folds (lower is better).

    Raises
    ------
    optuna.TrialPruned
        If the study's pruner decides the trial should stop early.
    """
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'verbosity': -1,
        'n_jobs': -1,
        'num_leaves': trial.suggest_int('num_leaves', 2, 50),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        # suggest_float replaces the deprecated suggest_uniform.
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
        # LightGBM only performs bagging when bagging_freq is non-zero; without
        # it the sampled bagging_fraction would have no effect.
        'bagging_freq': 1,
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-5, 1e2, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10.0, log=True),
        # `step` is passed by keyword; the positional form is deprecated.
        'subsample_for_bin': trial.suggest_int('subsample_for_bin', 1000, 50000, step=1000),
        'min_split_gain': trial.suggest_float('min_split_gain', 1e-5, 1.0, log=True),
    }

    features = np.asarray(X_train)
    # Flatten the (n_samples, 1) column vector to the 1-D shape estimators expect.
    labels = np.ravel(y_train)

    # Use repeated k-fold cross-validation to evaluate the model.
    # NOTE(review): RepeatedKFold does not stratify by class; if the classes
    # are imbalanced, RepeatedStratifiedKFold may be the better splitter.
    cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=41)

    fold_accuracies = []
    for step, (fit_idx, eval_idx) in enumerate(cv.split(features, labels), start=1):
        # A fresh, unfitted estimator per fold, as cross_val_score would clone one.
        clf = lgb.LGBMClassifier(**params, random_state=43)
        clf.fit(features[fit_idx], labels[fit_idx])
        fold_accuracies.append(clf.score(features[eval_idx], labels[eval_idx]))

        # Report the running error so the study's pruner has intermediate
        # values to act on.
        trial.report(1.0 - float(np.mean(fold_accuracies)), step)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return 1.0 - float(np.mean(fold_accuracies))

In [19]:
# Define the study object to manage the optimization process.
# The sampler is the same TPE algorithm Optuna uses by default, but seeded so
# the sequence of suggested hyperparameters is repeatable, in line with the
# fixed random_state values used elsewhere in this notebook.
# NOTE(review): with parallel trials (n_jobs != 1 in study.optimize) results
# can still differ between runs because trial completion order varies.
# The Hyperband pruner only acts on trials whose objective reports
# intermediate values via trial.report().
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=43),
    pruner=optuna.pruners.HyperbandPruner(min_resource=1, max_resource='auto', reduction_factor=3),
)


[32m[I 2023-04-09 08:12:00,661][0m A new study created in memory with name: no-name-910fd1ec-c901-419d-95bd-bee7e68f679a[0m


In [None]:
# Run the hyperparameter optimization
n_trials = 50   # total number of trials to evaluate
n_jobs = -1     # run trials in parallel; -1 lets Optuna use the CPU count
timeout = None  # no wall-clock limit
# NOTE(review): the objective also builds its LightGBM model with n_jobs=-1,
# so parallel trials may oversubscribe the CPU - confirm this is intended.
study.optimize(
    objective,
    n_trials=n_trials,
    n_jobs=n_jobs,
    timeout=timeout,
    show_progress_bar=True,
    gc_after_trial=True,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=50.0), HTML(value='')))

In [None]:
# Show the hyperparameter set of the best trial: heading on one line, the
# parameter dict on the next.
print('Best hyperparameters:', study.best_params, sep='\n')

In [None]:
# The objective returns 1 - mean cross-validated accuracy, so subtracting the
# best (lowest) objective value from 1 recovers the best mean accuracy.
best_score = 1 - study.best_value
print('Best score:', best_score)