Optuna is an open-source, next-generation hyperparameter optimization framework designed to automate the tuning process for machine learning models. It intelligently searches for the best set of parameters using state-of-the-art techniques like Bayesian optimization, while also offering powerful features like:

✨ Dynamic search space definition

⚡ Early stopping of bad trials (pruning)

📊 Built-in visualization tools

🤖 Seamless integration with Scikit-learn, XGBoost, LightGBM, PyTorch, and more

🚀 Parallel and distributed optimization

With its clean, Pythonic interface and efficient performance, Optuna helps developers and data scientists build better models faster—making hyperparameter tuning not just automated, but also enjoyable.

In [53]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [27]:


import xgboost as xgb
print(xgb.__version__)


3.0.0


In [28]:
data=pd.read_csv("https://raw.githubusercontent.com/Chandrakant817/Admission-Prediction/main/Admission_Prediction.csv")

In [29]:
data

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.00,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.80
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
495,496,332.0,108.0,5.0,4.5,4.0,9.02,1,0.87
496,497,337.0,117.0,5.0,5.0,5.0,9.87,1,0.96
497,498,330.0,120.0,5.0,4.5,5.0,9.56,1,0.93
498,499,312.0,103.0,4.0,4.0,5.0,8.43,0,0.73


In [30]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No.         500 non-null    int64  
 1   GRE Score          485 non-null    float64
 2   TOEFL Score        490 non-null    float64
 3   University Rating  485 non-null    float64
 4   SOP                500 non-null    float64
 5   LOR                500 non-null    float64
 6   CGPA               500 non-null    float64
 7   Research           500 non-null    int64  
 8   Chance of Admit    500 non-null    float64
dtypes: float64(7), int64(2)
memory usage: 35.3 KB


In [31]:
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Serial No.,500.0,250.5,144.481833,1.0,125.75,250.5,375.25,500.0
GRE Score,485.0,316.558763,11.274704,290.0,308.0,317.0,325.0,340.0
TOEFL Score,490.0,107.187755,6.112899,92.0,103.0,107.0,112.0,120.0
University Rating,485.0,3.121649,1.14616,1.0,2.0,3.0,4.0,5.0
SOP,500.0,3.374,0.991004,1.0,2.5,3.5,4.0,5.0
LOR,500.0,3.484,0.92545,1.0,3.0,3.5,4.0,5.0
CGPA,500.0,8.57644,0.604813,6.8,8.1275,8.56,9.04,9.92
Research,500.0,0.56,0.496884,0.0,0.0,1.0,1.0,1.0
Chance of Admit,500.0,0.72174,0.14114,0.34,0.63,0.72,0.82,0.97


In [32]:
data.isnull().sum()


Serial No.            0
GRE Score            15
TOEFL Score          10
University Rating    15
SOP                   0
LOR                   0
CGPA                  0
Research              0
Chance of Admit       0
dtype: int64

In [33]:
data["GRE Score"]=data["GRE Score"].fillna(data["GRE Score"].median())
data["TOEFL Score"]=data["TOEFL Score"].fillna(data["TOEFL Score"].median())
data["University Rating"]=data["University Rating"].fillna(data["University Rating"].median())

In [34]:
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,317.0,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [35]:
X=data.drop(["Serial No.","Chance of Admit"],axis=1)

In [36]:
y=data["Chance of Admit"]

In [37]:
X

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337.0,118.0,4.0,4.5,4.5,9.65,1
1,324.0,107.0,4.0,4.0,4.5,8.87,1
2,317.0,104.0,3.0,3.0,3.5,8.00,1
3,322.0,110.0,3.0,3.5,2.5,8.67,1
4,314.0,103.0,2.0,2.0,3.0,8.21,0
...,...,...,...,...,...,...,...
495,332.0,108.0,5.0,4.5,4.0,9.02,1
496,337.0,117.0,5.0,5.0,5.0,9.87,1
497,330.0,120.0,5.0,4.5,5.0,9.56,1
498,312.0,103.0,4.0,4.0,5.0,8.43,0


In [38]:
X.isnull().sum()

GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
dtype: int64

In [39]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GRE Score          500 non-null    float64
 1   TOEFL Score        500 non-null    float64
 2   University Rating  500 non-null    float64
 3   SOP                500 non-null    float64
 4   LOR                500 non-null    float64
 5   CGPA               500 non-null    float64
 6   Research           500 non-null    int64  
dtypes: float64(6), int64(1)
memory usage: 27.5 KB


In [40]:
for col in X.columns:
    print(f"{col} {X[col].apply(lambda x: isinstance(X,str)).sum()}")

GRE Score 0
TOEFL Score 0
University Rating 0
SOP 0
LOR 0
CGPA 0
Research 0


In [41]:
y

0      0.92
1      0.76
2      0.72
3      0.80
4      0.65
       ... 
495    0.87
496    0.96
497    0.93
498    0.73
499    0.84
Name: Chance of Admit, Length: 500, dtype: float64

In [42]:
X_train.shape

(375, 7)

In [43]:
X_test.shape

(125, 7)

In [44]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=25)

In [45]:
scaler=StandardScaler()

In [46]:
X_train_sc=scaler.fit_transform(X_train)

In [47]:
X_test_sc=scaler.transform(X_test)

In [50]:
def objective(trail,data=X,target=y):
  train_x,test_x,train_y,test_y=train_test_split(data,target,test_size=0.25,random_state=30)
  param={
      "tree_method":"hist",
      'lambda':trail.suggest_loguniform('lambda' , 1e-4,10.0),
      'alpha' :trail.suggest_loguniform('alpha' , 1e-4 , 10.0),
      'colsample_bytree' :trail.suggest_categorical('colsample_bytree' , [.1,.2,.3,.4,.5,.6,.7,.8,.9,1]),
      'subsample' :trail.suggest_categorical('subsample' , [.1,.2,.3,.4,.5,.6,.7,.8,.9,1]),
      'learning_rate' : trail.suggest_categorical('learning_rate' , [.00001,.0003,.008,.02,.01,1,8]),
      'n_estimators' :3000,
      'max_depth' :trail.suggest_categorical('max_depth', [3,4,5,6,7,8,9,10,11,12]),
      'random_state' :trail.suggest_categorical('random_state' , [10,20,30 ,2000 , 3454,243123]),
      'min_child_weight' :trail.suggest_int('min_child_weight' ,1,200)
      }
  Xgb_reg_model=xgb.XGBRegressor(**param)
  Xgb_reg_model.fit(train_x,train_y,eval_set=[(test_x,test_y)],verbose=True)
  pred_xgb=Xgb_reg_model.predict(test_x)
  #mse=mean_squared_error(test_y,pred_xgb)
  #return mse

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import optuna

def objective(trial, data=X, target=y):
    try:
        train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25, random_state=30)
        
        param = {
            "tree_method": "hist",
            'lambda': trial.suggest_loguniform('lambda', 1e-4, 10.0),
            'alpha': trial.suggest_loguniform('alpha', 1e-4, 10.0),
            'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]),
            'subsample': trial.suggest_categorical('subsample', [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]),
            'learning_rate': trial.suggest_categorical('learning_rate', [0.00001, 0.0003, 0.008, 0.02, 0.01, 1, 8]),
            'n_estimators': 3000,
            'max_depth': trial.suggest_categorical('max_depth', list(range(3, 13))),
            'random_state': trial.suggest_categorical('random_state', [10, 20, 30, 2000, 3454, 243123]),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 200)
        }

        model = xgb.XGBRegressor(**param)
        model.fit(train_x, train_y, eval_set=[(test_x, test_y)], verbose=False)
        
        preds = model.predict(test_x)
        mse = mean_squared_error(test_y, preds)
        return mse  # Return the metric Optuna should minimize
    except Exception as e:
        print(f"Trial failed due to error: {e}")
        return float("inf")  # Return a large value to signal failure


In [56]:
find_params=optuna.create_study()
find_params.optimize(objective,n_trials=10)
find_params.best_trial.params


[I 2025-05-12 18:52:25,137] A new study created in memory with name: no-name-e50f696e-d2ae-455d-ab05-6c3dae403c8d
  'lambda': trial.suggest_loguniform('lambda', 1e-4, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 10.0),
[I 2025-05-12 18:52:26,759] Trial 0 finished with value: 0.004582941518941462 and parameters: {'lambda': 0.00018607748453379675, 'alpha': 1.094720081816784, 'colsample_bytree': 0.2, 'subsample': 0.5, 'learning_rate': 0.02, 'max_depth': 12, 'random_state': 30, 'min_child_weight': 33}. Best is trial 0 with value: 0.004582941518941462.
  'lambda': trial.suggest_loguniform('lambda', 1e-4, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 10.0),
[I 2025-05-12 18:52:28,295] Trial 1 finished with value: 0.02093355035750693 and parameters: {'lambda': 0.9369267943510319, 'alpha': 6.674235688567579, 'colsample_bytree': 0.6, 'subsample': 0.4, 'learning_rate': 1e-05, 'max_depth': 8, 'random_state': 20, 'min_child_weight': 65}. Best is trial 0 with value: 0.00458

Trial failed due to error: could not convert string to float: '-nan(ind)'


[I 2025-05-12 18:52:34,990] Trial 5 finished with value: 0.021002583578711858 and parameters: {'lambda': 2.8989255886936074, 'alpha': 0.0014446912902729538, 'colsample_bytree': 0.4, 'subsample': 0.3, 'learning_rate': 1e-05, 'max_depth': 10, 'random_state': 20, 'min_child_weight': 126}. Best is trial 0 with value: 0.004582941518941462.
  'lambda': trial.suggest_loguniform('lambda', 1e-4, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 10.0),
[I 2025-05-12 18:52:36,647] Trial 6 finished with value: 0.021002616154177176 and parameters: {'lambda': 0.00015951035816762706, 'alpha': 0.07239816597508385, 'colsample_bytree': 0.8, 'subsample': 0.3, 'learning_rate': 8, 'max_depth': 3, 'random_state': 2000, 'min_child_weight': 165}. Best is trial 0 with value: 0.004582941518941462.
  'lambda': trial.suggest_loguniform('lambda', 1e-4, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 10.0),
[I 2025-05-12 18:52:38,719] Trial 7 finished with value: 0.006316192008210595 and parameter

{'lambda': 0.00018607748453379675,
 'alpha': 1.094720081816784,
 'colsample_bytree': 0.2,
 'subsample': 0.5,
 'learning_rate': 0.02,
 'max_depth': 12,
 'random_state': 30,
 'min_child_weight': 33}

In [57]:
best_params={'lambda': 0.00018607748453379675,
 'alpha': 1.094720081816784,
 'colsample_bytree': 0.2,
 'subsample': 0.5,
 'learning_rate': 0.02,
 'max_depth': 12,
 'random_state': 30,
 'min_child_weight': 33}

In [58]:
find_params.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_alpha,params_colsample_bytree,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_random_state,params_subsample,state
0,0,0.004583,2025-05-12 18:52:25.138494,2025-05-12 18:52:26.758731,0 days 00:00:01.620237,1.09472,0.2,0.000186,0.02,12,33,30,0.5,COMPLETE
1,1,0.020934,2025-05-12 18:52:26.760725,2025-05-12 18:52:28.294636,0 days 00:00:01.533911,6.674236,0.6,0.936927,1e-05,8,65,20,0.4,COMPLETE
2,2,0.00816,2025-05-12 18:52:28.296141,2025-05-12 18:52:29.762782,0 days 00:00:01.466641,0.000164,0.8,0.816761,1.0,3,15,243123,1.0,COMPLETE
3,3,0.020165,2025-05-12 18:52:29.764775,2025-05-12 18:52:33.244835,0 days 00:00:03.480060,0.091187,0.8,0.000198,1e-05,8,2,2000,0.3,COMPLETE
4,4,inf,2025-05-12 18:52:33.249336,2025-05-12 18:52:33.346039,0 days 00:00:00.096703,2.634216,0.9,0.000205,8.0,8,174,10,0.7,COMPLETE
5,5,0.021003,2025-05-12 18:52:33.348038,2025-05-12 18:52:34.989194,0 days 00:00:01.641156,0.001445,0.4,2.898926,1e-05,10,126,20,0.3,COMPLETE
6,6,0.021003,2025-05-12 18:52:34.991190,2025-05-12 18:52:36.646813,0 days 00:00:01.655623,0.072398,0.8,0.00016,8.0,3,165,2000,0.3,COMPLETE
7,7,0.006316,2025-05-12 18:52:36.648813,2025-05-12 18:52:38.717407,0 days 00:00:02.068594,0.000137,0.6,0.003985,0.01,12,148,30,0.9,COMPLETE
8,8,0.020977,2025-05-12 18:52:38.720399,2025-05-12 18:52:41.356868,0 days 00:00:02.636469,0.260455,0.2,0.211446,0.008,3,51,30,0.1,COMPLETE
9,9,0.021003,2025-05-12 18:52:41.363855,2025-05-12 18:52:43.773867,0 days 00:00:02.410012,0.006176,0.5,0.231943,0.02,9,197,10,0.2,COMPLETE


In [59]:
optuna.visualization.plot_optimization_history(find_params)

In [60]:
optuna.visualization.plot_slice(find_params)

[W 2025-05-12 18:57:19,222] Trial 4 is omitted in visualization because its objective value is inf or nan.


In [61]:
optuna.visualization.plot_contour(find_params,params=['alpha','lambda'])

[W 2025-05-12 18:57:29,959] Trial 4 is omitted in visualization because its objective value is inf or nan.


In [62]:
model=xgb.XGBRegressor(**best_params)

In [63]:
model.fit(X_train,y_train)

In [64]:
y_pred=model.predict(X_test)

In [65]:
from sklearn.metrics import r2_score
r2score=[0,1]
from sklearn.metrics import r2_score

r2_score(y_test,y_pred)

0.632157316897373

In [66]:
from sklearn.ensemble import RandomForestRegressor
model2=RandomForestRegressor()
model2.fit(X_train,y_train)
y_pred2=model2.predict(X_test)
r2_score(y_test,y_pred2)

0.8171945052950684