- Optuna is a open-source hyperparameter optimization framework designed by Machine Learning.
- Optuna supports various optimization algorithms and integrates with popular machine learning libraries like scikit-learn,Tensorflow,PyTorch and XGBoost.


- https://xgboost.readthedocs.io/en/stable/install.html#python
- https://optuna.readthedocs.io/en/stable/installation.html
- https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/012_artifact_tutorial.html
- https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst

In [42]:
!pip install optuna
!pip install xgboost



In [43]:
import optuna
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [44]:
df = pd.read_csv('/content/Admission_Predict.csv')

In [45]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No.         400 non-null    int64  
 1   GRE Score          400 non-null    int64  
 2   TOEFL Score        400 non-null    int64  
 3   University Rating  400 non-null    int64  
 4   SOP                400 non-null    float64
 5   LOR                400 non-null    float64
 6   CGPA               400 non-null    float64
 7   Research           400 non-null    int64  
 8   Chance of Admit    400 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 28.2 KB


In [48]:
df.isnull().sum()

Unnamed: 0,0
Serial No.,0
GRE Score,0
TOEFL Score,0
University Rating,0
SOP,0
LOR,0
CGPA,0
Research,0
Chance of Admit,0


In [51]:
df.drop('Serial No.',axis=1,inplace=True)

In [53]:
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [54]:
X = df.drop('Chance of Admit ', axis=1)
y = df['Chance of Admit ']

In [56]:
X.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1
1,324,107,4,4.0,4.5,8.87,1
2,316,104,3,3.0,3.5,8.0,1
3,322,110,3,3.5,2.5,8.67,1
4,314,103,2,2.0,3.0,8.21,0


In [57]:
y.head()

Unnamed: 0,Chance of Admit
0,0.92
1,0.76
2,0.72
3,0.8
4,0.65


In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [59]:
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

# Implementation optuna

In [60]:
def objective(trial,data=X,target=y):
  train_X,test_X,train_y,test_y = train_test_split(data,target,test_size=0.25,random_state=42)
  params = {
      'tree_method':'auto',
      'lambda':trial.suggest_loguniform('lambda',1e-4,10.0),
      'alpha':trial.suggest_loguniform('alpha',1e-4,10.0),
      'colsample_bytree':trial.suggest_categorical('colsample_bytree',[0.5,0.7,0.9,1.0]),
      'subsample':trial.suggest_categorical('subsample',[0.5,0.7,0.9,1.0]),
      'learning_rate':trial.suggest_categorical('learning_rate',[0.001,0.01,0.1,0.2,0.3]),
      'n_estimators':trial.suggest_categorical('n_estimators',[100,200,300,400,500]),
      'max_depth':trial.suggest_categorical('max_depth',[3,5,7,9,12,15,17,20]),
      'random_state':trial.suggest_categorical('random_state',[24,48,2020,30,3423,232123,2321]),
      'min_child_weight':trial.suggest_int('min_child_weight',1,10),
      'gamma':trial.suggest_int('gamma',0,10),
      'eval_metric':['rmse']
  }
  model = xgb.XGBRegressor(**params)
  model.fit(train_X,train_y,eval_set=[(test_X,test_y)],verbose=False)
  preds = model.predict(test_X)
  rmse = np.sqrt(mean_squared_error(test_y,preds))
  return rmse


In [61]:
# optuna
find_params = optuna.create_study(direction='minimize')
find_params.optimize(objective,n_trials=100)
print('Number of finished trials: {}'.format(len(find_params.trials)))
print('Best trial:')
trial = find_params.best_trial
print('  Value: {}'.format(trial.value))
print('  Params: ')
for key, value in trial.params.items():
  print('    {}: {}'.format(key, value))


[I 2024-11-10 18:44:35,102] A new study created in memory with name: no-name-49c98d77-68ee-48ee-b00e-3e28354cdf10
  'lambda':trial.suggest_loguniform('lambda',1e-4,10.0),
  'alpha':trial.suggest_loguniform('alpha',1e-4,10.0),
[I 2024-11-10 18:44:35,302] Trial 0 finished with value: 0.12542605188610834 and parameters: {'lambda': 0.022188737495766178, 'alpha': 0.00023465818439947204, 'colsample_bytree': 0.9, 'subsample': 0.9, 'learning_rate': 0.001, 'n_estimators': 500, 'max_depth': 5, 'random_state': 2020, 'min_child_weight': 6, 'gamma': 1}. Best is trial 0 with value: 0.12542605188610834.
  'lambda':trial.suggest_loguniform('lambda',1e-4,10.0),
  'alpha':trial.suggest_loguniform('alpha',1e-4,10.0),
[I 2024-11-10 18:44:35,407] Trial 1 finished with value: 0.15379218443378107 and parameters: {'lambda': 0.0015342188312960838, 'alpha': 0.0016052384602913824, 'colsample_bytree': 1.0, 'subsample': 0.7, 'learning_rate': 0.3, 'n_estimators': 200, 'max_depth': 20, 'random_state': 2321, 'min_chi

Number of finished trials: 100
Best trial:
  Value: 0.06717301206412896
  Params: 
    lambda: 0.2949055792485847
    alpha: 0.000879844925341833
    colsample_bytree: 0.5
    subsample: 0.5
    learning_rate: 0.01
    n_estimators: 400
    max_depth: 12
    random_state: 24
    min_child_weight: 10
    gamma: 0


In [62]:
best_params = {
    'lambda':0.2949055792485847,
    'alpha': 0.000879844925341833,
    'colsample_bytree': 0.5,
    'subsample': 0.5,
    'learning_rate': 0.1,
    'n_estimators': 400,
    'max_depth': 12,
    'random_state': 24,
    'min_child_weight': 10,
    'gamma': 0
}

In [63]:
!pip show optuna

Name: optuna
Version: 4.0.0
Summary: A hyperparameter optimization framework
Home-page: https://optuna.org/
Author: Takuya Akiba
Author-email: 
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: alembic, colorlog, numpy, packaging, PyYAML, sqlalchemy, tqdm
Required-by: 


In [64]:
model = xgb.XGBRegressor(**best_params)
model.fit(X_train_sc,y_train)
y_pred = model.predict(X_test_sc)

In [65]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred) # xgb

0.7760574470925321

In [66]:
from sklearn.ensemble import RandomForestRegressor
model2 = RandomForestRegressor()
model2.fit(X_train_sc,y_train)
y_pred2 = model2.predict(X_test_sc)
r2_score(y_test,y_pred2) # rf

0.8052636956662638

In [67]:
X_train_sc[0]

array([-0.51832699, -0.57399919, -0.95044085, -0.85046036,  0.05717854,
       -0.18943738, -1.0762311 ])

In [68]:
# save the model with pickle file
import pickle
pickle.dump(model,open('model.pkl','wb'))

In [70]:
# load the model and predict
model = pickle.load(open('model.pkl','rb'))
model.predict([[500,150,5,5,5.3,9.5,1]])

array([0.96152633], dtype=float32)

GRE Score= 500	TOEFL Score=150	University Rating=5	SOP=5	LOR=5.3	CGPA=9.5 Research=1