In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.42-cp310-cp310-win_amd64.whl.metadata (9.8 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.3-cp310-cp310-win_amd64.whl.metadata (4.2 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
Downloading alembic-1.16.4-py3-none-any.whl (247 kB)
Downloading sqlalchemy-2.0.42-cp310-cp310-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---------------------------------------- 2.1/2.1 MB 14.9 MB/s eta 0:00:00
Downloading greenlet-3.2.3-cp310-cp310-win_amd64.w

In [3]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import optuna
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

In [4]:
healthexp = sns.load_dataset('healthexp')
healthexp.head(100)

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9
...,...,...,...,...
95,1991,Canada,1805.209,77.6
96,1991,France,1558.033,77.2
97,1991,Great Britain,842.797,75.9
98,1991,Japan,1166.430,79.1


In [5]:
healthexp = pd.get_dummies(healthexp)

In [7]:
X = healthexp.drop(['Life_Expectancy'], axis=1)
y = healthexp['Life_Expectancy']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=19)

In [9]:
rfr = RandomForestRegressor(random_state=13)

In [10]:
rfr.fit(X_train, y_train)

In [11]:
y_pred = rfr.predict(X_test)

In [12]:
mean_absolute_error(y_test, y_pred)

0.25916363636361917

In [13]:
mean_squared_error(y_test, y_pred)

0.10221141818181628

In [14]:
r2_score(y_test, y_pred)

0.9910457602615238

In [25]:
def objective(trial):

    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 32)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)

    model = RandomForestRegressor(n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf)

    score = cross_val_score(model, X, y, n_jobs=-1, cv=5, scoring='neg_mean_squared_error').mean()
    return score

In [34]:
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.RandomSampler(seed=42)) # Default is random Search

[I 2025-08-02 12:05:42,538] A new study created in memory with name: no-name-49bb1dd2-5eff-422c-9a84-c8013cf89cdc


In [35]:
study.optimize(objective, n_trials=100)

[I 2025-08-02 12:05:44,859] Trial 0 finished with value: -4.488999619416664 and parameters: {'n_estimators': 437, 'max_depth': 48, 'min_samples_split': 24, 'min_samples_leaf': 20}. Best is trial 0 with value: -4.488999619416664.
[I 2025-08-02 12:05:46,208] Trial 1 finished with value: -5.180871459678092 and parameters: {'n_estimators': 240, 'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 28}. Best is trial 0 with value: -4.488999619416664.
[I 2025-08-02 12:05:48,864] Trial 2 finished with value: -5.621930410356181 and parameters: {'n_estimators': 641, 'max_depth': 39, 'min_samples_split': 2, 'min_samples_leaf': 32}. Best is trial 0 with value: -4.488999619416664.
[I 2025-08-02 12:05:53,655] Trial 3 finished with value: -3.0184689526378627 and parameters: {'n_estimators': 850, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 6}. Best is trial 3 with value: -3.0184689526378627.
[I 2025-08-02 12:05:55,590] Trial 4 finished with value: -3.7733519045940653 and param

In [36]:
best_params = study.best_params
print(f"Best Hyperparameters: {best_params}")

Best Hyperparameters: {'n_estimators': 112, 'max_depth': 14, 'min_samples_split': 3, 'min_samples_leaf': 2}


In [37]:
best_score = study.best_value
print(f"Best Accuracy: {best_score:.3f}")

Best Accuracy: -1.791


In [38]:
optuna.visualization.plot_optimization_history(study)

In [39]:
optuna.visualization.plot_parallel_coordinate(study)

In [40]:
optuna.visualization.plot_slice(study, params=['n_estimators', 'max_depth', 'min_samples_leaf', 'min_samples_split'])

In [41]:
optuna.visualization.plot_param_importances(study)

In [42]:
best_n_estimators = best_params['n_estimators']
best_max_depth = best_params['max_depth']
best_min_samples_split = best_params['min_samples_split']
best_min_samples_leaf = best_params['min_samples_leaf']

In [51]:
best_model = RandomForestRegressor(n_estimators=best_n_estimators,
max_depth=best_max_depth,
min_samples_split=best_min_samples_split,
min_samples_leaf=best_min_samples_leaf)
best_model.fit(X_train, y_train)

In [52]:
y_pred = best_model.predict(X_test)

In [53]:
mean_squared_error(y_test, y_pred)

0.13160346261091369

In [54]:
r2_score(y_test, y_pred)

0.9884708677798059