#### Import the required libraries

In [18]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import optuna

#### Get the data

In [7]:
# Load the dataset.
df = sns.load_dataset(name='healthexp')

df.head()

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [8]:
# Get the dummies for the data.
df_model = pd.get_dummies(data=df)

df_model.head()

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,False,False,True,False,False,False
1,1970,192.143,72.2,False,True,False,False,False,False
2,1970,123.993,71.9,False,False,False,True,False,False
3,1970,150.437,72.0,False,False,False,False,True,False
4,1970,326.961,70.9,False,False,False,False,False,True


In [17]:
# Get the model and target data.
X = df_model.drop(labels=['Life_Expectancy'], axis=1)
y = df_model['Life_Expectancy']

# Split the data into train and test.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=20240717)

X_train.head()
# X_test.head()
# y_train.head()
# y_test .head()

Unnamed: 0,Year,Spending_USD,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
204,2009,3880.842,False,True,False,False,False,False
116,1994,1420.271,False,False,False,False,True,False
164,2002,2065.133,False,False,False,False,True,False
171,2003,5726.538,False,False,False,False,False,True
261,2018,10451.386,False,False,False,False,False,True


#### Build the base model.

In [15]:
# Define the model.
rfr_base = RandomForestRegressor(random_state=20240717)

# Fit the data to the model.
rfr_base.fit(X=X_train, y=y_train)

# Make predictions on the test data.
y_pred = rfr_base.predict(X=X_test)

# Get the metrics.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2s = r2_score(y_true=y_test, y_pred=y_pred)

print(f'mean_absolute_error : {mae}\nmean_squared_error : {mse}\nr2_score : {r2s}')

mean_absolute_error : 0.7211863636363536
mean_squared_error : 0.9606070863636212
r2_score : 0.9136726372978682


#### Hypertune and build model with 'Optuna'.

In [None]:
# Define the 'objective' function.
def objective(trial):
    # Define the hyperparameter space.
    n_estimators = trial.suggest_int(name='n_estimators', low=100, high=1000)
    max_depth = trial.suggest_int(name='max_depth', low=10, high=50)
    min_samples_split = trial.suggest_int(name='min_sample_split', low=2, high=32)
    min_samples_leaf = trial.suggest_int(name='min_sample_leaf', low=1, high=32)

    # Build the model.
    rfr_optuna = RandomForestRegressor(n_estimators=n_estimators, 
                                       max_depth=max_depth,
                                       min_samples_split=min_samples_split, 
                                       min_samples_leaf=min_samples_leaf#, random_state=20240717
                                       )
    
    # Get cross-val score
    score_crossval = cross_val_score(estimator=rfr_optuna,
                                     X=X_train, y=y_train,
                                     cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

    return score_crossval 
    

In [None]:
# Create study for the above 'objective'
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(),
                            direction='maximise')

#### Resources

1. [sklearn - model evaluation and scoring](https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter)
2. [Optuna - trial.suggest_int](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html)
3. [Optuna - create_study](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.create_study.html#optuna.study.create_study)