#### Import the required libraries

In [10]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import optuna

#### Get the data

In [7]:
# Fetch seaborn's 'healthexp' example dataset as a DataFrame.
df = sns.load_dataset('healthexp')

# Preview the first rows.
df.head()

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [8]:
# One-hot encode the categorical columns (here, Country) into indicator
# columns so every feature handed to the model is numeric/boolean.
df_model = pd.get_dummies(df)

df_model.head()

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,False,False,True,False,False,False
1,1970,192.143,72.2,False,True,False,False,False,False
2,1970,123.993,71.9,False,False,False,True,False,False
3,1970,150.437,72.0,False,False,False,False,True,False
4,1970,326.961,70.9,False,False,False,False,False,True


In [12]:
# Separate the target (Life_Expectancy) from the feature columns.
y = df_model['Life_Expectancy']
X = df_model.drop(columns=['Life_Expectancy'])

# Split into train and test sets; the fixed seed makes the split repeatable.
# With train_size=0.2, 20% of the rows go to training and the rest to testing.
# NOTE(review): an 80/20 train/test split is the more common choice -- confirm
# that test_size=0.2 was not intended here. The metrics recorded for the base
# model were produced with this setting.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.2,
    random_state=20240717,
)

#### Build the base model

In [15]:
# Baseline: a random forest with default hyperparameters, seeded so the run
# is repeatable. fit() returns the estimator itself, so it can be chained.
rfr = RandomForestRegressor(random_state=20240717).fit(X_train, y_train)

# Predict on the held-out test rows.
y_pred = rfr.predict(X_test)

# Score the predictions against the true test targets.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2s = r2_score(y_test, y_pred)

print(f'mean_absolute_error : {mae}\nmean_squared_error : {mse}\nr2_score : {r2s}')

mean_absolute_error : 0.7211863636363536
mean_squared_error : 0.9606070863636212
r2_score : 0.9136726372978682
