In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
dataset = pd.read_csv('insurance_pre.csv')

In [5]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [7]:
dataset = pd.get_dummies(dataset,drop_first=True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [9]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [11]:
independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]

In [13]:
dependent = dataset[['charges']]

In [15]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(independent,dependent,test_size=0.30,random_state=0)

In [43]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Corrected param_grid
param_grid = {
    'n_estimators': [10, 50, 100, 200],  # Number of trees in the forest
    'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],  # Valid criteria for regression
    'max_features': ['sqrt', 'log2', None],  # Number of features considered for split
    'max_depth': [None, 10, 20, 30],  # Maximum depth of trees
    'min_samples_split': [2, 5, 10],  # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4],  # Minimum samples required in a leaf node
    'bootstrap': [True, False],  # Whether to bootstrap samples
}

# Perform grid search
grid = GridSearchCV(RandomForestRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1)
grid.fit(x_train, y_train)

# Display results
print("Best Parameters:", grid.best_params_)
print("Best Score:", grid.best_score_)


Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


  return fit_method(estimator, *args, **kwargs)


Best Parameters: {'bootstrap': True, 'criterion': 'absolute_error', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 50}
Best Score: 0.8411108887330089


In [45]:
result = grid.cv_results_

In [47]:
grid_predictions = grid.predict(x_test)

In [49]:
from sklearn.metrics import r2_score
r_score = r2_score(y_test,grid_predictions)

In [51]:
r_score

0.8905870139484385

In [53]:
table = pd.DataFrame.from_dict(result)

In [55]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_criterion,param_max_depth,param_max_features,param_min_samples_leaf,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.032751,0.002249,0.004606,0.000375,True,squared_error,,sqrt,1,2,10,"{'bootstrap': True, 'criterion': 'squared_erro...",0.837039,0.790241,0.789425,0.787527,0.756303,0.792107,0.025817,2811
1,0.142138,0.004201,0.009205,0.000746,True,squared_error,,sqrt,1,2,50,"{'bootstrap': True, 'criterion': 'squared_erro...",0.858324,0.786312,0.809484,0.826226,0.762956,0.808660,0.032754,2483
2,0.278360,0.004393,0.013408,0.000546,True,squared_error,,sqrt,1,2,100,"{'bootstrap': True, 'criterion': 'squared_erro...",0.861823,0.795368,0.809467,0.818446,0.765621,0.810145,0.031435,2396
3,0.538769,0.009894,0.022430,0.000740,True,squared_error,,sqrt,1,2,200,"{'bootstrap': True, 'criterion': 'squared_erro...",0.864152,0.792615,0.802802,0.832151,0.768233,0.811991,0.033172,2291
4,0.029931,0.000959,0.004406,0.000376,True,squared_error,,sqrt,1,5,10,"{'bootstrap': True, 'criterion': 'squared_erro...",0.842392,0.802983,0.814333,0.816451,0.767890,0.808810,0.024187,2477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3451,0.678710,0.006552,0.016519,0.002700,False,poisson,30,,4,5,200,"{'bootstrap': False, 'criterion': 'poisson', '...",0.833919,0.762870,0.799049,0.757367,0.749263,0.780494,0.031684,3005
3452,0.038404,0.000695,0.004100,0.000206,False,poisson,30,,4,10,10,"{'bootstrap': False, 'criterion': 'poisson', '...",0.832588,0.767274,0.806582,0.762040,0.751991,0.784095,0.030520,2916
3453,0.170216,0.004566,0.007916,0.000491,False,poisson,30,,4,10,50,"{'bootstrap': False, 'criterion': 'poisson', '...",0.832588,0.767274,0.806582,0.762068,0.751991,0.784101,0.030516,2914
3454,0.327986,0.006115,0.010321,0.001372,False,poisson,30,,4,10,100,"{'bootstrap': False, 'criterion': 'poisson', '...",0.832588,0.767274,0.806582,0.762085,0.751991,0.784104,0.030513,2909
