In [1]:
import pandas as pd
import itertools
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [2]:
criteria = ["squared_error", "friedman_mse", "absolute_error", "poisson"]
max_features = ["sqrt", "log2", None]
splitters = ["best","random"]

In [3]:
results = []

In [4]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [5]:
dataset = pd.get_dummies(dataset,drop_first = True)
dataset = dataset.astype(int)
dataset 

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27,0,16884,0,1
1,18,33,1,1725,1,0
2,28,33,3,4449,1,0
3,33,22,0,21984,1,0
4,32,28,0,3866,1,0
...,...,...,...,...,...,...
1333,50,30,3,10600,1,0
1334,18,31,0,2205,0,0
1335,18,36,0,1629,0,0
1336,21,25,0,2007,0,0


In [6]:
independent = dataset[['age', 'bmi', 'children','sex_male','smoker_yes']]
dependent = dataset[['charges']]

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(independent, dependent,test_size = 0.30, random_state = 0)
x_train

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
1163,18,28,0,0,0
196,39,32,0,0,0
438,52,46,5,0,0
183,44,26,0,0,0
1298,33,27,2,1,0
...,...,...,...,...,...
763,27,26,0,1,0
835,42,35,2,1,0
1216,40,25,0,1,0
559,19,35,0,1,0


In [8]:
for criterion, max_feature, splitter in itertools.product(criteria, max_features, splitters):
    try:
        # Create and train the model
        model = DecisionTreeRegressor(
            criterion=criterion,
            max_features=max_feature,
            splitter=splitter,
            random_state=0
        )
        model.fit(x_train, y_train)

        # Predict and calculate R² score
        y_pred = model.predict(x_test)
        score = r2_score(y_test, y_pred)

        # Store result
        results.append({
            "criterion": criterion,
            "max_features": max_feature,
            "splitter": splitter,
            "r2_score": score
        })

    except Exception as e:
        # Log errors gracefully (e.g., for unsupported combinations)
        results.append({
            "criterion": criterion,
            "max_features": max_feature,
            "splitter": splitter,
            "r2_score": None,
            "error": str(e)
        })


In [9]:
df_results = pd.DataFrame(results)

# Save to Excel
df_results.to_excel("decision_tree_results.xlsx", index=False)

print("✅ Results saved to 'decision_tree_results.xlsx'")

✅ Results saved to 'decision_tree_results.xlsx'
