#### Regression Assignment
- 1. Domain -- Supervised Learning
- 2. Type - Regression
- 3. Decision Tree Regression

### Key Parameters:

- criterion - Split quality metric: 'squared_error', 'absolute_error', 'friedman_mse', 'poisson'
- splitter - Split strategy: 'best' (pick the best split; ties are still broken using random_state) or 'random' (pick the best of randomly drawn splits)
- max_depth - Maximum tree depth (controls overfitting)
- min_samples_split - Minimum samples to split a node (int or fraction)
- min_samples_leaf - Minimum samples at leaf nodes
- max_features - Features to consider per split
- max_leaf_nodes - Maximum number of leaves
- min_impurity_decrease - Minimum improvement needed to split
- ccp_alpha - Cost complexity pruning parameter
- min_weight_fraction_leaf - Minimum weighted fraction at leaves

In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [5]:
# Load the insurance data and one-hot encode it in a single step; drop_first
# removes one dummy column per encoded feature.
df = pd.get_dummies(pd.read_csv('insurance_pre.csv'), drop_first=True)

# 'charges' is the regression target; every remaining column is a feature.
y = df['charges']
X = df.drop(columns='charges')

# Hold out 20% of the rows for evaluation, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Parameter sweep for DecisionTreeRegressor.
# Each dict is passed verbatim as keyword arguments to the estimator, so every
# key must be a valid DecisionTreeRegressor parameter. Keys that are omitted
# fall back to the estimator's defaults.
# Configurations that draw random numbers during fitting pin 'random_state'
# so that their scores are reproducible from run to run.
param_combinations = [
    # Default configuration
    {'criterion': 'squared_error', 'splitter': 'best', 'max_depth': None, 'min_samples_split': 2},

    # Different criterion
    {'criterion': 'squared_error', 'splitter': 'best', 'max_depth': 5},
    {'criterion': 'absolute_error', 'splitter': 'best', 'max_depth': 5},
    {'criterion': 'friedman_mse', 'splitter': 'best', 'max_depth': 5},
    {'criterion': 'poisson', 'splitter': 'best', 'max_depth': 5},

    # Different max_depth values
    {'criterion': 'squared_error', 'max_depth': 3},
    {'criterion': 'squared_error', 'max_depth': 7},
    {'criterion': 'squared_error', 'max_depth': 10},

    # Different splitter
    {'criterion': 'squared_error', 'splitter': 'random', 'max_depth': 5, 'random_state': 42},

    # min_samples_split variations
    {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_split': 5},
    {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_split': 20},
    {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_split': 0.1},  # 10% of samples

    # min_samples_leaf variations
    {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_leaf': 5},
    {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_leaf': 10},

    # max_features variations
    {'criterion': 'squared_error', 'max_depth': 5, 'max_features': 1.0},
    # 'sqrt' samples a random subset of features at every split, so the seed is
    # fixed; without it this row's metrics differ on every run.
    {'criterion': 'squared_error', 'max_depth': 5, 'max_features': 'sqrt', 'random_state': 42},

    # max_leaf_nodes
    {'criterion': 'squared_error', 'max_leaf_nodes': 10},
    {'criterion': 'squared_error', 'max_leaf_nodes': 20},
    {'criterion': 'squared_error', 'max_leaf_nodes': 50},

    # min_impurity_decrease
    # NOTE(review): these thresholds are in units of the impurity (squared
    # target units for 'squared_error'); they look tiny relative to this
    # target's scale and may have no visible effect - confirm.
    {'criterion': 'squared_error', 'max_depth': 5, 'min_impurity_decrease': 0.01},
    {'criterion': 'squared_error', 'max_depth': 5, 'min_impurity_decrease': 0.05},

    # ccp_alpha (cost complexity pruning)
    # NOTE(review): same scale caveat as min_impurity_decrease - confirm.
    {'criterion': 'squared_error', 'max_depth': None, 'ccp_alpha': 0.01},
    {'criterion': 'squared_error', 'max_depth': None, 'ccp_alpha': 0.05},

    # min_weight_fraction_leaf
    {'criterion': 'squared_error', 'max_depth': 5, 'min_weight_fraction_leaf': 0.05},
]

In [7]:
# Module-level accumulator: run_test_scenario appends one R² value per call.
rscore = []


def run_test_scenario(X_train, X_test, y_train, y_test, scenario):
    """Fit a DecisionTreeRegressor for one scenario and score it on the test split.

    Args:
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training targets passed to ``model.fit``.
        y_test: True targets the predictions are scored against.
        scenario: Mapping of DecisionTreeRegressor keyword arguments. It may
            also carry the bookkeeping keys ``'test_id'`` and ``'purpose'``,
            which are stripped before the estimator is built and default to
            ``None`` when absent. The mapping itself is left unmodified, so
            the same scenario can be run more than once.

    Returns:
        dict with keys ``'test_id'``, ``'purpose'``, ``'mse'``, ``'rmse'``,
        ``'mae'``, ``'r2'`` and ``'feature_importances'``.

    Side effects:
        Prints the parameters and the R² score, and appends the R² score to
        the module-level ``rscore`` list.
    """
    # Work on a copy: popping from the caller's dict would make a second run
    # of the same scenario fail with KeyError.
    params = dict(scenario)
    test_id = params.pop('test_id', None)
    purpose = params.pop('purpose', None)

    print(f"Parameters:  {test_id} : {params}")

    # Create and train model
    model = DecisionTreeRegressor(**params)
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results = {
        'test_id': test_id,
        'purpose': purpose,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'feature_importances': model.feature_importances_,
    }

    print(f"Results - r2: R²: {r2:.4f}")

    rscore.append(r2)
    return results


In [13]:
# Fit one DecisionTreeRegressor per entry of param_combinations, print its
# test-set metrics and tree size, and collect one summary row per successful
# configuration in `results`.
results = []
for idx, params in enumerate(param_combinations):
    print(f"\n--- Configuration {idx} ---")
    print(f"Parameters: {params}")

    try:
        # Create and train model
        model = DecisionTreeRegressor(**params)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Calculate metrics
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Tree properties
        n_leaves = model.get_n_leaves()
        depth = model.get_depth()

        print(f"Tree depth: {depth}")
        print(f"Number of leaves: {n_leaves}")
        print(f"MSE: {mse:.4f}")
        print(f"MAE: {mae:.4f}")
        print(f"R² Score: {r2:.4f}")

        # Short, fixed-width description of the configuration for tabular display.
        params_text = str(params)
        if len(params_text) > 50:
            params_text = params_text[:50] + '...'

        # An explicit max_depth=None and a missing max_depth both mean
        # "unlimited"; record both as the string 'None' so the column does not
        # mix the None object with the 'None' string.
        depth_limit = params.get('max_depth')

        results.append({
            'config': idx,
            'params': params_text,
            'criterion': params.get('criterion', 'squared_error'),
            'max_depth': 'None' if depth_limit is None else depth_limit,
            'tree_depth': depth,
            'n_leaves': n_leaves,
            'mse': mse,
            'mae': mae,
            'r2': r2,
        })
    except Exception as e:
        # Deliberately best-effort: one invalid configuration is reported and
        # skipped so that the rest of the sweep still runs.
        print(f"Error: {e}")



--- Configuration 0 ---
Parameters: {'criterion': 'squared_error', 'splitter': 'best', 'max_depth': None, 'min_samples_split': 2}
Tree depth: 20
Number of leaves: 1068
MSE: 42790070.8761
MAE: 3105.9337
R² Score: 0.7244

--- Configuration 1 ---
Parameters: {'criterion': 'squared_error', 'splitter': 'best', 'max_depth': 5}
Tree depth: 5
Number of leaves: 32
MSE: 25840628.5493
MAE: 2891.6904
R² Score: 0.8336

--- Configuration 2 ---
Parameters: {'criterion': 'absolute_error', 'splitter': 'best', 'max_depth': 5}
Tree depth: 5
Number of leaves: 32
MSE: 21766037.6094
MAE: 1958.8165
R² Score: 0.8598

--- Configuration 3 ---
Parameters: {'criterion': 'friedman_mse', 'splitter': 'best', 'max_depth': 5}
Tree depth: 5
Number of leaves: 32
MSE: 25840628.5493
MAE: 2891.6904
R² Score: 0.8336

--- Configuration 4 ---
Parameters: {'criterion': 'poisson', 'splitter': 'best', 'max_depth': 5}
Tree depth: 5
Number of leaves: 32
MSE: 25873031.7285
MAE: 2890.2313
R² Score: 0.8333

--- Configuration 5 ---
P