In [16]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

In [3]:
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
df = pd.read_csv(url)
print(df.head())


      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  medv  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  


In [4]:
X = df.drop('medv', axis=1)
y = df['medv']

In [5]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [6]:
X = df.iloc[:,0:13]
y = df.iloc[:,13]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)

In [10]:
rt = DecisionTreeRegressor(criterion = 'friedman_mse', max_depth=5)

In [11]:
rt.fit(X_train,y_train)

In [12]:
y_pred = rt.predict(X_test)

In [14]:
y_pred

array([22.72619048, 30.05      , 19.73333333, 20.35555556, 15.91052632,
       22.72619048, 15.91052632, 15.91052632, 22.72619048, 20.35555556,
       19.73333333, 19.73333333,  9.2037037 , 22.72619048, 20.35555556,
       24.45      , 19.73333333,  9.2037037 , 44.65555556, 14.135     ,
       22.72619048, 22.72619048, 15.91052632, 25.98518519, 14.135     ,
       14.135     , 20.35555556, 14.135     , 15.91052632, 20.35555556,
       19.73333333, 22.72619048, 17.8       , 20.35555556, 15.91052632,
       15.91052632, 33.1       , 20.35555556, 22.3       , 22.72619048,
       19.6       , 25.98518519, 44.65555556, 20.35555556, 22.72619048,
       14.135     , 15.91052632, 22.72619048, 15.91052632, 30.05      ,
       20.35555556, 33.1       , 15.91052632, 25.98518519, 44.65555556,
       22.72619048, 15.91052632, 30.05      , 22.72619048, 22.3       ,
       25.98518519, 33.1       , 30.05      , 20.35555556, 30.05      ,
       15.91052632, 14.135     , 22.72619048, 30.05      , 15.91

In [13]:
r2_score(y_test,y_pred)

0.8851372725318482

# Hyperparameter Tuning

In [18]:
param_grid = {
    'max_depth':[2,4,8,10,None],
    'criterion':['friedman_mse','absolute_error','squared_error'],
    'max_features':[0.25,0.5,1.0],
    'min_samples_split':[0.25,0.5,1.0]
}

In [19]:

grid_search = GridSearchCV(rt, param_grid, cv=5, scoring='r2',)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'criterion': 'squared_error', 'max_depth': 4, 'max_features': 0.5, 'min_samples_split': 0.25}


In [24]:
best_model.score(X_test, y_test)

0.600659658880479

In [31]:
y_pred = best_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)


y_pred_class = np.round(y_pred).astype(int)  # to convert to class labels of 0 or 1
y_test_class = np.round(y_test).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
class_report = classification_report(y_test_class, y_pred_class)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
print(f"Mean Squared Error: {mse:.4f}")
print(f"R² Score: {r2:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", class_report)

Mean Squared Error: 29.2851
R² Score: 0.6007
Accuracy: 0.0294

Classification Report:
               precision    recall  f1-score   support

           5       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         2
          10       0.00      0.00      0.00         4
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         4
          14       0.00      0.00      0.00         5
          15       0.00      0.00      0.00         4
          16       0.00      0.00      0.00         2
          17       0.04      0.17      0.07         6
          18       0.00      0.00      0.00         6
          19       0.00      0.00      0.00         6
          20       0.00      0.00      0.00        11
          21       0.00      0.00      0.00         8
          22       0.05      0.50      0.09         4
          23       0.00      0.00      0.00     