### Question 6: Write a Python program to:
### ● Load the Iris Dataset
### ● Train a Decision Tree Classifier using the Gini criterion
### ● Print the model's accuracy and feature importances

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Fetch the Iris dataset and unpack it into a feature matrix and a label vector
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the Gini-criterion Decision Tree Classifier and fit it in one step
# (fit() returns the fitted estimator itself)
dt_model = DecisionTreeClassifier(criterion="gini", random_state=42).fit(X_train, y_train)

# Predict the held-out samples and measure the fraction classified correctly
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [4]:
# Report the accuracy first, then one line per feature with its importance

print("Model Accuracy:", accuracy)  # accuracy value computed earlier

print("Feature Importances:")
for idx, feature in enumerate(iris.feature_names):
    # Importances are looked up by the position of the feature name
    importance = dt_model.feature_importances_[idx]
    print(f"{feature}: {importance:.4f}")

Model Accuracy: 1.0
Feature Importances:
sepal length (cm): 0.0000
sepal width (cm): 0.0167
petal length (cm): 0.9061
petal width (cm): 0.0772


### Question 7: Write a Python program to:
### ● Load the Iris Dataset
### ● Train a Decision Tree Classifier with max_depth=3 and compare its accuracy to a fully-grown tree.


In [5]:
# Reload the Iris dataset; X holds the measurements, y the class labels
iris = load_iris()
X, y = iris.data, iris.target

In [6]:
### Train a Decision Tree Classifier with max_depth=3
### and compare its accuracy to a fully-grown tree.


def _fit_and_evaluate(tree, X_train, X_test, y_train, y_test):
    """Fit ``tree`` on the training split and score it on the test split.

    Both trees in this cell go through exactly the same steps, so the
    fit / predict / accuracy sequence lives here once instead of being
    copy-pasted per model.

    Parameters
    ----------
    tree : estimator exposing ``fit`` and ``predict``
        The unfitted model; it is fitted in place.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.

    Returns
    -------
    tuple
        ``(tree, predictions, accuracy)`` - the fitted model, its
        predictions for ``X_test`` and the accuracy of those predictions
        against ``y_test``.
    """
    tree.fit(X_train, y_train)
    predictions = tree.predict(X_test)
    return tree, predictions, accuracy_score(y_test, predictions)


# Split the dataset into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Decision Tree with max_depth = 3
dt_limited, y_pred_limited, accuracy_limited = _fit_and_evaluate(
    DecisionTreeClassifier(max_depth=3, random_state=42),
    X_train, X_test, y_train, y_test,
)

# Fully-grown Decision Tree (no depth restriction)
dt_full, y_pred_full, accuracy_full = _fit_and_evaluate(
    DecisionTreeClassifier(random_state=42),
    X_train, X_test, y_train, y_test,
)

# Print comparison results
print("Decision Tree with max_depth = 3 Accuracy:", accuracy_limited)
print("Fully-grown Decision Tree Accuracy:", accuracy_full)

Decision Tree with max_depth = 3 Accuracy: 1.0
Fully-grown Decision Tree Accuracy: 1.0


### Question 8: Write a Python program to:
### ● Load the Boston Housing Dataset
### ● Train a Decision Tree Regressor
### ● Print the Mean Squared Error (MSE) and feature importances

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [12]:
# Read the Boston Housing data from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="boston_housing.csv")

In [16]:
# Preview the first five rows to check the columns that were loaded
df.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [18]:
# Train a Decision Tree Regressor

y = df["medv"]                 # target: the "medv" column
X = df.drop(columns="medv")    # features: every column except the target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the regressor (fit() returns the fitted estimator) and predict the held-out rows
model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

In [19]:
# Report the Mean Squared Error, then one line per feature column with its importance

print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred))

print("\nFeature Importances:")
for position, feature in enumerate(X.columns):
    # Importances are looked up by the position of the column in X
    importance = model.feature_importances_[position]
    print(f"{feature}: {importance:.4f}")

MSE: 10.416078431372549

Feature Importances:
crim: 0.0513
zn: 0.0034
indus: 0.0058
chas: 0.0000
nox: 0.0271
rm: 0.6003
age: 0.0136
dis: 0.0707
rad: 0.0019
tax: 0.0125
ptratio: 0.0110
b: 0.0090
lstat: 0.1933


### Question 9: Write a Python program to:
### ● Load the Iris Dataset
### ● Tune the Decision Tree's max_depth and min_samples_split using GridSearchCV
### ● Print the best parameters and the resulting model accuracy

In [22]:
# Import required libraries

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [23]:
# Load Iris once more and split it into features (X) and labels (y)
iris = load_iris()
X, y = iris.data, iris.target

In [24]:
# Tune the Decision Tree's max_depth
# and min_samples_split using GridSearchCV

# Hold out 20% of the samples so the tuned model is scored on data the search never saw
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base estimator whose hyper-parameters are searched
dt = DecisionTreeClassifier(random_state=42)

# Candidate values: 5 depths x 3 split thresholds = 15 combinations
param_grid = dict(
    max_depth=[None, 2, 3, 4, 5],
    min_samples_split=[2, 5, 10],
)

# 5-fold cross-validated grid search, ranked by accuracy
grid_search = GridSearchCV(dt, param_grid, scoring="accuracy", cv=5)
grid_search.fit(X_train, y_train)

# Estimator selected by the search
best_model = grid_search.best_estimator_

# Score the selected estimator on the held-out samples
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [25]:
# Print the best parameters and the resulting model accuracy

print("Best Parameters:", grid_search.best_params_)
print("Model Accuracy:", accuracy)

Best Parameters: {'max_depth': None, 'min_samples_split': 2}
Model Accuracy: 1.0
