
ML Lab Activity 5: Decision Tree Classification without Pruning

Problem: Classifying Breast Cancer Tumors (Malignant vs. Benign)

Objective:
Train a Decision Tree classifier on the Breast Cancer dataset without applying pruning, and evaluate the model using accuracy, confusion matrix, precision, recall, and F1-score.


In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Fetch the breast cancer dataset bundled with scikit-learn and wrap it in
# pandas objects: X holds the feature matrix (one named column per feature),
# y holds the integer class label for each sample.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target)

In [None]:
# Quick sanity check of the data: preview the first five feature rows,
# list the class names, and count how many samples fall in each class.
print(X.head(5))
print("\nTarget labels:", data.target_names)
print("Class distribution:\n", y.value_counts())

In [None]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Train the Decision Tree with default settings (no depth limit or pruning
# parameters are set); the fixed seed makes the fitted tree reproducible.
# fit() returns the estimator itself, so construction and training chain.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict class labels for the held-out test samples.
y_pred = clf.predict(X_test)

In [None]:
# Visualize the fitted (unpruned) Decision Tree.
# A large canvas is used because the unpruned tree can have many nodes.
plt.figure(figsize=(20, 10))
plot_tree(
    clf,
    filled=True,
    # Pass plain Python lists: the dataset stores these names as NumPy
    # arrays, which some scikit-learn releases reject for these parameters.
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
)
plt.title("Decision Tree (No Pruning)")
plt.show()

In [None]:
# Evaluation metrics on the held-out test set.
print("Accuracy (no pruning):", accuracy_score(y_test, y_pred))

# Label the per-class precision/recall/F1 rows with the dataset's class names
# instead of the bare integer labels so the report is self-explanatory.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, target_names=data.target_names),
)

# Rows are the true classes and columns the predicted classes, both in
# sorted label order (the same order as data.target_names).
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))