<a href="https://colab.research.google.com/github/tejaswinirb1/ML_observations/blob/main/decisiontree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score
import numpy as np

# Pull the Breast Cancer bunch that ships with scikit-learn and unpack the
# feature matrix, label vector, and the human-readable names in one go.
data = load_breast_cancer()
X, y = data.data, data.target
feature_names, target_names = data.feature_names, data.target_names

# Print the column labels and class labels for reference.
print("Feature Names:", feature_names)
print("Target Names:", target_names)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build an entropy-criterion decision tree and train it on the training rows.
# fit() returns the estimator itself, so construction and training can be chained.
clf = DecisionTreeClassifier(
    criterion="entropy",
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows and report the fraction classified correctly.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nAccuracy on test data: {accuracy:.2f}")

# Dump the fitted tree as indented text rules, labelling each split with the
# dataset's feature names (passed as a plain list).
print("\nDecision Tree Rules:\n")
tree_rules = export_text(decision_tree=clf, feature_names=[*feature_names])
print(tree_rules)

# Classify a single sample (swap in any feature vector of the same length).
# Here the first held-out row is used; predict() expects a 2-D batch, so a
# leading axis is added to turn the 1-D row into a one-row matrix.
new_sample = X_test[0][None, :]
predicted_class = clf.predict(new_sample)

print("\nClassifying a new sample:")
print("Input features:", X_test[0])
# predict() returns an array of class indices; map the only entry to its label.
print("Predicted class:", target_names[predicted_class[0]])


Feature Names: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Target Names: ['malignant' 'benign']

Accuracy on test data: 0.95

Decision Tree Rules:

|--- mean concave points <= 0.05
|   |--- worst radius <= 16.83
|   |   |--- area error <= 48.70
|   |   |   |--- worst texture <= 30.15
|   |   |   |   |--- class: 1
|   |   |   |--- worst texture >  30.15
|   |   |   |   |--- worst radius <= 14.43
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst radius >  14.43
|   |   |   | 