In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load the Dataset
# The loaded iris object supplies the feature matrix (.data), the class labels
# (.target), the feature/class names and a text description (.DESCR).
iris = datasets.load_iris()
feature_matrix, class_codes = iris.data, iris.target

# Single working frame for the notebook: one column per feature, plus the
# class label stored under 'target'.
df = pd.DataFrame(data=feature_matrix, columns=iris.feature_names)
df['target'] = class_codes

print(iris.DESCR)
print(iris.target_names)

# Step 2: Exploratory Data Analysis (EDA)
# Each entry pairs a heading with the table printed beneath it: a preview of
# the first rows, summary statistics, and the number of rows per class label.
eda_sections = [
    ("First 5 rows of dataset:", df.head()),
    ("\nDataset Summary:", df.describe()),
    ("\nTarget Value Counts:", df['target'].value_counts()),
]

for heading, table in eda_sections:
    print(heading)
    print(table)

# Pairplot Visualization
# Pairwise plots of df, coloured by the 'target' column, with KDE curves on the diagonal.
sns.pairplot(data=df, hue='target', diag_kind='kde')
plt.show()

# Correlation Heatmap
# Annotated pairwise correlations of all columns in df (the 'target' column included).
corr_fig, corr_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(data=df.corr(), annot=True, cmap='coolwarm', linewidths=0.5, ax=corr_ax)
corr_ax.set_title("Feature Correlation Heatmap")
plt.show()

# Step 3: Splitting the Dataset into Train and Test
# Features are every column except the label; the label is the 'target' column.
X = df.drop(columns=['target'])
y = df['target']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same on every run.
# NOTE(review): the split is not stratified, so class proportions in the test
# set may differ from the full data -- consider stratify=y if that matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity check: every row must land in exactly one of the two partitions.
assert len(X_train) + len(X_test) == len(X)

# Report the partition sizes and a short preview instead of printing the whole
# feature table.
print("Train shape:", X_train.shape, "| Test shape:", X_test.shape)
print(X.head())


In [None]:

# Step 4: Train Decision Tree Model
# Splits are scored with the Gini criterion; random_state is pinned so the
# fitted tree is the same on every run. fit() hands back the estimator itself,
# which lets construction and training be written as one expression.
dt_model = DecisionTreeClassifier(random_state=42, criterion='gini').fit(X_train, y_train)

# Step 5: Visualizing the Decision Tree
# Draw the fitted tree with nodes coloured by class (filled=True) and labelled
# with the iris feature and class names.
plt.figure(figsize=(12, 8))
plot_tree(
    dt_model,
    # iris.target_names is a NumPy array; some scikit-learn releases validate
    # these arguments as plain lists and reject arrays, so convert explicitly.
    feature_names=list(iris.feature_names),
    class_names=list(iris.target_names),
    filled=True,
)
plt.title("Decision Tree Structure")
plt.show()

# Step 6: Model Evaluation
# Predict on the held-out rows and tabulate true vs. predicted classes.
y_pred = dt_model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)

print("\nConfusion Matrix:")
print(conf_matrix)

# Visualizing the Confusion Matrix
# Same counts as printed above, drawn as an annotated heatmap with the class
# names on both axes.
class_labels = iris.target_names
cm_fig, cm_ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=cm_ax,
)
cm_ax.set(xlabel="Predicted Label", ylabel="True Label", title="Confusion Matrix")
plt.show()

# Per-class precision/recall/F1 on the held-out rows. Rows are labelled with
# the iris class names so the report reads the same way as the confusion
# matrix heatmap, instead of showing the raw integer codes.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))

# Overall fraction of held-out rows classified correctly.
print("\nModel Accuracy:", accuracy_score(y_test, y_pred))