<a href="https://colab.research.google.com/github/mukthipriya/aiml-intership/blob/main/task_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np

# Load data: "heart.csv" must be present in the working directory.
df = pd.read_csv("heart.csv")

# The 'target' column is the label; every other column is used as a predictor.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# classes turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Decision Tree: fit a tree with default settings (seeded for repeatability)
# on the training split, then score its predictions on the held-out rows.
dtree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = dtree.predict(X_test)
dtree_accuracy = accuracy_score(y_test, y_pred)
print("Decision Tree Accuracy:", dtree_accuracy)

# Visualize Decision Tree (best effort: needs the `graphviz` Python package and
# the Graphviz system binaries; on any failure a message is printed and the
# script carries on).
try:
    import graphviz
    dot_data = export_graphviz(
        dtree, out_file=None,  # out_file=None returns the DOT source as a string
        # Pass a plain list rather than the pandas Index: some scikit-learn
        # releases reject a non-list here, and that error would otherwise be
        # swallowed below and misreported as a skipped visualization.
        feature_names=list(X.columns),
        # assumes target is coded 0 = no disease, 1 = disease — TODO confirm
        class_names=['No Disease', 'Disease'],
        filled=True, rounded=True,
        special_characters=True
    )
    graph = graphviz.Source(dot_data)
    graph.render("decision_tree")  # creates a PDF
    # graph.view()  # Uncomment to open the PDF automatically
except Exception as e:
    # Deliberately broad: the diagram is optional, so a missing package, a
    # missing `dot` executable, or a rendering error must not stop the
    # remaining model comparisons from running.
    print("Graphviz visualization skipped:", e)

# Pruned Decision Tree: same training data and seed, but depth capped at 3.
dtree_pruned = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
pruned_pred = dtree_pruned.predict(X_test)
print("Pruned Tree Accuracy:", accuracy_score(y_test, pruned_pred))

# Random Forest: a seeded ensemble of 100 trees, trained and scored on the
# same train/test split as the single trees above.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
print("Random Forest Accuracy:", rf_accuracy)

# Feature Importances: bar chart of the forest's importances, largest first.
importances = rf.feature_importances_
indices = np.flip(np.argsort(importances))  # column positions, most important first
bar_positions = range(X.shape[1])           # one bar per predictor column
ranked_names = X.columns[indices]

plt.figure(figsize=(10, 6))
plt.title("Feature Importances")
plt.bar(bar_positions, importances[indices])
plt.xticks(bar_positions, ranked_names, rotation=90)  # vertical tick labels
plt.show()

# Cross-validation: 5-fold scores for the forest, computed over all of X and y
# (not just the training split); report the mean and the spread across folds.
cv_scores = cross_val_score(rf, X, y, cv=5)
cv_mean = cv_scores.mean()
cv_std = cv_scores.std()
print("Random Forest CV Accuracy: %.2f (+/- %.2f)" % (cv_mean, cv_std))