# CS5830 Project 8: Decision Trees/Neural Networks

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import precision_recall_fscore_support

from IPython.display import SVG
from graphviz import Source

from pathlib import Path

# Directory that receives the rendered decision-tree images. It is created up
# front so the later `open(output_dir / ..., "wb")` calls do not fail with
# FileNotFoundError when the notebook runs in a fresh checkout.
output_dir = Path('output')
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the diabetes dataset from the working directory and print pandas'
# summary of the resulting frame.
csv_path = "diabetes.csv"
df = pd.read_csv(csv_path)
df.info()

In [None]:
# Separate the label column from the rest of the frame: `y` is the "Outcome"
# column, `X` is every other column.
target_column = "Outcome"
y = df[target_column]
X = df.drop(columns=target_column)
X

## Explore feature correlations

In [None]:
# Pairwise correlation between the feature columns, drawn as an annotated
# heatmap.
label_font_size = 16
title_font_size = 19

corr = X.corr()

plt.figure(figsize=(12, 10))
heatmap_options = {
    "annot": True,
    "cmap": "coolwarm",
    "fmt": ".2f",
    "annot_kws": {"fontsize": label_font_size},
}
heatmap = sns.heatmap(corr, **heatmap_options)

# Re-apply the existing tick labels with a larger font on both axes.
x_labels = heatmap.get_xticklabels()
heatmap.set_xticklabels(x_labels, fontsize=label_font_size)
y_labels = heatmap.get_yticklabels()
heatmap.set_yticklabels(y_labels, fontsize=label_font_size)

plt.title("Correlation Heatmap", fontsize=title_font_size)
plt.show()

In [None]:
# Keep only the feature columns used for modelling, then hold out 20% of the
# rows as a test split (fixed seed so the split is reproducible).
selected_features = [
    "Glucose",
    "BMI",
    "Age",
    "Pregnancies",
    "DiabetesPedigreeFunction",
]
X = X[selected_features]

split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split
display(X_train)

## Decision Tree: `max_depth=3`

In [None]:
# Fit a depth-3 decision tree on the training split.
dt = DecisionTreeClassifier(max_depth=3, random_state=1)
dt.fit(X_train, y_train)

# Per-class precision / recall / F1 on the test split. The F1 array is bound
# to `f1` (not `f`) so it is not later shadowed by a file handle.
p, r, f1, _ = precision_recall_fscore_support(y_test, dt.predict(X_test), labels=[0, 1])
display(pd.DataFrame({
    "Precision": p,
    "Recall": r,
    "F1": f1
    }, index=["Class 0", "Class 1"]).T)

# Export the fitted tree as Graphviz DOT text. Feature names come from the
# training frame so the node labels always match the columns the tree was
# fit on, instead of a second hand-maintained copy of the list.
dot = export_graphviz(
    dt,
    out_file=None,
    feature_names=list(X_train.columns),
    class_names=["0", "1"],
    filled=True
)

# Render inline as SVG for the notebook output.
graph = Source(dot)
svg = SVG(graph.pipe(format='svg'))
display(svg)

# Render again as PNG and save it; make sure the target directory exists so
# the write does not fail on a fresh checkout.
png_bytes = graph.pipe(format='png')
filename = "decision_tree3.png"
output_dir.mkdir(parents=True, exist_ok=True)
(output_dir / filename).write_bytes(png_bytes)

## Decision Tree: `max_depth=9`

In [None]:
# Fit a depth-9 decision tree on the training split.
dt = DecisionTreeClassifier(max_depth=9, random_state=1)
dt.fit(X_train, y_train)

# Per-class precision / recall / F1 on the test split. The F1 array is bound
# to `f1` (not `f`) so it is not later shadowed by a file handle.
p, r, f1, _ = precision_recall_fscore_support(y_test, dt.predict(X_test), labels=[0, 1])
display(pd.DataFrame({
    "Precision": p,
    "Recall": r,
    "F1": f1
    }, index=["Class 0", "Class 1"]).T)

# Export the fitted tree as Graphviz DOT text. Feature names come from the
# training frame so the node labels always match the columns the tree was
# fit on, instead of a second hand-maintained copy of the list.
dot = export_graphviz(
    dt,
    out_file=None,
    feature_names=list(X_train.columns),
    class_names=["0", "1"],
    filled=True
)

# Render inline as SVG for the notebook output.
graph = Source(dot)
svg = SVG(graph.pipe(format='svg'))
display(svg)

# Render again as PNG and save it. The file name now carries the actual tree
# depth (9); it previously said "5", which did not match this cell's model.
# NOTE(review): update any report/README that still links decision_tree5.png.
png_bytes = graph.pipe(format='png')
filename = "decision_tree9.png"
output_dir.mkdir(parents=True, exist_ok=True)
(output_dir / filename).write_bytes(png_bytes)

## Neural Networks