In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import export_graphviz
from IPython.display import SVG
from sklearn import tree
from graphviz import Source

In [None]:
# Load the dataset from a CSV file in the working directory and show it.
DATA_PATH = "diabetes.csv"
df = pd.read_csv(DATA_PATH)
df

### Dropping NaN values

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

## Looking at data correlation

In [None]:
# Pairwise correlation between all columns, drawn as an annotated heatmap.
corr = df.corr()

TICK_FONTSIZE = 16   # size of annotations and axis tick labels
TITLE_FONTSIZE = 19  # size of the figure title

fig, ax = plt.subplots(figsize=(12, 10))
heatmap = sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    annot_kws={"fontsize": TICK_FONTSIZE},
    ax=ax,
)

# Re-apply the existing tick labels with a larger font.
x_labels = heatmap.get_xticklabels()
y_labels = heatmap.get_yticklabels()
heatmap.set_xticklabels(x_labels, fontsize=TICK_FONTSIZE)
heatmap.set_yticklabels(y_labels, fontsize=TICK_FONTSIZE)

heatmap.set_title("Correlation Heatmap", fontsize=TITLE_FONTSIZE)
plt.show()

# Defining X, y

In [None]:
# The "Outcome" column is the target; all remaining columns are candidate features.
y = df["Outcome"]
X = df.drop(columns="Outcome")
X

# Using the top 5 features for the decision tree

In [None]:
# Restrict the feature matrix to the columns used to train the decision trees.
feature_columns = ["Glucose", "BMI", "Age", "Pregnancies", "DiabetesPedigreeFunction"]
X = X[feature_columns]

# Splitting data into train and test

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Creating our tree with max depth 3

In [None]:
# Fit a shallow decision tree (at most 3 levels) on the training split.
dt = DecisionTreeClassifier(max_depth=3, random_state=1)
dt.fit(X_train, y_train)

# Per-class precision / recall / F-score on the held-out test split.
# Each result is an array ordered like `labels`, i.e. [class 0, class 1].
p, r, f, _ = precision_recall_fscore_support(y_test, dt.predict(X_test), labels=[0, 1])
display(f"precision: {p}")
display(f"recall: {r}")
display(f"f-score: {f}")

# Label the split nodes with the columns the tree was actually trained on.
# A hard-coded list here can silently drift from the training columns (the
# previous one named six columns of an unrelated dataset), so the names are
# taken from the training frame itself.
dot = tree.export_graphviz(
    dt,
    out_file=None,
    feature_names=list(X_train.columns),
    class_names=["0", "1"],
    filled=True,
)

# Render the tree inline as SVG.
graph = Source(dot)
svg = SVG(graph.pipe(format="svg"))
display(svg)

# Also render the same graph as PNG and save it next to the notebook.
png_bytes = graph.pipe(format="png")
file_path = "decision_tree3.png"

# Use a dedicated handle name so the f-score array `f` above is not overwritten.
with open(file_path, "wb") as png_file:
    png_file.write(png_bytes)

# Creating our tree with max depth 9

In [None]:
# Fit a deeper decision tree (at most 9 levels) on the same training split.
dt = DecisionTreeClassifier(max_depth=9, random_state=1)
dt.fit(X_train, y_train)

# Per-class precision / recall / F-score on the held-out test split.
# Each result is an array ordered like `labels`, i.e. [class 0, class 1].
p, r, f, _ = precision_recall_fscore_support(y_test, dt.predict(X_test), labels=[0, 1])
display(f"precision: {p}")
display(f"recall: {r}")
display(f"f-score: {f}")

# Label the split nodes with the columns the tree was actually trained on.
# A hard-coded list here can silently drift from the training columns (the
# previous one named six columns of an unrelated dataset), so the names are
# taken from the training frame itself.
dot = tree.export_graphviz(
    dt,
    out_file=None,
    feature_names=list(X_train.columns),
    class_names=["0", "1"],
    filled=True,
)

# Render the tree inline as SVG.
graph = Source(dot)
svg = SVG(graph.pipe(format="svg"))
display(svg)

# Also render the same graph as PNG and save it next to the notebook.
png_bytes = graph.pipe(format="png")
# NOTE(review): the filename says "5" although this tree uses max_depth=9;
# kept unchanged in case something else reads this path — confirm and rename.
file_path = "decision_tree5.png"

# Use a dedicated handle name so the f-score array `f` above is not overwritten.
with open(file_path, "wb") as png_file:
    png_file.write(png_bytes)