In [None]:
import yaml
 
# Load the notebook settings from the YAML file one directory up.
# Only a missing file is reported with the "not found" message, and the error
# is re-raised so execution stops here instead of failing later with a
# NameError on `config`. Any other failure (e.g. malformed YAML) propagates
# with its own traceback rather than being mislabeled as a missing file.
try:
    with open("../config.yaml", "r") as file:
        config = yaml.safe_load(file)
except FileNotFoundError:
    print("Yaml configuration file not found!")
    raise

In [None]:
import pandas as pd

# Read the CSV whose path is stored under config['input_data']['file'].
df = pd.read_csv(config['input_data']['file'])
# Bare last expression so the notebook renders the loaded frame.
df

In [None]:
# Remove the PatientID and DoctorInCharge columns from the frame.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['PatientID', 'DoctorInCharge'], errors='ignore')

In [None]:
# Render the frame after PatientID and DoctorInCharge were dropped.
df

In [None]:
# Show the dtype of every remaining column.
df.dtypes

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn import tree
import matplotlib.pyplot as plt

In [None]:
# Columns fed to the classifier as predictors.
selected_features = [
    'MemoryComplaints', 'BehavioralProblems',
    'FunctionalAssessment', 'ADL', 'MMSE',
]

# Feature matrix (predictor columns only).
X = df[selected_features]

# Target column; per the original author's note it is 0/1 for Alzheimer's.
y = df['Diagnosis']

In [None]:
# Hold out 20% of the rows for testing. stratify=y requests a split stratified
# on the target, and random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Build a depth-limited decision tree and fit it on the training rows
# (fit() returns the estimator itself, so the chained call binds the fitted model).
dtree = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = dtree.predict(X_test)

# Report accuracy, the confusion matrix and the per-class metrics on the test rows.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Draw the fitted tree on a 16x10 figure.
plt.figure(figsize=(16, 10))
tree.plot_tree(
    dtree,
    class_names=['No Alzheimer', 'Alzheimer'],
    feature_names=selected_features,
    rounded=True,
    filled=True,
)
plt.show()

In [None]:
# Pair each selected feature with the importance reported by the fitted tree,
# then print the table with the largest importance first.
importances = pd.DataFrame(
    {'Feature': selected_features, 'Importance': dtree.feature_importances_}
)
print(importances.sort_values(by='Importance', ascending=False))