In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Train and evaluate a decision-tree classifier on the diabetes dataset, then
# score one hand-written example patient.
#
# A single seed is shared by the train/test split and the classifier so that
# Restart Kernel -> Run All produces the same tree and the same metrics.
# NOTE(review): the accuracies recorded below this cell were produced with an
# unseeded tree, so they may differ slightly after a re-run.
SEED = 42

# Read the data
data = pd.read_csv('diabetes.csv')

# Split data into features and target
X = data.drop('Outcome', axis=1)  # Features: every column except the label
y = data['Outcome']  # Target: the label column

# Split data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Initialize decision tree classifier with hyperparameters.
# random_state is fixed because scikit-learn permutes the features at every
# split; without it, ties between equally good splits may be broken
# differently from run to run.
clf = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=SEED,
)

# Fit the classifier on the training data
clf.fit(X_train, y_train)

# Predictions for training set
y_train_pred = clf.predict(X_train)

# Predictions for testing set
y_test_pred = clf.predict(X_test)

# Calculate accuracy on training set
accuracy_train = accuracy_score(y_train, y_train_pred)
print("Accuracy on Training Set:", accuracy_train)

# Calculate accuracy on testing set
accuracy_test = accuracy_score(y_test, y_test_pred)
print("Accuracy on Testing Set:", accuracy_test)

# Test with a new patient
# Example: [Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]
# NOTE(review): values are matched to columns by position, so this assumes the
# CSV's feature columns appear in exactly the order listed above - confirm.
new_patient = [[6, 148, 72, 35, 0, 33.6, 0.627, 50]]

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column names; a bare list makes scikit-learn (>= 1.0) warn that
# "X does not have valid feature names" and skips the column-name check.
new_patient_df = pd.DataFrame(new_patient, columns=X.columns)
predicted_outcome = clf.predict(new_patient_df)
print("Predicted Outcome for New Patient:", predicted_outcome[0])


Accuracy on Training Set: 0.8420195439739414
Accuracy on Testing Set: 0.7922077922077922
Predicted Outcome for New Patient: 1


