In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Load the dataset from diabetes.csv (path is relative to the working directory).
df = pd.read_csv("diabetes.csv")

In [3]:
# Print a plain-text preview of the first five rows of the dataset.
print(df.head(n=5))

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [4]:

# Separate the label column from the predictors.
# NOTE(review): the preview above shows 0 values in SkinThickness and Insulin;
# these may be placeholders for missing measurements -- confirm before relying
# on them as real readings.
y = df['Outcome']                 # labels
X = df.drop(columns=['Outcome'])  # features: every column except the label


In [5]:
# Split the data into training and testing sets: 20% of the rows are held out
# for testing, and random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:

# Create the Decision Tree model with a fixed seed.
model = DecisionTreeClassifier(
    random_state=42,
)



In [7]:

# Train the model on the training split.
model.fit(X=X_train, y=y_train)

In [8]:


# Predict labels for the held-out test set.
y_pred = model.predict(X=X_test)


In [9]:
# Count how many test-set labels are 1 (diabetes) and how many are 0.
positive_labels = y_test[y_test == 1]
negative_labels = y_test[y_test == 0]
diabetes_count = positive_labels.count()
no_diabetes_count = negative_labels.count()

# Look up the full rows (features plus Outcome) of the positive test cases
# in the original frame, using their index labels.
people_with_diabetes = df.loc[positive_labels.index]

# Header line followed by the frame, exactly as two consecutive prints would emit.
print("People with diabetes:", people_with_diabetes, sep="\n")

print("Number of people with diabetes:", diabetes_count)
print("Number of people without diabetes:", no_diabetes_count)


People with diabetes:
     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
667           10      111             70             27        0  27.5   
199            4      148             60             27      318  30.9   
356            1      125             50             40      167  33.3   
604            4      183              0              0        0  28.4   
213            0      140             65             26      130  42.6   
209            7      184             84             33        0  35.5   
328            2      102             86             36      120  45.5   
120            0      162             76             56      100  53.2   
363            4      146             78              0        0  38.5   
425            4      184             78             39      277  37.0   
300            0      167              0              0        0  32.3   
338            9      152             78             34      171  34.2   
579            2

In [10]:

# Evaluate the model: score the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# print() separates its arguments with a space, so the matrix and the report
# each start with one leading space after the newline in their label.
print("Accuracy Score:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)

Accuracy Score: 0.7467532467532467
Confusion Matrix:
 [[75 24]
 [15 40]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.76      0.79        99
           1       0.62      0.73      0.67        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154

