# Diabetes Prediction using Machine Learning
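Binary classification project: a logistic regression model predicts the `Outcome` label from the remaining columns of `diabetes.csv`.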

In [None]:
# Install libraries (run if needed)
!pip install numpy pandas matplotlib seaborn scikit-learn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Load dataset.
# The relative path is resolved against the current working directory, so
# download diabetes.csv and place it in the same folder as this notebook.
dataset_path = "diabetes.csv"
df = pd.read_csv(dataset_path)

# Preview the first five rows (the default for head()).
df.head(n=5)

In [None]:
# Dataset info: dimensions, per-column summary statistics, and how many rows
# fall into each value of the "Outcome" label.
print(df.shape)

summary_stats = df.describe()
print(summary_stats)

class_counts = df["Outcome"].value_counts()
print(class_counts)

In [None]:
# Split features & labels.
# "Outcome" is the target column; every other column is used as a feature.
X = df.drop(columns="Outcome")
y = df["Outcome"]

# Hold out 20% of the rows for evaluation. `random_state` fixes the shuffle so
# the split is reproducible. `stratify=y` keeps the proportion of each
# "Outcome" class approximately equal in the train and test sets, so the
# reported metrics are not skewed by an unrepresentative split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Feature scaling.
# The scaler's statistics are learned from the training split only; the same
# fitted scaler is then applied to both splits, so the test rows never
# influence the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train model.
# fit() returns the estimator itself, so construction and training can be
# chained into a single assignment.
model = LogisticRegression().fit(X_train, y_train)

# Echo the fitted estimator as the cell's last expression, as before.
model

In [None]:
# Predictions on the held-out test split.
y_pred = model.predict(X_test)

# Each metric is computed from the true and predicted labels and printed
# under its heading, in the order: accuracy, confusion matrix, report.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

In [None]:
# Predict custom input.
# One row of feature values, listed in the same order as the columns of X
# (the feature frame the scaler was fitted on).
# NOTE(review): the meaning of each position depends on the column order of
# diabetes.csv -- confirm against X.columns before editing the values.
sample_values = [[2, 120, 70, 20, 79, 25.0, 0.5, 33]]

# Label the sample with the training column names. The scaler was fitted on a
# DataFrame, so passing a bare ndarray would trigger scikit-learn's
# "X does not have valid feature names" warning and hide ordering mistakes.
sample = pd.DataFrame(sample_values, columns=X.columns)

# Apply the scaling learned from the training data; never refit on the sample.
sample = scaler.transform(sample)

prediction = model.predict(sample)

# The model predicts the "Outcome" label; 1 is reported as diabetic.
print("Diabetic" if prediction[0] == 1 else "Not Diabetic")