# 🧠 Diabetes Prediction Model (Cleaned & Improved Version)
This notebook trains a Logistic Regression model to predict diabetes, saves both the model and scaler, and includes an example prediction with confidence output.

In [None]:

# 1️⃣ Import Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pickle


In [None]:

# 2️⃣ Load Dataset
# The path is relative, so 'diabetes.csv' must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='diabetes.csv')
# Preview the first five rows (rendered as the cell output).
df.head(n=5)


In [None]:

# 3️⃣ Split Data into Features (X) and Target (Y)
# Every column except 'Outcome' is a feature; 'Outcome' is the label to predict.
# `columns=` already selects the column axis, so no separate `axis` argument is needed.
X = df.drop(columns='Outcome')
Y = df['Outcome']

# Train-test split: 80% train / 20% test.
# stratify=Y keeps the class ratio of 'Outcome' the same in both splits;
# random_state=2 makes the split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)


In [None]:

# 4️⃣ Scale the Data
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so no test information leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# 5️⃣ Train Logistic Regression Model
# max_iter=1000 raises the solver's iteration cap above the library default.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, Y_train)

# Predict once per split and reuse the results for every metric below,
# instead of re-running inference for the accuracy and the report separately.
train_predictions = model.predict(X_train_scaled)
test_predictions = model.predict(X_test_scaled)

# Evaluate accuracy
train_acc = accuracy_score(Y_train, train_predictions)
test_acc = accuracy_score(Y_test, test_predictions)

print(f"✅ Training Accuracy: {train_acc*100:.2f}%")
print(f"✅ Testing Accuracy: {test_acc*100:.2f}%")
print("\nClassification Report:\n", classification_report(Y_test, test_predictions))


In [None]:

# 6️⃣ Save Model and Scaler
# Both artifacts are pickled: the scaler must be shipped with the model so that
# inference inputs get the same transformation that was used during training.
model_path = 'C:/Projects/Deploying Machine Learning Model/trained_model.sav'
scaler_path = 'C:/Projects/Deploying Machine Learning Model/scaler.sav'

# Write the model first, then the scaler, each to its own binary file.
for artifact, destination in ((model, model_path), (scaler, scaler_path)):
    with open(destination, 'wb') as sink:
        pickle.dump(artifact, sink)

print("💾 Model and Scaler saved successfully!")


In [None]:

# 7️⃣ Example Prediction with Confidence Output

# Load model & scaler.
# `with` guarantees the file handles are closed once unpickling finishes.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# artifacts that were produced by this notebook or another trusted source.
with open('C:/Projects/Deploying Machine Learning Model/trained_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
with open('C:/Projects/Deploying Machine Learning Model/scaler.sav', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

# Example input data (Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age)
input_data = (4, 154, 62, 31, 284, 32.8, 0.237, 23)

# Convert and reshape into a single-row, 2-D sample (1 row, one column per feature)
input_array = np.asarray(input_data).reshape(1, -1)

# A scaler fitted on a DataFrame records the training column names in
# `feature_names_in_`. Passing a bare array to such a scaler makes scikit-learn
# warn that "X does not have valid feature names", so re-attach the names when
# they are available and fall back to the plain array otherwise.
feature_names = getattr(loaded_scaler, 'feature_names_in_', None)
if feature_names is not None:
    scaler_input = pd.DataFrame(input_array, columns=feature_names)
else:
    scaler_input = input_array

# Scale input with the statistics learned during training
scaled_input = loaded_scaler.transform(scaler_input)

# Predict
prediction = loaded_model.predict(scaled_input)
class_probabilities = loaded_model.predict_proba(scaled_input)[0]

# predict_proba columns follow the order of `classes_`, so look up the column of
# the predicted label instead of assuming the label value equals its position.
predicted_column = list(loaded_model.classes_).index(prediction[0])
probability = class_probabilities[predicted_column] * 100

print("🩸 Prediction:", "Diabetic" if prediction[0] == 1 else "Not Diabetic")
print(f"🧾 Confidence: {probability:.2f}%")
