In [1]:
# !pip install numpy==2.2.1 pandas==2.2.3 scikit-learn==1.6.0

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

In [3]:
# Load the diabetes dataset from a CSV file; the relative path means the file
# must sit in the notebook's working directory.
data = pd.read_csv("diabetes.csv")

In [4]:
# Preview the first rows to sanity-check the column names and value ranges.
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# Separate the label from the predictors: "Outcome" is the target, every
# other column is used as a feature.
y = data["Outcome"]
X = data.drop("Outcome", axis="columns")

In [8]:
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# class ratio the same in both splits, and the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [9]:
# Sanity check: sizes of the full target, the training part and the test part.
print(f"{y.shape} {y_train.shape} {y_test.shape}")

(768,) (614,) (154,)


In [10]:
# Standardize the features. The scaler learns its statistics from the training
# split only and the same fitted scaler is then applied to both splits, so no
# information from the test rows leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Create a logistic-regression classifier with scikit-learn's default
# hyperparameters (no arguments are overridden here); the actual fitting
# happens in the next cell.
model = LogisticRegression()

In [12]:
# Fit the classifier on the standardized training features and their labels.
model.fit(X_train_scaled, y_train)

In [15]:
# Score the model on the rows it was fitted on; comparing this figure with the
# test accuracy shows how well the model generalizes.
y_pred_train = model.predict(X_train_scaled)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred_train)

print(f"Training accuracy: {round(accuracy, 2)}")

Training accuracy: 0.79


In [16]:
# Score the model on the held-out rows. Note that `accuracy` is rebound here,
# so after this cell it holds the test score rather than the training score.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Test accuracy: {round(accuracy, 2)}")

Test accuracy: 0.71


In [17]:
# Persist both fitted objects as pickle files. The scaler is saved together
# with the model because new inputs must be standardized with the same fitted
# scaler before they are passed to the classifier.
with open("diabetes_model.pkl", "wb") as model_file, open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(model, model_file)
    pickle.dump(scaler, scaler_file)

**Code for Prediction**

In [18]:
import pickle
import numpy as np

# Restore the fitted classifier and scaler from the pickle files.
# NOTE: unpickling can execute arbitrary code, so only load files from a
# source you trust.
with open("diabetes_model.pkl", "rb") as model_file, open("scaler.pkl", "rb") as scaler_file:
    loaded_model = pickle.load(model_file)
    loaded_scaler = pickle.load(scaler_file)

In [19]:
# A single sample to classify, listed in the feature-column order shown by
# data.head(); these values match row 0 of that preview.
new_data_point = [
    6,      # Pregnancies
    148,    # Glucose
    72,     # BloodPressure
    35,     # SkinThickness
    0,      # Insulin
    33.6,   # BMI
    0.627,  # DiabetesPedigreeFunction
    50,     # Age
]

In [20]:
# Standardize the sample with the restored scaler.
#
# The scaler was fitted on a DataFrame, so it remembers the training column
# names in `feature_names_in_`. Passing a bare list makes scikit-learn emit
# "X does not have valid feature names" and skips any check on the columns.
# Wrapping the sample in a one-row DataFrame labelled with those names removes
# the warning, and the DataFrame constructor raises immediately if the number
# of values does not match the number of training features.
# `pd` is the pandas alias from the notebook's first import cell.
new_data_frame = pd.DataFrame([new_data_point], columns=loaded_scaler.feature_names_in_)
scaled_data = loaded_scaler.transform(new_data_frame)



In [25]:
# Inspect the standardized feature vector that will be fed to the model.
scaled_data

array([[ 0.65855832,  0.85907333,  0.13906137,  0.89772544, -0.73076636,
         0.20709337,  0.45320462,  1.40678576]])

In [26]:
# Classify the scaled sample; predict() returns an array with one label per
# input row, so a single row yields a one-element array.
prediction = loaded_model.predict(scaled_data)

In [27]:
# Show the raw prediction array.
prediction

array([1])

In [29]:
# Print just the predicted label of the single sample.
print(prediction[0])

1


In [30]:
# Translate the numeric class into a readable label: 1 is reported as
# diabetic, any other value as non-diabetic.
if prediction[0] == 1:
    print("Diabetic")
else:
    print("Non Diabetic")

Diabeteic
