In [96]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [20]:
# Read the dataset from a CSV file located relative to the working directory
# and preview its first ten rows.
df = pd.read_csv('diabetes.csv')
df.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


In [21]:
# Summarise column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [22]:
# Feature matrix: four of the predictor columns; target: the Outcome column
# (0/1 in the preview).
# NOTE(review): the preview shows BMI == 0.0 for row 9 — presumably a
# missing-value placeholder; confirm and impute or drop before modelling.
x = df[["Pregnancies","Glucose","BMI","Age"]]
y = df["Outcome"]

In [23]:
# Preview the first ten rows of the selected feature columns.
x.head(10)

Unnamed: 0,Pregnancies,Glucose,BMI,Age
0,6,148,33.6,50
1,1,85,26.6,31
2,8,183,23.3,32
3,1,89,28.1,21
4,0,137,43.1,33
5,5,116,25.6,30
6,3,78,31.0,26
7,10,115,35.3,29
8,2,197,30.5,53
9,8,125,0.0,54


In [24]:
# Preview the first ten target labels.
y.head(10)

0    1
1    0
2    1
3    0
4    1
5    0
6    1
7    0
8    1
9    1
Name: Outcome, dtype: int64

In [25]:
# Data splitting

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing.
# random_state pins the shuffle so every re-run of the notebook evaluates all
# models on the same split; stratify=y keeps the Outcome class ratio the same
# in the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

In [26]:
# Inspect the training features; the row labels are the original df indices.
x_train

Unnamed: 0,Pregnancies,Glucose,BMI,Age
499,6,154,29.3,39
427,1,181,34.1,38
13,1,189,30.1,59
159,17,163,40.9,47
532,1,86,41.3,29
...,...,...,...,...
750,4,136,31.2,22
56,7,187,37.7,41
737,8,65,32.0,42
493,4,125,28.9,45


In [27]:
# Inspect the held-out test features; the row labels are the original df indices.
x_test

Unnamed: 0,Pregnancies,Glucose,BMI,Age
578,10,133,27.0,36
726,1,116,36.1,25
586,8,143,34.9,41
214,9,112,34.2,36
112,1,89,31.2,23
...,...,...,...,...
17,7,107,29.6,31
246,10,122,31.2,41
195,5,158,39.4,29
523,9,130,34.2,45


In [103]:
import numpy as np

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    The naive formula overflows in ``np.exp(-z)`` for large negative ``z``.
    This version only ever exponentiates ``-|z|``, which cannot overflow, and
    selects the algebraically equivalent branch for each sign of ``z``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); converted to a float NumPy array.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar for
        scalar input).
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and is a no-op otherwise.
    return result[()]

def calculate_cost(y, y_pred):
    """Return the mean binary cross-entropy between labels and probabilities.

    Parameters
    ----------
    y : array-like
        True labels.
    y_pred : array-like
        Predicted probabilities; clipped to [1e-15, 1 - 1e-15] so that the
        logarithms below never receive 0.

    Returns
    -------
    float
        The negated sum of per-sample log-likelihoods divided by ``len(y)``.
    """
    tiny = 1e-15
    clipped = np.clip(y_pred, tiny, 1 - tiny)
    log_likelihood = y * np.log(clipped) + (1 - y) * np.log(1 - clipped)
    return (-1 / len(y)) * np.sum(log_likelihood)


def gradient_descent(X, y, y_pred, alpha):
    """Return the parameter update step for one gradient-descent iteration.

    Despite the name, this does not update any parameters itself: it computes
    ``X.T · (y_pred - y) / len(y)`` and returns it multiplied by ``alpha``.
    The caller subtracts the returned step from its parameter vector.

    Parameters
    ----------
    X : array with one row per sample.
    y : true labels, one per row of ``X``.
    y_pred : predicted probabilities, one per row of ``X``.
    alpha : learning rate that scales the averaged gradient.
    """
    residual = y_pred - y
    return alpha * (np.dot(X.T, residual) / len(y))

def logistic_regression(X, y, alpha, epochs):
    """Fit an unregularised logistic-regression model by batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix without an intercept column.
    y : array-like of length m
        Target labels.
    alpha : float
        Learning rate forwarded to ``gradient_descent``.
    epochs : int
        Number of full-batch update steps.

    Returns
    -------
    numpy.ndarray of shape (n + 1,)
        Fitted parameters; element 0 is the intercept.
    """
    # Work on plain float arrays so pandas index labels cannot influence the
    # element-wise arithmetic below.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    m, n = X.shape
    X = np.column_stack((np.ones(m), X))  # Add intercept term
    theta = np.zeros(n + 1)

    for _ in range(epochs):
        y_pred = sigmoid(np.dot(X, theta))
        # gradient_descent returns the step already scaled by alpha.
        # (The per-epoch cost was previously computed here but never used.)
        theta -= gradient_descent(X, y, y_pred, alpha)

    return theta

def predict(X, theta):
    """Classify each row of ``X`` as 0 or 1 using the parameters ``theta``.

    A column of ones is prepended to ``X`` so that ``theta[0]`` acts as the
    intercept; a row is labelled 1 when its sigmoid output is at least 0.5.

    Parameters
    ----------
    X : array with one row per sample, without an intercept column.
    theta : parameter vector with one more entry than ``X`` has columns.

    Returns
    -------
    Integer array of 0/1 labels, one per row of ``X``.
    """
    n_rows = X.shape[0]
    design = np.column_stack((np.ones(n_rows), X))
    probabilities = sigmoid(np.dot(design, theta))
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``.

    Both inputs are converted to NumPy arrays first, so that:
    * plain Python lists are compared element-wise rather than as whole
      objects, and
    * pandas Series are compared by position, not by index label (comparing
      two Series with different indexes would otherwise raise).

    Parameters
    ----------
    y_true, y_pred : array-like
        Label sequences of equal length.

    Returns
    -------
    numpy.float64
        Share of matching labels.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred)


In [104]:
# Logistic Regression with L1 Regularization
def l1_regularization(theta, alpha):
    """Return ``alpha`` times the element-wise sign of ``theta``.

    ``np.sign`` yields -1, 0 or +1 per entry, so entries that are exactly
    zero contribute nothing.
    """
    direction = np.sign(theta)
    return direction * alpha

def logistic_regression_l1_regularization(X, y, alpha, epochs, lam=1.0):
    """Fit an L1-penalised logistic-regression model by batch gradient descent.

    Each epoch takes the ordinary gradient step and additionally moves every
    non-intercept weight by ``alpha * lam * sign(weight)``. The intercept
    (``theta[0]``) is never penalised.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix without an intercept column.
    y : array-like of length m
        Target labels.
    alpha : float
        Learning rate.
    epochs : int
        Number of full-batch update steps.
    lam : float, default 1.0
        Regularisation strength. Previously the penalty was implicitly tied
        to the learning rate (equivalent to ``lam=1.0``); the default keeps
        that behaviour while allowing the two to be tuned independently.

    Returns
    -------
    numpy.ndarray of shape (n + 1,)
        Fitted parameters; element 0 is the intercept.
    """
    # Work on plain float arrays so pandas index labels cannot influence the
    # element-wise arithmetic below.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    m, n = X.shape
    X = np.column_stack((np.ones(m), X))  # Add intercept term
    theta = np.zeros(n + 1)

    for _ in range(epochs):
        y_pred = sigmoid(np.dot(X, theta))
        # gradient_descent returns the step already scaled by alpha.
        # (The per-epoch cost was previously computed here but never used.)
        step = gradient_descent(X, y, y_pred, alpha)
        penalty = l1_regularization(theta[1:], alpha * lam)
        theta[0] -= step[0]
        theta[1:] -= step[1:] + penalty

    return theta


In [110]:
# Hyper-parameters for the two from-scratch models.
alpha = 0.04
epochs = 1000
alpha_l1 = 0.04

# The hand-written routines operate on plain NumPy arrays.
x_train_np = x_train.values
x_test_np = x_test.values

# Logistic Regression: fit on the training split, score on the test split.
theta_lr = logistic_regression(x_train_np, y_train, alpha, epochs)
y_pred_lr = predict(x_test_np, theta_lr)
accuracy_lr = accuracy(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_lr)

# Logistic Regression with L1 Regularization: same procedure, penalised fit.
theta_lr_l1 = logistic_regression_l1_regularization(
    x_train_np, y_train, alpha_l1, epochs
)
y_pred_lr_l1 = predict(x_test_np, theta_lr_l1)
accuracy_lr_l1 = accuracy(y_test, y_pred_lr_l1)
print("Logistic Regression with L1 Regularization Accuracy:", accuracy_lr_l1)

Logistic Regression Accuracy: 0.6406926406926406
Logistic Regression with L1 Regularization Accuracy: 0.6536796536796536


In [89]:
from sklearn.linear_model import LogisticRegression
# Reference model: scikit-learn's LogisticRegression with its default
# hyper-parameters, fitted on the same training split (as a DataFrame, so the
# estimator sees the column names).
model = LogisticRegression()
model.fit(x_train, y_train)

LogisticRegression()

In [90]:
# Inspect the test features again.
# NOTE(review): the rows in the saved output differ from the earlier x_test
# display, so the split was presumably re-run in between — confirm that all
# reported accuracies were computed on the same split.
x_test

Unnamed: 0,Pregnancies,Glucose,BMI,Age
668,6,98,34.0,43
324,2,112,35.7,21
624,2,108,30.8,21
690,8,107,24.6,34
473,7,136,29.9,50
...,...,...,...,...
619,0,119,32.4,24
198,4,109,34.8,26
538,0,127,36.3,23
329,6,105,30.8,37


In [91]:
# Inspect the test labels.
# NOTE(review): the saved output is a NumPy array (dtype=int64) rather than a
# pandas Series, so y_test was presumably converted in a cell that is no
# longer in the notebook — confirm on a fresh Restart & Run All.
y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], dtype=int64)

In [97]:
# Class probabilities for one hand-entered sample. The values are wrapped in
# a DataFrame carrying the training column names because the model was fitted
# on a DataFrame; passing a bare nested list makes scikit-learn warn that X
# has no valid feature names.
sample_1 = pd.DataFrame([[4, 141, 27.6, 40]], columns=x_train.columns)
i1 = model.predict_proba(sample_1)
print(i1)

[[0.59618549 0.40381451]]


In [98]:
# Predicted class label for one hand-entered sample. The values are wrapped
# in a DataFrame carrying the training column names because the model was
# fitted on a DataFrame; passing a bare nested list makes scikit-learn warn
# that X has no valid feature names.
sample_pred = pd.DataFrame([[4, 141, 27.6, 40]], columns=x_train.columns)
i_pred = model.predict(sample_pred)
print(i_pred)

[0]


In [99]:
# Class probabilities for a second hand-entered sample. The values are
# wrapped in a DataFrame carrying the training column names because the model
# was fitted on a DataFrame; passing a bare nested list makes scikit-learn
# warn that X has no valid feature names.
sample_2 = pd.DataFrame([[13, 106, 34.2, 52]], columns=x_train.columns)
i2 = model.predict_proba(sample_2)
print(i2)

[[0.50577434 0.49422566]]


In [100]:
# Mean accuracy of the scikit-learn model on the test split.
# The result is stored under its own name: assigning it to `accuracy` would
# replace the accuracy() helper function with a float, so any later call to
# accuracy(...) (e.g. re-running the from-scratch evaluation cell) would fail.
accuracy_sklearn = model.score(x_test, y_test)
print(accuracy_sklearn)

0.7316017316017316
