### Diabetes Prediction using Pima Indians Dataset

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [63]:
# Load the Pima Indians Diabetes Dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 
           'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, header=None, names=columns)
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [36]:

class Perceptron:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.epochs):
            for idx, x_i in enumerate(X):
                # Weighted sum
                linear_output = np.dot(x_i, self.weights) + self.bias
                # Step function
                y_pred = 1 if linear_output >= 0 else 0
                # Update rule
                update = self.learning_rate * (y[idx] - y_pred)
                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)






In [37]:
data.shape

(768, 9)

In [54]:
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [64]:
# Separate features and target
X = data.iloc[:, :-1].values  # Features
print(X)
print(X.shape)
y = data.iloc[:, -1].values   # Target (0 or 1)
print(y)
print(y.shape)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

[[  6.    148.     72.    ...  33.6     0.627  50.   ]
 [  1.     85.     66.    ...  26.6     0.351  31.   ]
 [  8.    183.     64.    ...  23.3     0.672  32.   ]
 ...
 [  5.    121.     72.    ...  26.2     0.245  30.   ]
 [  1.    126.     60.    ...  30.1     0.349  47.   ]
 [  1.     93.     70.    ...  30.4     0.315  23.   ]]
(768, 8)
[1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0
 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0
 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0
 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 1 1
 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 1 1 0 0
 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 

In [65]:
# Standardize features for better performance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [66]:
for index, row in enumerate(data):
    print(f" Index: {index}, values: {row}")

 Index: 0, values: Pregnancies
 Index: 1, values: Glucose
 Index: 2, values: BloodPressure
 Index: 3, values: SkinThickness
 Index: 4, values: Insulin
 Index: 5, values: BMI
 Index: 6, values: DiabetesPedigreeFunction
 Index: 7, values: Age
 Index: 8, values: Outcome


In [67]:
class Perceptron:
    def __init__(self, learning_rate=0.01, epochs=1000):
        # Initialize learning rate and number of training epochs
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None 
        self.bias = None
        
    def fit(self, feature_matrix, labels):
        # Get the number of samples (rows) and features (columns)
        num_samples, num_features = feature_matrix.shape
        self.weights = np.zeros(num_features)  # Initialize weights as zeros 
        self.bias = 0  # Initialize bias as zero
        
        # Iterate through epochs
        for _ in range(self.epochs):
            for index, row in enumerate(feature_matrix):
                # Compute the linear output
                linear_output = np.dot(row, self.weights) + self.bias
                # Apply the step function for binary classification
                predicted_label = 1 if linear_output >= 0 else 0
                
                # Calculate the update based on the difference between actual and predicted labels
                update = self.learning_rate * (labels[index] - predicted_label)
                
                # Update weights and bias
                self.weights += update * row
                self.bias += update


    def predict(self, feature_matrix):
        # Compute the linear output for the entire feature matrix
        linear_output = np.dot(feature_matrix, self.weights) + self.bias
        # Apply the step function to produce binary predictions
        return np.where(linear_output >= 0, 1, 0)
    
    def accuracy(self, feature_matrix, labels):
        # Predict labels for the feature matrix
        predictions = self.predict(feature_matrix)
        # Calculate accuracy as the percentage of correct predictions
        return np.mean(predictions == labels)


In [68]:
# Train the perceptron
perceptron = Perceptron(learning_rate=0.01, epochs=1000)
perceptron.fit(X_train, y_train)

In [74]:
# Evaluate the perceptron
predictions = perceptron.predict(X_test)
accuracy = np.mean(predictions == y_test)
accuracy1= perceptron.accuracy(X_test, y_test) #using the function
print("Perceptron Classification Accuracy on Pima Indians Diabetes Dataset:", accuracy)
print("Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: {:.2f}".format(accuracy1))



Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.7662337662337663
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.77


#### with tensorflow

In [42]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build model
model = Sequential([
    Dense(1, activation='sigmoid', input_shape=(8,))
])

# Compile
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train
history = model.fit(X_train, y_train,
                   epochs=100,
                   batch_size=32,
                   validation_split=0.2,
                   verbose=0)

# Evaluate
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

print(f"\nTensorFlow Perceptron - Training accuracy: {train_acc:.4f}")
print(f"TensorFlow Perceptron - Test accuracy: {test_acc:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



TensorFlow Perceptron - Training accuracy: 0.7622
TensorFlow Perceptron - Test accuracy: 0.7792
