### Diabetes Prediction using Pima Indians Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [17]:
# Pima Indians Diabetes data: the CSV ships without a header row,
# so the column names are supplied explicitly.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
data = pd.read_csv(url, names=columns, header=None)

# Preview the first rows of the frame
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:

class Perceptron:
    """Single-layer perceptron with a unit-step activation, trained sample by sample."""

    def __init__(self, learning_rate=0.01, epochs=1000):
        """Record the hyper-parameters; weights and bias stay unset until fit()."""
        self.weights = None
        self.bias = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def fit(self, X, y):
        """Learn weights and bias from X (2-D, one row per sample) and targets y.

        Every epoch visits each sample in order and applies the perceptron
        rule: parameters move by learning_rate * (target - prediction).
        """
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.epochs):
            for i, sample in enumerate(X):
                # Pre-activation for this sample
                activation = np.dot(sample, self.weights) + self.bias

                # Unit step: a value of exactly zero counts as the positive class
                if activation >= 0:
                    guess = 1
                else:
                    guess = 0

                # The step is zero whenever the guess already matches the target
                step = self.learning_rate * (y[i] - guess)
                self.weights += step * sample
                self.bias += step

    def predict(self, X):
        """Return 1 where the pre-activation is non-negative and 0 elsewhere."""
        scores = np.dot(X, self.weights) + self.bias
        return np.where(scores >= 0, 1, 0)






In [18]:
# Show the frame dimensions as (rows, columns)
data.shape

(768, 9)

In [None]:
# Descriptive statistics for every column of the frame
data.describe()

In [19]:
# Every column except the last is a feature; the last column is the 0/1 target
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Echo each array followed by its shape as a quick sanity check
for array in (X, y):
    print(array)
    print(array.shape)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

[[  6.    148.     72.    ...  33.6     0.627  50.   ]
 [  1.     85.     66.    ...  26.6     0.351  31.   ]
 [  8.    183.     64.    ...  23.3     0.672  32.   ]
 ...
 [  5.    121.     72.    ...  26.2     0.245  30.   ]
 [  1.    126.     60.    ...  30.1     0.349  47.   ]
 [  1.     93.     70.    ...  30.4     0.315  23.   ]]
(768, 8)
[1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0
 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0
 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0
 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 1 1
 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 1 1 0 0
 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 

In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# NOTE: iterating a DataFrame walks its column labels, not its rows, so `row`
# here is a column name and the loop prints one line per column.
# NOTE(review): if per-row values were intended, this needs a row iterator — confirm.
for index, row in enumerate(data):
    print(f" Index: {index}, values: {row}")

In [None]:
class Perceptron:
    """Binary perceptron classifier trained with the online perceptron rule.

    A sample is assigned class 1 when ``w . x + b >= 0`` and class 0 otherwise.
    Inputs may be any array-like (NumPy arrays, nested lists, DataFrames);
    labels may be flat or column vectors.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        """Store the hyper-parameters; the model itself is created by ``fit``.

        Args:
            learning_rate: Multiplier applied to each correction step.
            epochs: Number of complete passes over the training data.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None  # 1-D array with one weight per feature, set by fit()
        self.bias = None  # scalar intercept, set by fit()

    def fit(self, feature_matrix, labels):
        """Train the weights and bias on the given samples.

        Args:
            feature_matrix: 2-D array-like, one row per sample.
            labels: Array-like of 0/1 targets, one per row. A column vector
                of shape (n, 1) is accepted and flattened.

        Raises:
            ValueError: If ``feature_matrix`` is not two-dimensional.
        """
        # Coerce to ndarrays so rows are iterated positionally. Iterating a
        # DataFrame directly would yield column names instead of samples.
        feature_matrix = np.asarray(feature_matrix, dtype=float)
        # Flatten so each label is a scalar; otherwise a column vector would
        # turn the bias into a 1-element array.
        labels = np.asarray(labels).ravel()

        _, num_features = feature_matrix.shape
        self.weights = np.zeros(num_features)
        self.bias = 0.0

        for _ in range(self.epochs):
            for index, row in enumerate(feature_matrix):
                # Linear pre-activation for this sample
                linear_output = np.dot(row, self.weights) + self.bias
                # Unit step: exactly zero counts as the positive class
                predicted_label = 1 if linear_output >= 0 else 0

                # Zero when the prediction is already correct, otherwise
                # +/- learning_rate in the direction of the true label
                update = self.learning_rate * (labels[index] - predicted_label)

                self.weights += update * row
                self.bias += update

    def predict(self, feature_matrix):
        """Return a 0/1 prediction for every row of ``feature_matrix``.

        Args:
            feature_matrix: 2-D array-like with the same number of columns
                as the data passed to ``fit``.

        Returns:
            1-D integer array of predicted labels.
        """
        feature_matrix = np.asarray(feature_matrix, dtype=float)
        linear_output = np.dot(feature_matrix, self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)

    def accuracy(self, feature_matrix, labels):
        """Return the fraction (0.0 to 1.0) of rows whose prediction matches ``labels``.

        Args:
            feature_matrix: 2-D array-like of samples to score.
            labels: Array-like of true 0/1 targets; flat or column vector.
        """
        # Flatten first: comparing (n,) predictions with (n, 1) labels would
        # broadcast to an (n, n) matrix and yield a meaningless score.
        labels = np.asarray(labels).ravel()
        predictions = self.predict(feature_matrix)
        return np.mean(predictions == labels)


In [None]:
# Fit the hand-written perceptron on the training split
perceptron = Perceptron(epochs=1000, learning_rate=0.01)
perceptron.fit(X_train, y_train)

In [None]:
# Score the trained perceptron on the held-out split in two equivalent ways
predictions = perceptron.predict(X_test)
accuracy = (predictions == y_test).mean()  # computed by hand from the predictions
accuracy1 = perceptron.accuracy(X_test, y_test)  # via the class helper

# First line shows full precision, second line rounds to two decimals
print("Perceptron Classification Accuracy on Pima Indians Diabetes Dataset:", accuracy)
print("Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: {:.2f}".format(accuracy1))



accuracy at 10 epochs:<br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.8181818181818182 <br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.82

accuracy at 100 epochs: <br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.7402597402597403 <br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.74

accuracy at 1000 epochs: <br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.7662337662337663 <br>
Perceptron Classification Accuracy on Pima Indians Diabetes Dataset: 0.77

#### With TensorFlow

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fix the Python, NumPy and TensorFlow seeds so weight initialisation and
# batch shuffling are repeatable across Restart & Run All
tf.keras.utils.set_random_seed(42)

# Build model: a single sigmoid unit fed by every input feature.
# The input width is read from the training data instead of being hard-coded.
model = Sequential([
    Dense(1, activation='sigmoid', input_shape=(X_train.shape[1],))
])

# Compile: plain SGD minimising binary cross-entropy, reporting accuracy
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently; Keras holds back 20% of the training rows for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=1000,
    verbose=0,
)

# Evaluate on both splits; each call returns (loss, accuracy)
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

print(f"\nTensorFlow Perceptron - Training accuracy: {train_acc:.4f}")
print(f"TensorFlow Perceptron - Test accuracy: {test_acc:.4f}")

### MLP with One hidden layer

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features, fitting the scaler on the training split only.
# This fresh split discards any earlier scaling of X_train/X_test, and raw
# inputs (e.g. Glucose and Insulin values above 100) push the sigmoid units
# into saturation, which stalls gradient descent.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the targets into column vectors so they line up with the
# (n_samples, 1) network output (this is a reshape, not one-hot encoding)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Define the MLP architecture
input_neurons = X.shape[1]
hidden_neurons = 64
output_neurons = 1
learning_rate = 0.01
epochs = 1000

# Initialize weights with small random values and biases with zeros;
# the fixed seed makes the initialisation repeatable
np.random.seed(42)
weights_input_hidden = np.random.randn(input_neurons, hidden_neurons) * 0.01
bias_hidden = np.zeros((1, hidden_neurons))
weights_hidden_output = np.random.randn(hidden_neurons, output_neurons) * 0.01
bias_output = np.zeros((1, output_neurons))

# Activation functions
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``, element-wise.

    The naive formula overflows in ``exp`` for large negative inputs and
    emits a RuntimeWarning. This version only ever exponentiates a
    non-positive number, so it cannot overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        ndarray of the same shape as ``x`` with values in [0, 1]
        (a 0-d array when ``x`` is a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — the same function
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

def sigmoid_derivative(x):
    """Derivative of the logistic function, evaluated at pre-activation ``x``.

    Computed as ``s * (1 - s)`` where ``s = sigmoid(x)``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

# Full-batch gradient descent for the one-hidden-layer network
for epoch in range(epochs):
    # Forward pass: inputs -> sigmoid hidden layer -> sigmoid output
    hidden_layer_input = X_train.dot(weights_input_hidden) + bias_hidden
    hidden_layer_activation = sigmoid(hidden_layer_input)
    output_layer_input = hidden_layer_activation.dot(weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_input)

    # Residual between the targets and the network output
    error = y_train - predicted_output

    # Backward pass: push the residual back through each sigmoid in turn.
    # Both deltas are computed before any parameter is modified.
    d_predicted_output = sigmoid_derivative(output_layer_input) * error
    error_hidden_layer = d_predicted_output.dot(weights_hidden_output.T)
    d_hidden_layer = sigmoid_derivative(hidden_layer_input) * error_hidden_layer

    # Parameter step; it is added because `error` is target minus prediction
    weights_input_hidden += learning_rate * X_train.T.dot(d_hidden_layer)
    bias_hidden += learning_rate * d_hidden_layer.sum(axis=0, keepdims=True)
    weights_hidden_output += learning_rate * hidden_layer_activation.T.dot(d_predicted_output)
    bias_output += learning_rate * d_predicted_output.sum(axis=0, keepdims=True)

    # Report the mean squared error on every 100th epoch
    if not (epoch + 1) % 100:
        loss = np.square(error).mean()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

# Evaluate the model
def predict(X):
    """Classify samples with the one-hidden-layer network.

    Reads the module-level weights and biases (``weights_input_hidden``,
    ``bias_hidden``, ``weights_hidden_output``, ``bias_output``).

    Args:
        X: Feature matrix, one row per sample.

    Returns:
        Integer array holding 1 where the output activation exceeds 0.5
        and 0 elsewhere.
    """
    hidden = sigmoid(np.dot(X, weights_input_hidden) + bias_hidden)
    probability = sigmoid(np.dot(hidden, weights_hidden_output) + bias_output)
    return (probability > 0.5).astype(int)

# Accuracy is the share of samples whose thresholded output equals the target
train_accuracy = (predict(X_train) == y_train).mean()
test_accuracy = (predict(X_test) == y_test).mean()

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


### MLP with 2 Hidden layers


In [None]:
X = data.drop(columns=['Outcome']).values
y = data['Outcome'].values

# Split into training and testing sets BEFORE scaling, so the test rows
# cannot influence the scaling statistics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: fit on the training split only, then apply to both
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the targets into column vectors so they line up with the
# (n_samples, 1) network output (this is a reshape, not one-hot encoding)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Define the MLP architecture
input_neurons = X.shape[1]
hidden_neurons_1 = 64
hidden_neurons_2 = 32
output_neurons = 1
learning_rate = 0.01
epochs = 1000

# Initialize weights with small random values and biases with zeros;
# the fixed seed makes the initialisation repeatable
np.random.seed(42)
weights_input_hidden1 = np.random.randn(input_neurons, hidden_neurons_1) * 0.01
bias_hidden1 = np.zeros((1, hidden_neurons_1))
weights_hidden1_hidden2 = np.random.randn(hidden_neurons_1, hidden_neurons_2) * 0.01
bias_hidden2 = np.zeros((1, hidden_neurons_2))
weights_hidden_output = np.random.randn(hidden_neurons_2, output_neurons) * 0.01
bias_output = np.zeros((1, output_neurons))

# Activation functions
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``, element-wise.

    The naive formula overflows in ``exp`` for large negative inputs and
    emits a RuntimeWarning. This version only ever exponentiates a
    non-positive number, so it cannot overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        ndarray of the same shape as ``x`` with values in [0, 1]
        (a 0-d array when ``x`` is a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — the same function
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

def sigmoid_derivative(x):
    """Derivative of the logistic function, evaluated at pre-activation ``x``.

    Computed as ``s * (1 - s)`` where ``s = sigmoid(x)``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

# Full-batch gradient descent for the two-hidden-layer network
for epoch in range(epochs):
    # Forward pass: inputs -> hidden 1 -> hidden 2 -> output, all sigmoid
    hidden_layer1_input = X_train.dot(weights_input_hidden1) + bias_hidden1
    hidden_layer1_activation = sigmoid(hidden_layer1_input)

    hidden_layer2_input = hidden_layer1_activation.dot(weights_hidden1_hidden2) + bias_hidden2
    hidden_layer2_activation = sigmoid(hidden_layer2_input)

    output_layer_input = hidden_layer2_activation.dot(weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_input)

    # Residual between the targets and the network output
    error = y_train - predicted_output

    # Backward pass: propagate the residual layer by layer.
    # All deltas are computed before any parameter is modified.
    d_predicted_output = sigmoid_derivative(output_layer_input) * error

    error_hidden_layer2 = d_predicted_output.dot(weights_hidden_output.T)
    d_hidden_layer2 = sigmoid_derivative(hidden_layer2_input) * error_hidden_layer2

    error_hidden_layer1 = d_hidden_layer2.dot(weights_hidden1_hidden2.T)
    d_hidden_layer1 = sigmoid_derivative(hidden_layer1_input) * error_hidden_layer1

    # Parameter step; it is added because `error` is target minus prediction
    weights_input_hidden1 += learning_rate * X_train.T.dot(d_hidden_layer1)
    bias_hidden1 += learning_rate * d_hidden_layer1.sum(axis=0, keepdims=True)

    weights_hidden1_hidden2 += learning_rate * hidden_layer1_activation.T.dot(d_hidden_layer2)
    bias_hidden2 += learning_rate * d_hidden_layer2.sum(axis=0, keepdims=True)

    weights_hidden_output += learning_rate * hidden_layer2_activation.T.dot(d_predicted_output)
    bias_output += learning_rate * d_predicted_output.sum(axis=0, keepdims=True)

    # Report the mean squared error on every 100th epoch
    if not (epoch + 1) % 100:
        loss = np.square(error).mean()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

# Evaluate the model
def predict(X):
    """Classify samples with the two-hidden-layer network.

    Reads the module-level weights and biases (``weights_input_hidden1``,
    ``bias_hidden1``, ``weights_hidden1_hidden2``, ``bias_hidden2``,
    ``weights_hidden_output``, ``bias_output``).

    Args:
        X: Feature matrix, one row per sample.

    Returns:
        Integer array holding 1 where the output activation exceeds 0.5
        and 0 elsewhere.
    """
    first_hidden = sigmoid(np.dot(X, weights_input_hidden1) + bias_hidden1)
    second_hidden = sigmoid(np.dot(first_hidden, weights_hidden1_hidden2) + bias_hidden2)
    probability = sigmoid(np.dot(second_hidden, weights_hidden_output) + bias_output)
    return (probability > 0.5).astype(int)

# Accuracy is the share of samples whose thresholded output equals the target
train_accuracy = (predict(X_train) == y_train).mean()
test_accuracy = (predict(X_test) == y_test).mean()

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


#### MLP with TensorFlow

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


# Split features and target
X = data.drop(columns=['Outcome']).values
y = data['Outcome'].values

# Split the dataset into training and testing sets BEFORE scaling, so the
# test rows cannot influence the scaling statistics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: fit on the training split only, then apply to both
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fix the Python, NumPy and TensorFlow seeds so weight initialisation and
# batch shuffling are repeatable across Restart & Run All
tf.keras.utils.set_random_seed(42)

# Define the MLP model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # Hidden layer 1
    Dense(32, activation='relu'),  # Hidden layer 2
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model: Adam optimiser minimising binary cross-entropy, reporting accuracy
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# Train with per-epoch progress output; Keras holds back 20% of the
# training rows for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    verbose=1,
)

# Evaluate on both splits; each call returns (loss, accuracy)
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Threshold the predicted probabilities at 0.5 to obtain 0/1 class labels
predictions = (model.predict(X_test) > 0.5).astype(int)
