In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer 

In [64]:
class Perceptron:
    def __init__(self, num_features, lr=0.01):
        self.num_features = num_features
        self.weights = np.zeros(num_features, dtype=np.float64) 
        self.bias = 0.0  
        self.lr = lr  

    def forward(self, x):
        x = np.asarray(x, dtype=np.float64)  
        weighted_sum_z = self.bias + np.dot(x, self.weights)
        return 1 if weighted_sum_z >= 0 else -1

    def update(self, x, y_true):
        x = np.asarray(x, dtype=np.float64) 
        y_true = float(y_true) 
        prediction = self.forward(x)
        error = y_true - prediction
        self.bias += self.lr * error 
        self.weights += self.lr * error * x  
        return error

#     def train(self, X, Y, epochs=10):
#         X = np.asarray(X, dtype=np.float64)  
#         Y = np.asarray(Y, dtype=np.float64)  
#         for epoch in range(epochs):
#             total_errors = 0
#             for x, y in zip(X, Y):
#                 error = self.update(x, y)
#                 total_errors += abs(error)
#             print(f'Epoch {epoch+1}/{epochs}, Total Errors: {total_errors}')
    def train(self, X, Y, epochs=10):
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y, dtype=np.float64)
        for epoch in range(epochs):
            total_errors = 0
            # Shuffle the data at the start of each epoch
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            X = X[indices]
            Y = Y[indices]
            for x, y in zip(X, Y):
                error = self.update(x, y)
                total_errors += abs(error)
            print(f'Epoch {epoch+1}/{epochs}, Total Errors: {total_errors}')

    def predict(self, X):
        X = np.asarray(X, dtype=np.float64)  
        predictions = []
        for x in X:
            predictions.append(self.forward(x))
        return np.array(predictions)

In [71]:
#read the dataset
data = pd.read_csv('./data/diabetes_scale.csv')

#Fix up those missing data
imr = SimpleImputer(missing_values=np.nan, strategy='mean')
imputed_data = pd.DataFrame(imr.fit_transform(data), columns=data.columns)

#print(imputed_data)

features = data.drop("outcome", axis=1) 
target = data["outcome"]
x_train,x_test,y_train,y_test = train_test_split(features,target,test_size=0.2,random_state=42)

#print(x_train.shape)
#print(y_train.shape)
#print(x_test.shape)
#print(y_test.shape)

scaler = StandardScaler() 
x_train = scaler.fit_transform(x_train) 
x_test = scaler.transform(x_test) 
#Let the classification be more directly viewed by changing 0 to -1 .
# target = np.where(target == 1, 1, -1)

data.columns = ['Outcome', 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
#print(data.head(16))

#Derive features.
features = data.drop('Outcome', axis=1).values 
target = data['Outcome'].values  



In [72]:
#Initial
num_features = x_train.shape[1] 

ppn = Perceptron(num_features=num_features, lr=0.001) 

In [73]:
ppn.train(x_train, y_train, epochs=100) 
#Set the training epochs as 100 



Epoch 1/100, Total Errors: 786.0
Epoch 2/100, Total Errors: 802.0
Epoch 3/100, Total Errors: 802.0
Epoch 4/100, Total Errors: 802.0
Epoch 5/100, Total Errors: 802.0
Epoch 6/100, Total Errors: 802.0
Epoch 7/100, Total Errors: 802.0
Epoch 8/100, Total Errors: 802.0
Epoch 9/100, Total Errors: 802.0
Epoch 10/100, Total Errors: 802.0
Epoch 11/100, Total Errors: 802.0
Epoch 12/100, Total Errors: 802.0
Epoch 13/100, Total Errors: 802.0
Epoch 14/100, Total Errors: 802.0
Epoch 15/100, Total Errors: 802.0
Epoch 16/100, Total Errors: 802.0
Epoch 17/100, Total Errors: 802.0
Epoch 18/100, Total Errors: 802.0
Epoch 19/100, Total Errors: 802.0
Epoch 20/100, Total Errors: 802.0
Epoch 21/100, Total Errors: 802.0
Epoch 22/100, Total Errors: 802.0
Epoch 23/100, Total Errors: 802.0
Epoch 24/100, Total Errors: 802.0
Epoch 25/100, Total Errors: 802.0
Epoch 26/100, Total Errors: 802.0
Epoch 27/100, Total Errors: 802.0
Epoch 28/100, Total Errors: 802.0
Epoch 29/100, Total Errors: 802.0
Epoch 30/100, Total Err

In [74]:
y_pred_test = ppn.predict(x_test) 

print(y_pred_test)

[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]


In [75]:
pn = Perceptron(num_features=x_train.shape[1], lr=0.0001) 

ppn.train(x_train, y_train, epochs=100) 

y_pred_test = ppn.predict(x_test) 

accuracy = np.mean(y_pred_test == y_test) 
print(f"Accuracy on test set: {accuracy:.2f}")

Epoch 1/100, Total Errors: 802.0
Epoch 2/100, Total Errors: 802.0
Epoch 3/100, Total Errors: 802.0
Epoch 4/100, Total Errors: 802.0
Epoch 5/100, Total Errors: 802.0
Epoch 6/100, Total Errors: 802.0
Epoch 7/100, Total Errors: 802.0
Epoch 8/100, Total Errors: 802.0
Epoch 9/100, Total Errors: 802.0
Epoch 10/100, Total Errors: 802.0
Epoch 11/100, Total Errors: 802.0
Epoch 12/100, Total Errors: 802.0
Epoch 13/100, Total Errors: 802.0
Epoch 14/100, Total Errors: 802.0
Epoch 15/100, Total Errors: 802.0
Epoch 16/100, Total Errors: 802.0
Epoch 17/100, Total Errors: 802.0
Epoch 18/100, Total Errors: 802.0
Epoch 19/100, Total Errors: 802.0
Epoch 20/100, Total Errors: 802.0
Epoch 21/100, Total Errors: 802.0
Epoch 22/100, Total Errors: 802.0
Epoch 23/100, Total Errors: 802.0
Epoch 24/100, Total Errors: 802.0
Epoch 25/100, Total Errors: 802.0
Epoch 26/100, Total Errors: 802.0
Epoch 27/100, Total Errors: 802.0
Epoch 28/100, Total Errors: 802.0
Epoch 29/100, Total Errors: 802.0
Epoch 30/100, Total Err