1. Download and load the data

In [1]:
import requests

url = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/diabetes'
filename = 'diabetes_libsvm.data'  

response = requests.get(url)

if response.status_code == 200:
    with open(filename, 'wb') as file:
        file.write(response.content)
    print(f'File downloaded successfully as {filename}')
else:
    print(f'Failed to download the file. Status code: {response.status_code}')

File downloaded successfully as diabetes_libsvm.data


In [94]:
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
file_path = 'diabetes_libsvm.data'

X, y = load_svmlight_file(file_path)

X_dense = X.toarray()

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")

X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.4)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

Features shape: (768, 8)
Labels shape: (768,)


2. Perception model testing and performance analysis

In [102]:
from sklearn.linear_model import Perceptron
def train_perceptron(max_iter: int, eta0: float):
    perceptron = Perceptron(max_iter=max_iter, eta0=eta0)
    perceptron.fit(X_train_scaled, y_train)
    return perceptron

from sklearn.metrics import accuracy_score

def evaluate_model(model):
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_pred,y_test)
    return accuracy


# Initialize the Perceptron
perception = train_perceptron(max_iter=2, eta0=0.1)
accuracy = evaluate_model(perception)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7142857142857143


3. Experiment and analysis

Define ranges for Perceptron hyperparameters



In [103]:
import numpy as np
max_iter_values = []
for i in range(1,500,5):
    max_iter_values.append(i)

eta0_values = []
for i in np.arange(0.01, 1, 0.01):
    eta0_values.append(i)


Loop and get best result

In [104]:
import warnings

warnings.filterwarnings("ignore")

best_accuracy = 0.0
best_model = None
best_hyperparameters = {}

# Train and evaluate models for each combination of hyperparameters
for max_iter in max_iter_values:
    for eta0 in eta0_values:
        # Train the model
        model = train_perceptron(max_iter, eta0)

        # Evaluate the model
        accuracy = evaluate_model(model)
        # Update the best model if this one is better
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
            best_hyperparameters = {
                'max_iter': max_iter,
                'eta0': eta0
            }

# Print the best model details
print("Best Model Based on Accuracy:")
print(f"max_iter: {best_hyperparameters['max_iter']}, eta0: {best_hyperparameters['eta0']}")
print(f"Highest Accuracy: {best_accuracy:.2f}")


Best Model Based on Accuracy:
max_iter: 6, eta0: 0.01
Highest Accuracy: 0.70
