In [1]:
import random
import math

# Load dataset into 2D python list
diabetes = []
with open("diabetes.csv") as f:
    for line in f:
        diabetes.append(line.strip().split(','))

# Split dataset into training, validation, and test sets
random.shuffle(diabetes)
n = len(diabetes)
n_train = int(n * 0.7)
n_val = int(n * 0.15)
n_test = n - n_train - n_val
train_set = diabetes[:n_train]
val_set = diabetes[n_train:n_train+n_val]
test_set = diabetes[n_train+n_val:]

# Define sigmoid function
def sigmoid(z):
    return 1 / (1 + math.exp(-z))

# Train the model
max_iter = 500
lr = 0.01
history = []
n_features = len(train_set[0]) - 1
theta = [random.uniform(0, 1) for _ in range(n_features + 1)]  # initialize theta randomly
for itr in range(1, max_iter+1):
    total_cost = 0
    for sample in train_set:
        X = [float(x) for x in sample[:-1]]
        X.append(1)  # add bias term
        y = int(sample[-1])
        z = sum(theta[i] * X[i] for i in range(n_features+1))
        h = sigmoid(z)
        J = - y * math.log(h) - (1-y) * math.log(1-h)
        total_cost += J
        dv = [X[i] * (h-y) for i in range(n_features+1)]
        theta = [theta[i] - lr * dv[i] for i in range(n_features+1)]
    avg_cost = total_cost / n_train
    history.append(avg_cost)

# Evaluate the model on the validation set
correct = 0
for sample in val_set:
    X = [float(x) for x in sample[:-1]]
    X.append(1)  # add bias term
    y = int(sample[-1])
    z = sum(theta[i] * X[i] for i in range(n_features+1))
    h = sigmoid(z)
    if h >= 0.5:
        h = 1
    else:
        h = 0
    if h == y:
        correct += 1
val_acc = correct * 100 / n_val
print(f"Validation accuracy: {val_acc:.2f}%")

# Evaluate the model on the test set
correct = 0
for sample in test_set:
    X = [float(x) for x in sample[:-1]]
    X.append(1)  # add bias term
    y = int(sample[-1])
    z = sum(theta[i] * X[i] for i in range(n_features+1))
    h = sigmoid(z)
    if h >= 0.5:
        h = 1
    else:
        h = 0
    if h == y:
        correct += 1
test_acc = correct * 100 / n_test
print(f"Test accuracy: {test_acc:.2f}%")


ValueError: ignored