In [None]:
import pandas as pd
import numpy as np
# Names of the nine feature columns plus the class label, in file order.
columns = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']

# Each row of glass.data carries one more field than `columns` names: a leading
# sample number. Name it and make it the index explicitly instead of relying on
# read_csv silently promoting the surplus first column to the index.
df = pd.read_csv(
    "glass+identification/glass.data",
    header=None,
    names=['Id'] + columns,
    index_col='Id'
)

# Fail loudly if the file layout is not the one the rest of the notebook expects.
assert list(df.columns) == columns, "unexpected column layout in glass.data"

print(df.head())


        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
1  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
2  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
3  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
4  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
5  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1


In [24]:
# Every column except the last is a feature; the last column ('Type') is the label.
X = df.iloc[:, :-1].to_numpy()

# Binary target: 1 where Type == 1, 0 for every other glass type.
y = (df.iloc[:, -1].to_numpy() == 1).astype(int)


In [None]:
# We use Z-score normalization (also called standardization).
def normalize(X, mean=None, std=None):
    """Standardize each column of X to zero mean and unit variance.

    Parameters
    ----------
    X : array-like
        Data to standardize; statistics are taken along axis 0.
    mean, std : array-like, optional
        Per-column statistics to use instead of computing them from X.
        Pass the statistics of the training rows here to standardize
        held-out rows without using their own values.

    Returns
    -------
    numpy.ndarray
        (X - mean) / std as a float array.
    """
    X = np.asarray(X, dtype=float)
    if mean is None:
        mean = np.mean(X, axis=0)
    if std is None:
        std = np.std(X, axis=0)

    # A constant column has std == 0, which would turn the whole column into
    # NaN (0 / 0). Dividing by 1 instead leaves such a column at exactly 0.
    std = np.where(np.asarray(std) == 0, 1.0, std)
    return (X - mean) / std

# Replace the raw feature matrix with its standardized version.
# NOTE(review): this runs before the train/validation/test split, so the mean
# and std are computed over all rows, including the ones later held out.
# Consider computing the statistics from the training rows only.
X = normalize(X)


In [7]:
def split_data(X, y, train_ratio=0.7, val_ratio=0.15, seed=None):
    """Shuffle the rows of X and y together and cut them into three parts.

    Parameters
    ----------
    X : array-like
        Feature matrix; rows are samples.
    y : array-like
        Labels, one per row of X.
    train_ratio : float
        Fraction of rows assigned to the training part.
    val_ratio : float
        Fraction of rows assigned to the validation part; whatever remains
        after train and validation becomes the test part.
    seed : int, optional
        When given, the shuffle uses its own generator seeded with this value
        and is reproducible. When None, the shuffle uses NumPy's global random
        state (np.random.permutation), as before.

    Returns
    -------
    tuple
        (X_train, y_train, X_val, y_val, X_test, y_test)

    Raises
    ------
    ValueError
        If X and y have different numbers of rows, or the ratios are negative
        or sum to more than 1.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n = X.shape[0]

    if y.shape[0] != n:
        raise ValueError(f"X has {n} rows but y has {y.shape[0]}")
    # Small tolerance so ratios meant to sum to exactly 1 survive float rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-12:
        raise ValueError("train_ratio and val_ratio must be >= 0 and sum to at most 1")

    if seed is None:
        indices = np.random.permutation(n)
    else:
        indices = np.random.default_rng(seed).permutation(n)

    # Cut points are truncated towards zero, so rounding leftovers go to the test part.
    train_end = int(train_ratio * n)
    val_end = int((train_ratio + val_ratio) * n)

    train_idx = indices[:train_end]
    val_idx = indices[train_end:val_end]
    test_idx = indices[val_end:]

    return X[train_idx], y[train_idx], X[val_idx], y[val_idx], X[test_idx], y[test_idx]


In [8]:
# Seed NumPy's global generator first: split_data shuffles with
# np.random.permutation, so without a fixed seed every run yields a different
# split and different accuracies downstream.
np.random.seed(42)
X_train, y_train, X_val, y_val, X_test, y_test = split_data(X, y)


In [None]:
# The sigmoid function maps the linear output of logistic regression to a
# probability between 0 and 1, which makes it suitable for binary classification.
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + exp(-z)).

    Parameters
    ----------
    z : float or array-like
        Linear scores.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of z, same shape as z.
    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for large negative z.
    decay = np.exp(-np.abs(z))

    # For z >= 0 this is the textbook form; for z < 0 use the algebraically
    # equal exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of any other rank unchanged.
    return result[()]


In [None]:
# Predicted probability that each input belongs to class 1: the linear
# combination of inputs and weights, plus the bias, passed through the sigmoid.
def predict_prob(X, weights, bias):
    """Return sigmoid(X . weights + bias).

    Parameters
    ----------
    X : array-like
        Input features.
    weights : array-like
        Coefficients combined with X via np.dot.
    bias : float
        Offset added to the linear combination.
    """
    return sigmoid(np.dot(X, weights) + bias)


In [None]:
# Binary cross-entropy: measures how far the predicted probabilities are from
# the true labels and heavily penalizes confident incorrect predictions, which
# makes it the standard loss for logistic regression.
def loss(y, y_hat):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1).
    y_hat : array-like
        Predicted probabilities of class 1.

    Returns
    -------
    numpy.float64
        Average loss over all samples; never negative.
    """
    epsilon = 1e-9
    y = np.asarray(y, dtype=float)

    # Clip into [epsilon, 1 - epsilon] rather than adding epsilon inside the
    # log: log(y_hat + epsilon) is positive when y_hat == 1, which made the
    # loss slightly negative for perfect predictions. Clipping still keeps
    # log() away from 0.
    y_hat = np.clip(np.asarray(y_hat, dtype=float), epsilon, 1 - epsilon)

    return -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))


In [None]:
# Learn the weights and bias that minimize the loss function.
def train(X, y, lr=0.01, epochs=1000, log_every=100):
    """Fit logistic-regression parameters with full-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; X.shape[1] sets the number of weights.
    y : numpy.ndarray
        Labels, one per row of X.
    lr : float
        Step size applied to each gradient update.
    epochs : int
        Number of passes over the whole of X.
    log_every : int or None
        Print the loss every this many epochs (starting at epoch 0).
        Pass 0 or None to train silently.

    Returns
    -------
    tuple
        (weights, bias) after the final update.
    """
    n_features = X.shape[1]
    weights = np.zeros(n_features)
    bias = 0.0

    for epoch in range(epochs):
        y_hat = predict_prob(X, weights, bias)

        # The residual drives both gradients, so compute it once.
        error = y_hat - y
        dw = np.dot(X.T, error) / len(y)
        db = np.mean(error)

        weights -= lr * dw
        bias -= lr * db

        # The reported loss belongs to the predictions made *before* this
        # epoch's update (y_hat was computed from the previous parameters).
        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}, Loss: {loss(y, y_hat):.4f}")

    return weights, bias


In [13]:
# Fit the model on the training split only, using train()'s default
# learning rate and epoch count.
weights, bias = train(X_train, y_train)


Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.6080
Epoch 200, Loss: 0.5663
Epoch 300, Loss: 0.5414
Epoch 400, Loss: 0.5245
Epoch 500, Loss: 0.5121
Epoch 600, Loss: 0.5026
Epoch 700, Loss: 0.4950
Epoch 800, Loss: 0.4888
Epoch 900, Loss: 0.4836


In [None]:
# After training, predicted probabilities are converted into class labels with
# a threshold of 0.5; those labels are what the accuracy on the training,
# validation, and test sets is computed from.
def predict(X, weights, bias):
    """Return hard 0/1 labels: 1 where the predicted probability is >= 0.5."""
    is_positive = predict_prob(X, weights, bias) >= 0.5
    return np.asarray(is_positive).astype(int)


In [15]:
def accuracy(y_true, y_pred):
    """Fraction of predictions that equal their true label.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Both are flattened first, so an (n,) vector
        can be compared with an (n, 1) column.

    Returns
    -------
    numpy.float64
        Share of positions where the two inputs agree.

    Raises
    ------
    ValueError
        If the inputs hold different numbers of labels.
    """
    # Convert first: `==` on plain Python lists compares the lists as a whole
    # and yields a single bool, not an element-wise result.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    # Without flattening and this check, (n,) vs (n, 1) inputs would broadcast
    # to an n x n comparison and report a meaningless number.
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true has {y_true.size} labels but y_pred has {y_pred.size}"
        )

    return np.mean(y_true == y_pred)


In [16]:
# Score the fitted model on each split with the same predict -> accuracy pipeline.
train_acc, val_acc, test_acc = (
    accuracy(labels, predict(features, weights, bias))
    for features, labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Report the three scores in train / validation / test order.
for caption, score in (
    ("Train Accuracy:", train_acc),
    ("Validation Accuracy:", val_acc),
    ("Test Accuracy:", test_acc),
):
    print(caption, score)


Train Accuracy: 0.7315436241610739
Validation Accuracy: 0.875
Test Accuracy: 0.696969696969697
