The goal here is use algorithm using pytorch functions. To train in modifcations of said algorithms.

In [159]:
import torch 
import torch.nn as nn
import torch.optim as optim 
import pandas as pd

In [160]:
#replacement for sklearn.model_selection import train_test_split 

def train_test_split_torch(X, y, test_size=0.2, shuffle=True, random_state=None):

    if not isinstance(X, torch.Tensor): #wanna make sure X and Y are tensor
        X = torch.tensor(X,dtype=torch.float32)
    if not isinstance(y, torch.Tensor):
        y = torch.tensor(y, dtype=torch.long)

    if random_state is not None:
        torch.manual_seed(random_state)
    if shuffle:
        perm = torch.randperm(X.size(0)) #generate random permutation indices
        X = X[perm]
        y = y[perm]

    # calculate split index
    split_idx = int(X.size(0) * (1 - test_size))

    #split data
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    return X_train, X_test , y_train, y_test

In [161]:
# replacement for sklean. import StandardScaler

# StandardScaler is to standardize features by removing the mean and scaling to unit variance

class StandardScaler:
    def __init__(self):
        self.mean = None
        self.std = None 

    def fit(self,X):
        self.mean = torch.mean(X,dim=0) # compute mean along the sample axis
        self.std = torch.std(X, dim=0) # compute standard deviation along the sample axis
        self.std[self.std == 0] = 1.0   # avoid division by zero for constant features

    def transform(self, X):
        if self.mean is None or self.std is None:
            raise ValueError('Scaler has not been fitted yet.')
        return (X - self.mean)/self.std 

    def fit_transform(self,X):
        self.fit(X)
        return self.transform(X)

In [162]:
def precision_score(y_true, y_pred, num_classes, average='weighted'):
    """
    Calculate the weighted precision score for multi-class classification.

    Parameters:
        y_true (torch.Tensor): Ground truth labels.
        y_pred (torch.Tensor): Predicted labels.
        num_classes (int): Number of classes.

    Returns:
        float: Weighted precision score.
    """

    true_positives = torch.zeros(num_classes)
    false_positives = torch.zeros(num_classes)
    support = torch.zeros(num_classes)

    for cls in range(num_classes):
        true_positives[cls] = ((y_pred == cls) & (y_true == cls)).sum().item()
        false_positives[cls] = ((y_pred == cls) & (y_true != cls)).sum().item()
        support[cls] = (y_true == cls).sum().item()

    precision_per_class = true_positives / (true_positives + false_positives + 1e-8)  # Add epsilon to avoid division by zero

    if average == 'weighted':
        total_samples = support.sum().item()
        return (precision_per_class * support / total_samples).sum().item()
    elif average == 'macro':
        return precision_per_class.mean().item()
    elif average == 'micro':
        total_tp = true_positives.sum().item()
        total_fp = false_positives.sum().item()
        return total_tp / (total_tp + total_fp + 1e-8)

        #sum().item() so it return scalar instead of tensor

    else: 
        raise ValueError('invalid value for average choose from weighted, macro or mciro')



In [163]:
class LogisticRegression(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__() 
        self.linear = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.linear(x)

In [164]:
# replacement for from sklearn.datasets import load_iris 

#load iris dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" # cant opt for DataLoader if the data is big
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
data = pd.read_csv(url, header=None, names=columns)

# encode species as integers
species_map = {'Iris-setosa':0,'Iris-versicolor':1,'Iris-virginica':2}
data['species'] = data['species'].map(species_map)

# extract features and labels
X = data[["sepal_length", "sepal_width", "petal_length", "petal_width"]].values
y = data["species"].values

#split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split_torch(X, y,test_size=0.2,random_state=42)

#standardize the feautres
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#convert to pytorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test,dtype=torch.long)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)

input_dim = X_train.shape[1]
num_classes = len(torch.unique(y_train))
model = LogisticRegression(input_dim, num_classes)

criterion = nn.CrossEntropyLoss() # loss function for multi-class classification
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1000 
for  epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# evaluate the model
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs,1)

    precision = precision_score(y_test.numpy(), predicted.numpy(),num_classes=num_classes,average='weighted')
    print(f"Test Precision: {precision:.4f}")


  X_train = torch.tensor(X_train, dtype=torch.float32)
  X_test = torch.tensor(X_test, dtype=torch.float32)
  y_train = torch.tensor(y_train, dtype=torch.long)
  y_test = torch.tensor(y_test,dtype=torch.long)


X_train shape: torch.Size([120, 4])
y_train shape: torch.Size([120])
Epoch [100/1000], Loss: 0.6064
Epoch [200/1000], Loss: 0.5022
Epoch [300/1000], Loss: 0.4481
Epoch [400/1000], Loss: 0.4131
Epoch [500/1000], Loss: 0.3877
Epoch [600/1000], Loss: 0.3679
Epoch [700/1000], Loss: 0.3518
Epoch [800/1000], Loss: 0.3381
Epoch [900/1000], Loss: 0.3262
Epoch [1000/1000], Loss: 0.3156
Test Precision: 0.9007


In [165]:
# Experiment with learning rate schedulers

# Reinitialize the model
model = LogisticRegression(input_dim, num_classes)

# Redefine the optimizer and scheduler
optimizer = optim.SGD(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=100, gamma=0.5)

# Training loop
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Update the learning rate
    scheduler.step()

    # Print loss and current learning rate every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")

# Evaluate the model
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)

    precision = precision_score(y_test.numpy(), predicted.numpy(), num_classes=num_classes, average='weighted')
    print(f"Test Precision: {precision:.4f}")

Epoch [100/1000], Loss: 0.6985, LR: 0.005000
Epoch [200/1000], Loss: 0.5916, LR: 0.002500
Epoch [300/1000], Loss: 0.5582, LR: 0.001250
Epoch [400/1000], Loss: 0.5445, LR: 0.000625
Epoch [500/1000], Loss: 0.5382, LR: 0.000313
Epoch [600/1000], Loss: 0.5352, LR: 0.000156
Epoch [700/1000], Loss: 0.5338, LR: 0.000078
Epoch [800/1000], Loss: 0.5330, LR: 0.000039
Epoch [900/1000], Loss: 0.5327, LR: 0.000020
Epoch [1000/1000], Loss: 0.5325, LR: 0.000010
Test Precision: 0.7860


In [166]:
# # add bias regulirzation

# # regularzing wieght also regularize bias term

# lambda_bias_reg = 0.001 

# Define regularization strengths
lambda_reg_values = [0.01, 0.001]  # Different weight regularization strengths
lambda_bias_reg = 0.001  # Bias regularization strength

num_epochs = 1000

for lambda_reg in lambda_reg_values:
    print(f"\nExperiment with lambda_reg = {lambda_reg}, lambda_bias_reg = {lambda_bias_reg}")

    # Reinitialize the model
    model = LogisticRegression(input_dim, num_classes)

    # Reinitialize the optimizer
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training loop
    for epoch in range(num_epochs):
        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Add L2 regularization for weights and bias
        l2_reg = torch.tensor(0.0)
        bias_reg = torch.tensor(0.0)

        for name, param in model.named_parameters():
            if 'bias' in name:  # Apply bias regularization
                bias_reg += torch.norm(param, 2)
            else:  # Apply weight regularization
                l2_reg += torch.norm(param, 2)

        loss += lambda_reg * l2_reg + lambda_bias_reg * bias_reg

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    # Evaluate the model
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)

        precision = precision_score(y_test.numpy(), predicted.numpy(), num_classes=num_classes, average='weighted')
        print(f"Test Precision: {precision:.4f}")


Experiment with lambda_reg = 0.01, lambda_bias_reg = 0.001
Epoch [100/1000], Loss: 0.6890
Epoch [200/1000], Loss: 0.5536
Epoch [300/1000], Loss: 0.4911
Epoch [400/1000], Loss: 0.4531
Epoch [500/1000], Loss: 0.4267
Epoch [600/1000], Loss: 0.4068
Epoch [700/1000], Loss: 0.3909
Epoch [800/1000], Loss: 0.3776
Epoch [900/1000], Loss: 0.3662
Epoch [1000/1000], Loss: 0.3561
Test Precision: 0.9007

Experiment with lambda_reg = 0.001, lambda_bias_reg = 0.001
Epoch [100/1000], Loss: 0.6981
Epoch [200/1000], Loss: 0.5522
Epoch [300/1000], Loss: 0.4860
Epoch [400/1000], Loss: 0.4456
Epoch [500/1000], Loss: 0.4172
Epoch [600/1000], Loss: 0.3955
Epoch [700/1000], Loss: 0.3778
Epoch [800/1000], Loss: 0.3630
Epoch [900/1000], Loss: 0.3501
Epoch [1000/1000], Loss: 0.3387
Test Precision: 0.9007
