In [4]:
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# --- Configuration ---
# Hyperparameters and run-wide constants, gathered in one place for tuning.
SEED = 42
NUM_EPOCHS = 20
LEARNING_RATE = 0.01
BATCH_SIZE = 64
INPUT_SIZE = 28 * 28  # one feature per pixel of a 28x28 image (784 in total)

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed PyTorch's random number generators so weight initialisation and
# DataLoader shuffling repeat from run to run.
torch.manual_seed(SEED)
if DEVICE.type == "cuda":
    torch.cuda.manual_seed(SEED)

# ----------------------------------------------------------------------
# 1. DATA PREPARATION UTILITIES (FILTERING 0s and 1s)
# ----------------------------------------------------------------------

class BinaryMNISTDataset(Dataset):
    """
    In-memory dataset pairing flattened images with float labels.

    The class does no filtering or relabeling itself: it stores whatever
    `data` / `targets` tensors it is handed, reshaped for a linear model.

    data:    tensor whose elements can be viewed as rows of INPUT_SIZE values.
    targets: tensor of labels; it is cast to float and given an extra axis at
             position 1 so each label matches the model's one-column output.
    """
    def __init__(self, data, targets):
        # Collapse every image into a single row of INPUT_SIZE float features.
        flattened = data.reshape(-1, INPUT_SIZE)
        self.data = flattened.float()
        # Float labels with an added axis at position 1, as the BCE loss expects.
        self.targets = targets.float()[:, None]

    def __len__(self):
        # Number of stored samples (rows of the flattened feature matrix).
        return self.data.shape[0]

    def __getitem__(self, idx):
        # Return the (features, label) pair stored at position `idx`.
        features = self.data[idx]
        label = self.targets[idx]
        return features, label

def get_binary_dataloaders(batch_size=BATCH_SIZE):
    """Build train/validation/test DataLoaders for the MNIST 0-vs-1 subset.

    Loads the MNIST training split, keeps only the digits 0 and 1, normalizes
    the pixels, and splits the result 60% / 20% / 20% (stratified by label).
    All three subsets, including the "test" one, are carved out of the MNIST
    training split.

    Args:
        batch_size: number of samples per batch for all three loaders.

    Returns:
        (train_loader, val_loader, test_loader). Only the training loader
        shuffles its samples.
    """
    # Mean / std applied after scaling pixels to [0, 1], i.e. the same
    # arithmetic as ToTensor() followed by Normalize((0.1307,), (0.3081,)).
    mnist_mean, mnist_std = 0.1307, 0.3081

    # No `transform` is passed here on purpose: a torchvision transform only
    # runs when samples are fetched through the dataset's __getitem__, and this
    # function reads the raw `.data` tensor directly. The normalization is
    # therefore applied explicitly below; otherwise the model would be trained
    # on raw 0-255 pixel values.
    train_data = datasets.MNIST(root='./data', train=True, download=True)

    # 1. Filter for digits 0 and 1
    is_binary_mask = (train_data.targets == 0) | (train_data.targets == 1)

    # Raw uint8 pixels in [0, 255] -> floats in [0, 1] -> standardized values.
    X_full = train_data.data[is_binary_mask].float().div(255.0)
    X_full = (X_full - mnist_mean) / mnist_std
    Y_full = train_data.targets[is_binary_mask]

    # 2. No relabeling is needed: digit 1 is already the positive class (1)
    #    and digit 0 the negative class (0); the dataset class casts to float.

    # 3. Split into Train/Validation/Test (60% / 20% / 20% of the 0/1 data)

    # First split: 80% Train+Val, 20% Test
    X_train_val, X_test, Y_train_val, Y_test = train_test_split(
        X_full, Y_full, test_size=0.2, random_state=SEED, stratify=Y_full
    )

    # Second split: 0.2 / 0.8 = 25% of the remainder becomes validation,
    # which leaves 60% of the full 0/1 data for training.
    X_train, X_val, Y_train, Y_val = train_test_split(
        X_train_val, Y_train_val, test_size=(0.2/0.8), random_state=SEED, stratify=Y_train_val
    )

    # 4. Create Custom Datasets
    train_dataset = BinaryMNISTDataset(X_train, Y_train)
    val_dataset = BinaryMNISTDataset(X_val, Y_val)
    test_dataset = BinaryMNISTDataset(X_test, Y_test)

    # 5. Create DataLoaders (shuffle only the training data)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

ModuleNotFoundError: No module named 'torch'

In [None]:
# ----------------------------------------------------------------------
# 2. MODEL, LOSS, and ACCURACY
# ----------------------------------------------------------------------

class BinaryLogisticRegression:
    """
    From-scratch logistic regression built on plain PyTorch tensors.

    Parameters held by the model:
        W: weight column of shape (input_size, 1)
        b: bias of shape (1,)
    Both live on DEVICE and are tracked by autograd.
    """
    def __init__(self, input_size):
        # Small random weights and a zero bias. The scaling by 0.01 happens
        # before gradient tracking is switched on, so W stays a leaf tensor
        # whose .grad is populated by backward().
        self.W = (torch.randn(input_size, 1, device=DEVICE) * 0.01).requires_grad_(True)
        self.b = torch.zeros(1, device=DEVICE).requires_grad_(True)

    def forward(self, x):
        # Linear score (logit): z = x @ W + b
        logits = x @ self.W + self.b
        # The sigmoid squashes z into (0, 1), read as the probability P(y=1 | x).
        return torch.sigmoid(logits)

    def parameters(self):
        # Trainable tensors, in the order [weights, bias], for the training loop.
        return [self.W, self.b]

def custom_binary_cross_entropy(y_pred, y_true):
    """Mean binary cross-entropy between predicted probabilities and targets.

    Computes L = -(t * log(y) + (1 - t) * log(1 - y)) per element and averages.

    Args:
        y_pred: tensor of predicted probabilities P(y=1).
        y_true: tensor of targets (0 or 1), broadcastable against `y_pred`.

    Returns:
        A scalar tensor holding the mean loss.
    """
    if not torch.is_floating_point(y_pred):
        y_pred = y_pred.float()

    # Keep predictions strictly inside (0, 1) so neither log() sees 0.
    # The margin must be representable in the tensor's dtype: a value such as
    # 1e-15 is below float32 precision, so `1.0 - 1e-15` rounds back to 1.0,
    # the upper clamp does nothing, and a saturated prediction produces
    # 0 * log(0) = NaN. The dtype's machine epsilon avoids that.
    epsilon = torch.finfo(y_pred.dtype).eps
    y_pred = torch.clamp(y_pred, epsilon, 1.0 - epsilon)

    loss = -(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))
    return loss.mean()

def calculate_binary_accuracy(y_pred, y_true):
    """Return the fraction of predictions that match the labels, as a float."""
    # Decision rule: a probability of at least 0.5 is read as class 1, else 0.
    predicted_labels = y_pred.ge(0.5).float()
    # Count every correct prediction (true positives and true negatives).
    hits = torch.eq(predicted_labels, y_true).float().sum()
    # accuracy = correct predictions / number of samples
    accuracy = hits / len(y_true)
    return accuracy.item()


# Note: test and compute y = sigmoid(z).

In [None]:
#training the model
def train_model_binary(model, train_loader, val_loader, loss_fn, lr, epochs):
    """
    Train `model` with manual mini-batch gradient descent, validating each epoch.

    Args:
        model: object exposing `forward(x)` and `parameters()` (a list of
            autograd-tracked tensors).
        train_loader: DataLoader yielding (features, targets) training batches.
        val_loader: DataLoader yielding (features, targets) validation batches.
        loss_fn: callable `(y_pred, y_true) -> scalar tensor`.
        lr: learning rate used for the parameter update.
        epochs: number of passes over the training data.

    Returns:
        (train_losses, val_losses, train_accuracies, val_accuracies), each a
        list with one per-sample average per epoch.
    """
    # NOTE(review): a later cell defines `train_model_binary` again and that
    # definition shadows this one; keep a single definition in the notebook.
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    print(f"Starting training on {DEVICE}...")

    for epoch in range(epochs):
        epoch_train_loss, epoch_train_acc = 0.0, 0.0

        # --- TRAINING PHASE ---
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)

            # 1. Zero the gradients: backward() accumulates into .grad, so the
            #    values left by the previous batch must be cleared first.
            for param in model.parameters():
                if param.grad is not None:
                    param.grad.zero_()

            # 2. Forward pass: predictions and loss
            y_pred = model.forward(X_batch)
            loss = loss_fn(y_pred, y_batch)

            # 3. Backward pass: autograd applies the chain rule to fill .grad
            loss.backward()

            # 4. Gradient descent step: p_new = p_old - lr * dL/dp.
            #    Done under no_grad so the update itself is not tracked.
            with torch.no_grad():
                for param in model.parameters():
                    if param.grad is not None:
                        param -= lr * param.grad

            # Weight batch metrics by batch size so a smaller final batch
            # does not skew the epoch average.
            epoch_train_loss += loss.item() * len(X_batch)
            epoch_train_acc += calculate_binary_accuracy(y_pred, y_batch) * len(X_batch)

        avg_train_loss = epoch_train_loss / len(train_loader.dataset)
        avg_train_acc = epoch_train_acc / len(train_loader.dataset)

        # --- VALIDATION PHASE ---
        epoch_val_loss, epoch_val_acc = 0.0, 0.0
        with torch.no_grad():  # no gradients are needed for evaluation
            for X_batch_val, y_batch_val in val_loader:
                X_batch_val, y_batch_val = X_batch_val.to(DEVICE), y_batch_val.to(DEVICE)

                y_pred_val = model.forward(X_batch_val)
                loss_val = loss_fn(y_pred_val, y_batch_val)

                epoch_val_loss += loss_val.item() * len(X_batch_val)
                epoch_val_acc += calculate_binary_accuracy(y_pred_val, y_batch_val) * len(X_batch_val)

        avg_val_loss = epoch_val_loss / len(val_loader.dataset)
        avg_val_acc = epoch_val_acc / len(val_loader.dataset)

        # Store the epoch averages for plotting
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        train_accuracies.append(avg_train_acc)
        val_accuracies.append(avg_val_acc)

        print(f'Epoch {epoch+1:2d}/{epochs} | Train Loss: {avg_train_loss:.4f}, Acc: {avg_train_acc:.4f} | Val Loss: {avg_val_loss:.4f}, Acc: {avg_val_acc:.4f}')

    return train_losses, val_losses, train_accuracies, val_accuracies

        

In [None]:
def train_model_binary(model, train_loader, val_loader, loss_fn, lr, epochs):
    """
    Fit `model` by hand-written mini-batch gradient descent.

    Every epoch runs one pass over `train_loader` (updating model.W and
    model.b after each batch) and one gradient-free pass over `val_loader`,
    then prints a one-line summary.

    Returns four lists, each with one per-sample average per epoch:
    (train_losses, val_losses, train_accuracies, val_accuracies).
    """
    loss_history_train, loss_history_val = [], []
    acc_history_train, acc_history_val = [], []

    print(f"Starting training on {DEVICE}...")

    for epoch_no in range(1, epochs + 1):
        # ---------------- training pass ----------------
        running_loss = 0.0
        running_acc = 0.0
        for features, labels in train_loader:
            features = features.to(DEVICE)
            labels = labels.to(DEVICE)
            batch_n = len(features)

            # backward() adds to .grad, so clear what the last batch left behind.
            for tensor in model.parameters():
                if tensor.grad is not None:
                    tensor.grad.zero_()

            # Forward pass, then let autograd apply the chain rule.
            probs = model.forward(features)
            batch_loss = loss_fn(probs, labels)
            batch_loss.backward()

            # Plain gradient descent step, kept out of the autograd graph:
            # W <- W - lr * dL/dW,  b <- b - lr * dL/db
            with torch.no_grad():
                model.W.sub_(lr * model.W.grad)
                model.b.sub_(lr * model.b.grad)

            # Weight each batch by its size so the epoch figure is a true
            # per-sample average even when the last batch is smaller.
            running_loss += batch_loss.item() * batch_n
            running_acc += calculate_binary_accuracy(probs, labels) * batch_n

        train_loss_avg = running_loss / len(train_loader.dataset)
        train_acc_avg = running_acc / len(train_loader.dataset)

        # ---------------- validation pass ----------------
        running_val_loss = 0.0
        running_val_acc = 0.0
        with torch.no_grad():  # evaluation only, no gradients required
            for features, labels in val_loader:
                features = features.to(DEVICE)
                labels = labels.to(DEVICE)
                batch_n = len(features)

                probs = model.forward(features)
                running_val_loss += loss_fn(probs, labels).item() * batch_n
                running_val_acc += calculate_binary_accuracy(probs, labels) * batch_n

        val_loss_avg = running_val_loss / len(val_loader.dataset)
        val_acc_avg = running_val_acc / len(val_loader.dataset)

        # Keep the curves for plotting later.
        loss_history_train.append(train_loss_avg)
        loss_history_val.append(val_loss_avg)
        acc_history_train.append(train_acc_avg)
        acc_history_val.append(val_acc_avg)

        print(f'Epoch {epoch_no:2d}/{epochs} | Train Loss: {train_loss_avg:.4f}, Acc: {train_acc_avg:.4f} | Val Loss: {val_loss_avg:.4f}, Acc: {val_acc_avg:.4f}')

    return loss_history_train, loss_history_val, acc_history_train, acc_history_val

In [None]:
def evaluate_binary_model(model, test_loader, loss_fn):
    """
    Evaluate final model performance on the test set.

    Args:
        model: object exposing `forward(x)` that returns probabilities P(y=1).
        test_loader: DataLoader yielding (features, targets) batches.
        loss_fn: callable `(y_pred, y_true) -> scalar tensor`.

    Returns:
        (final_acc, conf_matrix, avg_loss), where `conf_matrix` is always the
        2x2 matrix for labels (0, 1): rows are true classes and columns are
        predicted classes, i.e. [[TN, FP], [FN, TP]].
    """
    all_preds, all_targets = [], []
    total_loss = 0.0

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)

            y_pred = model.forward(X_batch)
            loss = loss_fn(y_pred, y_batch)

            # Weight by batch size so the final figure is a per-sample average.
            total_loss += loss.item() * len(X_batch)

            # Same 0.5 decision threshold as during training. Labels are moved
            # to the CPU and stored as plain ints for scikit-learn.
            y_pred_class = (y_pred >= 0.5).long()
            all_preds.extend(y_pred_class.cpu().flatten().tolist())
            all_targets.extend(y_batch.long().cpu().flatten().tolist())

    avg_loss = total_loss / len(test_loader.dataset)

    # Pin the label set: without it, a run in which only one class appears
    # yields a 1x1 matrix and the [1, 1] lookup below raises IndexError.
    conf_matrix = confusion_matrix(all_targets, all_preds, labels=[0, 1])
    final_acc = (conf_matrix[0, 0] + conf_matrix[1, 1]) / len(all_targets)

    return final_acc, conf_matrix, avg_loss

In [None]:
# ----------------------------------------------------------------------
# 4. PLOTTING FUNCTION
# ----------------------------------------------------------------------

def plot_metrics(train_metrics, val_metrics, title, ylabel):
    """Draw the training and validation curves of one metric on a shared plot.

    `train_metrics` and `val_metrics` are sequences with one value per epoch;
    `ylabel` names the metric and is reused in both legend entries.
    """
    _, ax = plt.subplots(figsize=(10, 5))

    # One line per split; the x position is the index within the sequence.
    ax.plot(train_metrics, label=f'Training {ylabel}')
    ax.plot(val_metrics, label=f'Validation {ylabel}')

    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)

    plt.show()

In [None]:
# ----------------------------------------------------------------------
# 5. EXECUTION BLOCK
# ----------------------------------------------------------------------

if __name__ == '__main__':
    # Data: loaders restricted to the digits 0 and 1.
    train_loader_binary, val_loader_binary, test_loader_binary = get_binary_dataloaders()

    # Model: a single linear layer followed by a sigmoid, one weight per pixel.
    model_binary = BinaryLogisticRegression(INPUT_SIZE)

    # Training: keep the per-epoch curves so they can be plotted afterwards.
    train_losses, val_losses, train_accuracies, val_accuracies = train_model_binary(
        model_binary,
        train_loader_binary,
        val_loader_binary,
        custom_binary_cross_entropy,
        LEARNING_RATE,
        NUM_EPOCHS,
    )

    # Final check on the held-out test loader, using the same loss function.
    test_acc, conf_matrix, test_loss = evaluate_binary_model(
        model_binary,
        test_loader_binary,
        custom_binary_cross_entropy,
    )

In [3]:
 # 5. Output Results and Visualizations
    
# This cell reads test_loss, test_acc, conf_matrix and the four metric lists,
# so the execution cell has to run before it.
rule = "=" * 50

print("\n" + rule)
print("FINAL TEST SET RESULTS (Binary Classification)")
print(rule)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc*100:.2f}%")

# Layout of the matrix: row i holds the samples whose true class is i and
# column j those predicted as class j, so with classes (0, 1) it reads
# [[TN, FP], [FN, TP]].
print("\nConfusion Matrix (Rows=True Class, Cols=Predicted Class):")
print(conf_matrix)
print(rule)

# Convergence curves: loss first, then accuracy.
plot_metrics(
    train_losses, val_losses,
    'Binary Logistic Regression Loss Over Epochs', 'Loss',
)
plot_metrics(
    train_accuracies, val_accuracies,
    'Binary Logistic Regression Accuracy Over Epochs', 'Accuracy',
)


FINAL TEST SET RESULTS (Binary Classification)


NameError: name 'test_loss' is not defined