In [1]:
# main_script.py

# --- 1. Import necessary libraries ---
import torch
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import os

# --- 2. Data Generation and Loading ---
# This section uses the same function as before to ensure the same dataset is used.

def generate_or_load_data(filename='binary_data.csv'):
    """
    Return the binary classification dataset stored at `filename`.

    If the file is missing, a synthetic dataset (100 samples, 2 informative
    features, 2 classes, fixed random_state) is generated with
    `make_classification` and written to `filename` as CSV with the columns
    'feature_1', 'feature_2' and 'label'.

    In both cases the returned DataFrame is read back from the CSV file, so
    a freshly generated dataset and a reloaded one take the same code path.
    """
    # Fast path: the dataset is already on disk.
    if os.path.exists(filename):
        print(f"Loading existing dataset from '{filename}'.")
        return pd.read_csv(filename)

    print(f"'{filename}' not found. Generating a new dataset.")
    features, labels = make_classification(
        n_samples=100,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_classes=2,
        random_state=1
    )

    # Persist features and labels together so later runs reuse the same data.
    frame = pd.DataFrame(features, columns=['feature_1', 'feature_2'])
    frame['label'] = labels
    frame.to_csv(filename, index=False)
    print(f"Dataset saved to '{filename}'.")

    # Read back from disk rather than returning `frame` directly.
    return pd.read_csv(filename)

# --- 3. Activation and Loss Functions (Manual Implementation) ---
# We use built-in torch.relu, but define sigmoid and BCE loss manually.

def sigmoid(z):
    """
    Numerically stable manual sigmoid activation for the output layer.

    Computes 1 / (1 + exp(-z)) element-wise, but only ever exponentiates a
    non-positive value. The naive form overflows exp(-z) to inf for very
    negative z; the forward value is still 0, but autograd then evaluates
    0 * inf in the backward pass and produces NaN gradients.

    Args:
        z: Tensor of pre-activation values (logits).

    Returns:
        Tensor of the same shape as `z` with values in [0, 1].
    """
    # exp(-|z|) written via torch.where instead of torch.abs, so the
    # gradient at exactly z == 0 is correct (abs has a zero subgradient there).
    decay = torch.exp(torch.where(z >= 0, -z, z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both branches are finite everywhere, so the unselected branch cannot
    # leak NaN into the gradient.
    return torch.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def binary_cross_entropy_loss(y_true, y_pred):
    """
    Binary Cross-Entropy loss function.
    We add a small epsilon value to prevent log(0) which results in NaN.
    """
    epsilon = 1e-7
    # Clamp predictions to avoid log(0) or log(1) issues
    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
    # BCE formula
    loss = -torch.mean(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))
    return loss

# --- 4. Main Execution Block ---
if __name__ == "__main__":
    # Trains a manual 2-4-1 feed-forward network (ReLU hidden layer, sigmoid
    # output) with full-batch gradient descent, then reports test accuracy.

    # --- Data Preparation ---
    data_df = generate_or_load_data()
    X = data_df[['feature_1', 'feature_2']].values
    y = data_df['label'].values

    # Split data into training (80%) and testing (20%) sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # --- Convert to PyTorch Tensors and Move to Device ---
    # Use the GPU if available, otherwise the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"\nUsing device: '{device}'")

    # Labels are reshaped to column vectors so they line up with the
    # (n_samples, 1) network output.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

    # --- Hyperparameters ---
    n_input_features = X_train_tensor.shape[1]  # Should be 2
    n_hidden_units = 4
    n_output_units = 1
    learning_rate = 0.1
    epochs = 100
    seed = 42

    # --- Model Initialization (Manual 2-4-1 Architecture) ---
    # The dataset and the split are already seeded; seed the weight
    # initialisation as well so that repeated runs give the same result.
    torch.manual_seed(seed)

    # Layer 1: Input (2) to Hidden (4)
    W1 = torch.randn(n_input_features, n_hidden_units, device=device, requires_grad=True, dtype=torch.float32)
    b1 = torch.zeros(1, n_hidden_units, device=device, requires_grad=True, dtype=torch.float32)

    # Layer 2: Hidden (4) to Output (1)
    W2 = torch.randn(n_hidden_units, n_output_units, device=device, requires_grad=True, dtype=torch.float32)
    b2 = torch.zeros(1, n_output_units, device=device, requires_grad=True, dtype=torch.float32)

    parameters = (W1, b1, W2, b2)

    def _forward(inputs):
        """Run the 2-4-1 network on `inputs` and return output probabilities."""
        hidden = torch.relu(inputs @ W1 + b1)
        return sigmoid(hidden @ W2 + b2)

    print("\n--- Starting Training for 2-4-1 ANN ---")
    # --- Training Loop (full-batch gradient descent) ---
    for epoch in range(epochs):
        # Forward pass and loss on the whole training set.
        y_pred = _forward(X_train_tensor)
        loss = binary_cross_entropy_loss(y_train_tensor, y_pred)

        # Backward pass: populates .grad on every tensor in `parameters`.
        loss.backward()

        # Manual gradient-descent step. no_grad() keeps the in-place updates
        # out of the autograd graph.
        with torch.no_grad():
            for param in parameters:
                param -= learning_rate * param.grad
                # Gradients accumulate across backward() calls, so reset them
                # before the next epoch.
                param.grad.zero_()

        # Print loss every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

    print("--- Training Finished ---\n")

    # --- Evaluation on Test Set ---
    with torch.no_grad():
        test_pred_probs = _forward(X_test_tensor)

        # Threshold probabilities at 0.5 to obtain hard 0/1 predictions.
        test_pred_labels = (test_pred_probs >= 0.5).float()

        # Fraction of test samples whose predicted label matches the target.
        accuracy = (test_pred_labels == y_test_tensor).sum().item() / len(y_test_tensor)
        print(f"Accuracy on test set: {accuracy * 100:.2f}%")

'binary_data.csv' not found. Generating a new dataset.
Dataset saved to 'binary_data.csv'.

Using device: 'cpu'

--- Starting Training for 2-4-1 ANN ---
Epoch 10/100, Loss: 0.4951
Epoch 20/100, Loss: 0.3661
Epoch 30/100, Loss: 0.2967
Epoch 40/100, Loss: 0.2522
Epoch 50/100, Loss: 0.2183
Epoch 60/100, Loss: 0.1934
Epoch 70/100, Loss: 0.1733
Epoch 80/100, Loss: 0.1568
Epoch 90/100, Loss: 0.1434
Epoch 100/100, Loss: 0.1322
--- Training Finished ---

Accuracy on test set: 100.00%
