# Binary classification Model

### train / test split

In [234]:
import numpy as np
import torch
X_train = np.loadtxt("datasets/X_train_imputed_scaled.csv", delimiter=",", skiprows=1)
X_test = np.loadtxt("datasets/X_test_imputed_scaled.csv", delimiter=",", skiprows=1)
y_train = np.loadtxt("datasets/y_train.csv", delimiter=",", skiprows=1)
y_test = np.loadtxt("datasets/y_test.csv", delimiter=",", skiprows=1)

X_train = np.delete(X_train, 0, axis=1)
X_test = np.delete(X_test, 0, axis=1)
y_train = np.delete(y_train, 0, axis=1)
y_test = np.delete(y_test, 0, axis=1)

X_train = torch.from_numpy(X_train).type(torch.float32).squeeze()
X_test = torch.from_numpy(X_test).type(torch.float32).squeeze()
y_train = torch.from_numpy(y_train).type(torch.float32).squeeze()
y_test = torch.from_numpy(y_test).type(torch.float32).squeeze()


In [235]:
from sklearn.model_selection import train_test_split

len(X_train), len(X_test), len(y_train), len(y_test)

(10018, 2648, 10018, 2648)

In [236]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([10018, 7]),
 torch.Size([2648, 7]),
 torch.Size([10018]),
 torch.Size([2648]))

In [237]:
type(X_train), X_train.dtype

(torch.Tensor, torch.float32)

In [238]:
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [239]:
torch.cuda.is_available()

False

### Construct Model Class

In [240]:
class ExoplanetsV0(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=int(hidden_units)),
            nn.ReLU(),
            nn.Linear(in_features=int(hidden_units), out_features=int(hidden_units)),
            nn.ReLU(),
            nn.Linear(in_features=int(hidden_units), out_features=output_features),
            #nn.Sigmoid()
        )

    def forward(self, x):
        return self.layers(x)


# Create an instance of model
model_0 = ExoplanetsV0(input_features=X_test.shape[1], output_features=1, hidden_units=64).to(device)
model_0

ExoplanetsV0(
  (layers): Sequential(
    (0): Linear(in_features=7, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [241]:
X_train.shape, y_train.shape

(torch.Size([10018, 7]), torch.Size([10018]))

### Loss Function and Optimizer

In [242]:
# Create a loss function for binary classification

### Tried to fix with wages-----------------------------------
weight_for_0 = len(y_test) + len(y_train) / (2 * ((y_test == 0).sum() + (y_train == 0).sum()))
weight_for_1 = len(y_test) + len(y_train) / (2 * ((y_test == 1).sum() + (y_train == 1).sum()))

pos_weight = torch.tensor([weight_for_1 / weight_for_0]).to(device)  # dla klasy 1
loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
#loss_fn = nn.BCEWithLogitsLoss()
###----------------------------------------------------------

#loss_fn = nn.BCEWithLogitsLoss()

# Create an optimizer for binary classification
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.01)

In [243]:
# Calculate accuracy
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

# Train Model

Model outputs are raw **Logits**

We are converting logits into prediction probabilities by passing them to some kind of activation function (int this case `nn.Sigmoid()`) Then we can convert our model's prediction probabilities to **prediction labels** by taking the `argmaX_scaled()`

In [244]:
model_0.eval()
with torch.inference_mode():
    y_logits = model_0(X_test.to(device))
y_logits[:5]

tensor([[-0.0187],
        [-0.0068],
        [-0.0122],
        [-0.0120],
        [-0.0200]])

In [245]:
y_pred_probs = torch.sigmoid(y_logits)
y_pred_probs

tensor([[0.4953],
        [0.4983],
        [0.4969],
        ...,
        [0.4962],
        [0.4951],
        [0.4974]])

In [246]:
torch.round(y_pred_probs)

tensor([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]])

In [247]:
# find predicted labels
y_preds = torch.round(y_pred_probs)

# in full
y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device))))

# check for equality
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# get rid of extra dimension
y_preds.squeeze()

tensor([True, True, True,  ..., True, True, True])


tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [248]:
y_test[:5]

tensor([1., 1., 0., 1., 1.])

### Building a training loop and testing loop

In [249]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Set the number of epochs
epochs = 10000

# Tracking loss and accuracy
track_accuracy = []
track_loss = []
track_test_accuracy = []
track_test_loss = []

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    model_0.train()

    # 1. Forward pass
    y_logits = model_0(X_train).squeeze()
    y_probs = torch.sigmoid(y_logits)
    y_pred = torch.round(y_probs)  # turn logits into pred probs and into pred labels

    # 2. Calculate the loss / cost
    loss = loss_fn(y_logits, y_train)

    acc = accuracy_fn(y_true=y_train.detach(), y_pred=y_pred.detach())
    precision = precision_score(y_true=y_train.detach(), y_pred=y_pred.detach(), zero_division=0) * 100
    recall = recall_score(y_true=y_train.detach(), y_pred=y_pred.detach(), zero_division=0) * 100
    f1 = f1_score(y_true=y_train.detach(), y_pred=y_pred.detach(), zero_division=0) * 100
    auc = roc_auc_score(y_true=y_train.detach(), y_score=y_probs.detach())
    track_loss.append(loss.item())
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    track_accuracy.append(acc)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Backpropagation algorithm
    loss.backward()

    # 5. Gradient descent algorithm
    optimizer.step()

    ### Testing
    model_0.eval()
    with torch.inference_mode():
        # 1. Forward pass
        test_logits = model_0(X_test).squeeze()     # logits
        test_probs = torch.sigmoid(test_logits)     # Probability
        test_pred = torch.round(test_probs)         # 1 or 0

        # 2. Calculate test loss/acc

        test_loss = loss_fn(test_logits, y_test)
        track_test_loss.append(test_loss.item())

        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
        test_precision = precision_score(y_true=y_test, y_pred=test_pred, zero_division=0) * 100
        test_recall = recall_score(y_true=y_test, y_pred=test_pred, zero_division=0) * 100
        test_f1 = f1_score(y_true=y_test, y_pred=test_pred, zero_division=0) * 100
        test_auc = roc_auc_score(y_true=y_test, y_score=test_probs)
        track_test_accuracy.append(test_acc)

    # Print out what's happenin'
    if epoch % 100 == 0 or epoch == 9999:
        print(
            f"Epoch {epoch} \n"
            f" loss: {loss:.5f} | accuracy: {acc:.2f}% | Precision: {precision:.2f}% | Recall: {recall:.2f}% | f1: {f1:.2f}% | auc: {auc:.2f} \n"
            f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}% | Test precision: {test_precision:.2f}% | Test recall: {test_recall:.2f}% | Test f1: {test_f1:.2f}% | Test auc: {test_auc:.2f}"
            f"Epoch {epoch} | loss: {loss:.5f}, accuracy: {acc:.2f}%, Test loss: {test_loss:.5f}, Test accuracy: {test_acc:.2f}% \n")

Epoch 0 
 loss: 0.69464 | accuracy: 43.86% | Precision: 13.43% | Recall: 2.95% | f1: 4.84% | auc: 0.32 
Test loss: 0.69545 | Test accuracy: 38.75% | Test precision: 14.83% | Test recall: 3.36% | Test f1: 5.48% | Test auc: 0.29Epoch 0 | loss: 0.69464, accuracy: 43.86%, Test loss: 0.69545, Test accuracy: 38.75% 

Epoch 100 
 loss: 0.69073 | accuracy: 51.64% | Precision: 0.00% | Recall: 0.00% | f1: 0.00% | auc: 0.62 
Test loss: 0.69272 | Test accuracy: 47.17% | Test precision: 0.00% | Test recall: 0.00% | Test f1: 0.00% | Test auc: 0.62Epoch 100 | loss: 0.69073, accuracy: 51.64%, Test loss: 0.69272, Test accuracy: 47.17% 

Epoch 200 
 loss: 0.68737 | accuracy: 51.64% | Precision: 0.00% | Recall: 0.00% | f1: 0.00% | auc: 0.69 
Test loss: 0.69012 | Test accuracy: 47.17% | Test precision: 0.00% | Test recall: 0.00% | Test f1: 0.00% | Test auc: 0.70Epoch 200 | loss: 0.68737, accuracy: 51.64%, Test loss: 0.69012, Test accuracy: 47.17% 

Epoch 300 
 loss: 0.68409 | accuracy: 51.64% | Precision:

KeyboardInterrupt: 

# Tracking Accuracy

In [None]:
import matplotlib.pyplot as plt

epoch_range = range(1, epochs + 1)

plt.figure(figsize=(12, 5))

# Accuracy
plt.subplot(1, 2, 1)
plt.plot(epoch_range, track_accuracy, label='Train Accuracy')
plt.plot(epoch_range, track_test_accuracy, label='Test Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.grid(True)

# Loss
plt.subplot(1, 2, 2)
plt.plot(epoch_range, track_loss, label='Train Loss')
plt.plot(epoch_range, track_test_loss, label='Test Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()


# Saving and loading already trained Model

Creating template / method for effective saving already trained model, so user don't have to wait for it to finish training before usage.

In [None]:
# Saving our PyTorch model
from pathlib import Path

# 1. Create models directory if it doesn't exist yet
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents = True, exist_ok = True)

# 2. Create model save path
MODEL_NAME = "nn_exoplanets0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. Save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj = model_0.state_dict(),
           f = MODEL_SAVE_PATH)

In [None]:
!ls -l models

### Loading Trained model



In [None]:
# To load in a saved state_dict we have to instantiate a new instace of our model class
loaded_model_0 = ExoplanetsV0(input_features = 18, output_features = 1, hidden_units = 64)

#Load the saved state_dict of model_0 (update the new instance with old parameters
loaded_model_0.load_state_dict(torch.load(f = MODEL_SAVE_PATH))

In [None]:
loaded_model_0.eval()
with torch.inference_mode():
    loaded_logits = loaded_model_0(X_test.to(device))
    loaded_pred_probs = torch.sigmoid(loaded_logits)
    loaded_preds = torch.round(loaded_pred_probs)

loaded_preds

In [None]:
loaded_preds == y_preds

In [None]:
loaded_preds, y_preds

# Looking for mistakes

### Balance between classes

In [None]:
print("class distribution y:")
print(f"Confirmed (1): {(y == 1).sum()} - {((y == 1).sum() / len(y)) * 100:.2f}%")
print(f"False Positive (0): {(y == 0).sum()} - {((y == 0).sum() / len(y)) * 100:.2f}%")

#### Fix 1. Using wage in loss_fn

Using wage in BCEWithLogitsLoss()

#### Fix 2. Using fake data to check if model is learning anything

In [None]:
import numpy as np
import torch
from sklearn.datasets import make_classification

# Stwórz sztuczne dane które NA PEWNO są przewidywalne
X_easy, y_easy = make_classification(
    n_samples=2000,
    n_features=10,
    n_informative=8,  # 8 użytecznych features
    n_redundant=2,  # 2 zbędne features
    n_clusters_per_class=1,
    random_state=42
)

# Konwersja do tensorów
X_easy_tensor = torch.from_numpy(X_easy).float()
y_easy_tensor = torch.from_numpy(y_easy).float()

print(f"Easy data - X: {X_easy_tensor.shape}, y: {y_easy_tensor.shape}")
print(f"Class balance: {y_easy.mean():.3f} positive")

In [None]:
# Test on easy data
def test_your_model():
    # Generate easy data (18 features like in  original data)
    X_easy, y_easy = make_classification(
        n_samples=2000,
        n_features=18,  # SAME AS IN DATA
        n_informative=16,  # Most features are useful
        n_redundant=2,
        n_clusters_per_class=1,
        random_state=42,
        flip_y=0.01  # Only 1% noise
    )

    # Convert to tensors -
    X_easy_tensor = torch.from_numpy(X_easy).float()
    y_easy_tensor = torch.from_numpy(y_easy).float()

    # Train/test split
    split_idx = int(0.8 * len(X_easy_tensor))
    X_train = X_easy_tensor[:split_idx]
    X_test = X_easy_tensor[split_idx:]
    y_train = y_easy_tensor[:split_idx]
    y_test = y_easy_tensor[split_idx:]

    # USE EXACT MODEL
    model = ExoplanetsV0(input_features=18, output_features=1, hidden_units=128)

    # USED HYPERPARAMETERS (you can also test different ones)
    optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)  # optimizer
    loss_fn = nn.BCEWithLogitsLoss()  # loss function

    print("=== TESTING YOUR MODEL ON EASY DATA ===")
    print(f"Model: {model}")
    print(f"Optimizer: SGD lr=0.01")
    print(f"Data: {X_train.shape} -> {y_train.shape}")

    # Training
    for epoch in range(100):
        model.train()
        optimizer.zero_grad()

        # Forward pass
        y_logits = model(X_train).squeeze()

        # prediction logic
        y_pred = torch.round(torch.sigmoid(y_logits))

        # Loss
        loss = loss_fn(y_logits, y_train)  # BCEWithLogitsLoss with logits
        acc = (y_pred == y_train).float().mean()

        # Backward
        loss.backward()
        optimizer.step()

        # Test every 20 epochs
        if epoch % 20 == 0:
            model.eval()
            with torch.no_grad():
                test_logits = model(X_test).squeeze()
                test_pred = torch.round(torch.sigmoid(test_logits))
                test_loss = loss_fn(test_logits, y_test)
                test_acc = (test_pred == y_test).float().mean()

            print(
                f"Epoch {epoch:3d} | Train Loss: {loss:.4f}, Acc: {acc:.4f} | Test Loss: {test_loss:.4f}, Acc: {test_acc:.4f}")

    final_test_acc = (test_pred == y_test).float().mean()
    print(f"\n FINAL TEST ACCURACY: {final_test_acc:.4f}")

    if final_test_acc > 0.85:
        print("MODEL WORKS CORRECTLY - problem is in KEPLER DATA")
        return True
    elif final_test_acc > 0.6:
        print("MODEL WORKS POORLY - possible architecture problem")
        return False
    else:
        print("MODEL DOESN'T WORK - problem in IMPLEMENTATION")
        return False


# Run test
print("Testing YOUR exact model on easy data...")
model_works = test_your_model()

### Conclusion

Problem in `Sigmoid()` in `nn.Sequential`. Sigmoid should be used to round when we want to convert logits > probs > labels, it was affecting final answer when used in wrong place.

Model can achieve ~60% accuracy on Dataset, let's try wages now, with working implementation. -> Model achieves over 90% accuracy.

### Observations

* When applied change and increased number of hidden layers
    * `Linear>ReLU>Linear>ReLU>Linear>` -> `Linear>ReLU>Linear>ReLU>Linear>ReLU>Linear` accuracy dropped to 60,5% and model wasn't learning