In [241]:
import sklearn.preprocessing as pre
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
torch.manual_seed(42)
np.random.seed(42)
import pandas as pd

In [205]:
data = np.genfromtxt('data/train.csv', delimiter=',')
print("data shape", data.shape)

X = data[1:,1:-1]
print("X shape:", X.shape)

y = data[1:, -1].reshape(-1,1)
print("y shape:", y.shape)



data shape (2191, 13)
X shape: (2190, 11)
y shape: (2190, 1)
(731, 12)


In [120]:
def preprocess(X, Y, test_size=0.10, random_state=22):
    # 1: Scale and Center the data
    scaler = pre.StandardScaler().fit(X)
    X = scaler.transform(X)
    print("x-std: ", X.std())
    print("x-mean: ", X.mean())
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=random_state)
    print("\nX-train shape:", x_train.shape)
    print("Y-train shape:", y_train.shape)
    print("\nX-test shape:", x_test.shape)
    print("Y-test shape:", y_test.shape)
    return x_train, y_train, x_test, y_test, scaler

x_train, y_train, x_test, y_test, scaler = preprocess(X, y)

x-std:  0.9999999999999996
x-mean:  4.307971349696023e-14

X-train shape: (1971, 11)
Y-train shape: (1971, 1)

X-test shape: (219, 11)
Y-test shape: (219, 1)


In [38]:
def Kernel_SVM_train(X, y, lbl):
    # Map labels: if equal to lbl then -1, else 1, and reshape to column vector.
    y = np.where(y == lbl, -1, 1).reshape(-1, 1)
    g = 1/(X.shape[0] * X.var())
    alpha = 1e-5
    u1 = 1
    # Compute the polynomial kernel (X is assumed to be shape: (n_samples, n_features))
    K = polynomial_kernel(X, degree=2, gamma=g)
    
    # Create diagonal matrix with y values
    Y = np.diagflat(y.ravel())
    
    # Initialize L with dynamic size (n_samples, 1)
    n_samples = X.shape[0]
    L = np.random.randn(n_samples, 1)
    
    for i in range(180000):
        grad = grad_f(L, Y, K, u1, y)
        L = L - alpha * grad
        if i % 20 == 0:
            print(f"Iteration {i}, grad = {np.sum(grad)}")
    return K, L, g

def Kernel_SVM_predict(K, L, y, lbl):
    y = np.where(y == lbl, -1, 1).reshape(-1, 1)
    # Compute predictions
    y_hat = K @ (L * y)
    # Find indices for negative and positive classes
    neg_idx = np.where(y.ravel() == -1)[0]
    pos_idx = np.where(y.ravel() == 1)[0]

    neg_side = np.max(y_hat[neg_idx])
    pos_side = np.min(y_hat[pos_idx])
    b = (pos_side + neg_side) / 2

    y_hat = np.sign(K @ (L * y) + b)
    print("Accuracy on Training Data: ", accuracy_score(y, y_hat))
    return y_hat, b

def grad_f(L, Y, K, u1, y):
    n = L.shape[0]
    dL = np.zeros((n, 1))
    mask = L.flatten() < 0  
    dL[mask, 0] = -1
    df = Y @ K @ Y @ L + 2 * u1 * (L.T @ y) * y - np.ones((n, 1)) + dL
    return df

In [45]:
print("Starting Kernel_SVM...")
K, L, g = Kernel_SVM_train(x_train, y_train, 0)
preds = Kernel_SVM_predict(K, L, y_train, 0)
print("Success")

Starting Kernel_SVM...
Iteration 0, grad = -47877.60021500215
Iteration 20, grad = -47823.39269712348
Iteration 40, grad = -47769.249544556485
Iteration 60, grad = -47715.17068087381
Iteration 80, grad = -47661.15602974004
Iteration 100, grad = -47607.20551491025
Iteration 120, grad = -47553.31906022929
Iteration 140, grad = -47499.49658963331
Iteration 160, grad = -47445.7380271485
Iteration 180, grad = -47392.04329689085
Iteration 200, grad = -47338.41232306704
Iteration 220, grad = -47284.84502997334
Iteration 240, grad = -47231.34134199632
Iteration 260, grad = -47177.90118361158
Iteration 280, grad = -47124.52447938566
Iteration 300, grad = -47071.21115397329
Iteration 320, grad = -47017.96113211938
Iteration 340, grad = -46964.77433865848
Iteration 360, grad = -46911.65069851343
Iteration 380, grad = -46858.59013669715
Iteration 400, grad = -46805.59257831097


KeyboardInterrupt: 

In [52]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import polynomial_kernel

def Kernel_SVM_train(X, y, lbl):
    """
    Train a simple Kernel SVM using gradient descent on a polynomial kernel.

    Parameters
    ----------
    X   : np.ndarray of shape (n_samples, n_features)
          Training features.
    y   : np.ndarray of shape (n_samples,)
          Class labels corresponding to each sample in X.
    lbl : The specific class label we want to treat as -1; others become +1.

    Returns
    -------
    K : np.ndarray
        The (n_samples x n_samples) polynomial kernel matrix.
    L : np.ndarray
        The learned weight vector (dual variables).
    g : float
        The gamma used in the polynomial kernel.
    """
    # Vectorize label mapping: if y == lbl => -1, else => 1
    y = np.where(y == lbl, -1, 1).reshape(-1, 1)
    
    # Kernel scale (gamma)
    g = 1 / (X.shape[0] * X.var())
    
    # Hyperparameters
    alpha = 1e-4
    u1 = 1
    
    # Compute the polynomial kernel
    K = polynomial_kernel(X, degree=2, gamma=g)
    
    # Initialize L randomly
    n_samples = X.shape[0]
    L = np.random.randn(n_samples, 1)
    
    # Precompute y*y^T for elementwise multiplication with K
    yyT = y @ y.T

    # Gradient descent
    for i in range(180000):
        # Instead of building a mask and setting dL, just do a direct where
        dL = np.where(L < 0, -1.0, 0.0)
        
        # Compute the gradient
        #  Y @ K @ Y @ L  becomes  (y*y^T * K) @ L
        #  plus the 2*u1*(L.T@y)*y term
        #  minus the ones vector
        grad = (K * yyT) @ L + 2 * u1 * (L.T @ y) * y - np.ones((n_samples, 1)) + dL.reshape(-1, 1)
        
        # Update
        L = L - alpha * grad
        
        # Print every 20 iterations to keep track
        if i % 200 == 0:
            print(f"Iteration {i}, grad sum = {grad.sum():.6f}")
    
    return K, L, g


def Kernel_SVM_predict(K, L, y, lbl):
    """
    Predict using the trained Kernel SVM.

    Parameters
    ----------
    K   : np.ndarray of shape (n_samples, n_samples)
          Polynomial kernel matrix (on the same data used in training).
    L   : np.ndarray of shape (n_samples, 1)
          Learned weight vector (dual variables).
    y   : np.ndarray of shape (n_samples,)
          Class labels of the training data.
    lbl : The specific label that was mapped to -1 in training.

    Returns
    -------
    y_hat  : np.ndarray of shape (n_samples,)
             Predicted labels (-1 or +1).
    b      : float
             The bias term calculated from margin midpoint.
    """
    # Vectorize label mapping again
    y = np.where(y == lbl, -1, 1).reshape(-1, 1)

    # Compute raw decision function
    decision = K @ (L * y)
    
    # Locate negative and positive samples
    neg_idx = np.where(y.ravel() == -1)[0]
    pos_idx = np.where(y.ravel() == 1)[0]
    
    # Margin midpoint
    neg_side = decision[neg_idx].max()
    pos_side = decision[pos_idx].min()
    b = (pos_side + neg_side) / 2
    
    # Final prediction
    y_hat = np.sign(decision + b).ravel()

    # Compute accuracy
    print("Accuracy on Training Data:", accuracy_score(y, y_hat.reshape(-1, 1)))
    return y_hat, b


In [53]:
print("Starting Kernel_SVM...")
K, L, g = Kernel_SVM_train(x_train, y_train, 0)
preds = Kernel_SVM_predict(K, L, y_train, 0)
print("Success")

Starting Kernel_SVM...
Iteration 0, grad sum = 19309.262284
Iteration 200, grad sum = -2184.109787
Iteration 400, grad sum = -2164.150517
Iteration 600, grad sum = -2139.503489
Iteration 800, grad sum = -2118.321001
Iteration 1000, grad sum = -2095.694003
Iteration 1200, grad sum = -2069.992289
Iteration 1400, grad sum = -2052.899638
Iteration 1600, grad sum = -2027.280203
Iteration 1800, grad sum = -2015.127815
Iteration 2000, grad sum = -1975.557943
Iteration 2200, grad sum = -1955.349936
Iteration 2400, grad sum = -1933.530107
Iteration 2600, grad sum = -1909.576772
Iteration 2800, grad sum = -1885.151927
Iteration 3000, grad sum = -1867.253128
Iteration 3200, grad sum = -1843.702659
Iteration 3400, grad sum = -1831.087134
Iteration 3600, grad sum = -1813.964884
Iteration 3800, grad sum = -1801.835825
Iteration 4000, grad sum = -1790.536429
Iteration 4200, grad sum = -1782.118559
Iteration 4400, grad sum = -1766.486988
Iteration 4600, grad sum = -1749.408906
Iteration 4800, grad sum

In [54]:
def Kernel_SVM_predict_test(x_test, x_train, L, y_train, y_test, gamma, degree=2):
    y_test = np.where(y_test == 0, -1, 1).reshape(-1, 1)
    y_train = np.where(y_train == 0, -1, 1).reshape(-1, 1)
    # Compute the kernel matrix between test and training data
    K_test = polynomial_kernel(x_test, x_train, degree=degree, gamma=gamma)
    
    # Compute decision values for the training data (to estimate bias b)
    K_train = polynomial_kernel(x_train, x_train, degree=degree, gamma=gamma)
    decision_train = K_train @ (L * y_train)
    
    # Estimate the bias b using training points
    neg_idx = np.where(y_train.ravel() == -1)[0]
    pos_idx = np.where(y_train.ravel() == 1)[0]
    
    # For a simple bias estimation, average the max decision value for negative class 
    # and the min decision value for positive class
    neg_side = np.max(decision_train[neg_idx])
    pos_side = np.min(decision_train[pos_idx])
    b = (pos_side + neg_side) / 2
    
    # Predict the labels for test data
    decision_test = K_test @ (L * y_train)
    y_pred = np.sign(decision_test + b)
    print("K-SVM accuracy on test data: ", accuracy_score(y_test, y_pred))
    return y_pred

pred = Kernel_SVM_predict_test(x_test, x_train, L, y_train, y_test, g, degree=2)

K-SVM accuracy on test data:  0.771689497716895


In [60]:
from sklearn.svm import SVC

# Create a kernel SVM with a radial basis function (RBF) kernel
model = SVC(kernel='rbf', C=1.0, gamma='scale')

y_train_transform = np.where(y_train == 0, -1, 1).ravel()
# Fit the model to training data
model.fit(x_train, y_train_transform)

y_test_transform = np.where(y_test == 0, -1, 1).ravel()

# Predict
y_pred = model.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.771689497716895


In [232]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # X_data: your original NumPy array
X_tensor = torch.from_numpy(X_scaled.astype(np.float32))
y_tensor = torch.from_numpy(y).float()  # shape [n_samples, 1]

# Use a TensorDataset & DataLoader for easier batching
dataset = TensorDataset(X_tensor, y_tensor)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [233]:
class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.net = nn.Sequential(
            nn.Linear(11, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            
            nn.Linear(32, 1)  # Single output unit
        )
        
    def forward(self, x):
        return self.net(x)

In [234]:
model = BinaryClassifier()

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

In [235]:
num_epochs = 1000
patience = 5  # Early stopping patience
best_val_loss = float('inf')
epochs_no_improve = 0

train_losses = []
val_losses = []

for epoch in range(num_epochs):
    #####################
    #  TRAINING PHASE  #
    #####################
    model.train()
    running_train_loss = 0.0

    for X_batch, y_batch in train_loader:
        # Forward pass
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_train_loss += loss.item() * X_batch.size(0)
    
    epoch_train_loss = running_train_loss / len(train_loader.dataset)
    train_losses.append(epoch_train_loss)
    
    #######################
    #  VALIDATION PHASE  #
    #######################
    model.eval()
    running_val_loss = 0.0

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            logits = model(X_batch)
            loss = criterion(logits, y_batch)
            running_val_loss += loss.item() * X_batch.size(0)
    
    epoch_val_loss = running_val_loss / len(val_loader.dataset)
    val_losses.append(epoch_val_loss)
    
    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {epoch_train_loss:.4f}, "
          f"Val Loss: {epoch_val_loss:.4f}")
    
    # Early stopping check
    if epoch_val_loss < best_val_loss:
        best_val_loss = epoch_val_loss
        epochs_no_improve = 0
        best_model_state = model.state_dict()  # save best model
    else:
        epochs_no_improve += 1
    
    if epochs_no_improve >= patience:
        print("Early stopping triggered!")
        break

# Load the best model weights
model.load_state_dict(best_model_state)

Epoch [1/1000] Train Loss: 0.6390, Val Loss: 0.5551
Epoch [2/1000] Train Loss: 0.4815, Val Loss: 0.4457
Epoch [3/1000] Train Loss: 0.4232, Val Loss: 0.3942
Epoch [4/1000] Train Loss: 0.3830, Val Loss: 0.3644
Epoch [5/1000] Train Loss: 0.3676, Val Loss: 0.3554
Epoch [6/1000] Train Loss: 0.3625, Val Loss: 0.3507
Epoch [7/1000] Train Loss: 0.3520, Val Loss: 0.3458
Epoch [8/1000] Train Loss: 0.3450, Val Loss: 0.3431
Epoch [9/1000] Train Loss: 0.3471, Val Loss: 0.3408
Epoch [10/1000] Train Loss: 0.3463, Val Loss: 0.3408
Epoch [11/1000] Train Loss: 0.3308, Val Loss: 0.3385
Epoch [12/1000] Train Loss: 0.3410, Val Loss: 0.3379
Epoch [13/1000] Train Loss: 0.3424, Val Loss: 0.3371
Epoch [14/1000] Train Loss: 0.3518, Val Loss: 0.3379
Epoch [15/1000] Train Loss: 0.3354, Val Loss: 0.3409
Epoch [16/1000] Train Loss: 0.3388, Val Loss: 0.3399
Epoch [17/1000] Train Loss: 0.3364, Val Loss: 0.3389
Epoch [18/1000] Train Loss: 0.3411, Val Loss: 0.3399
Early stopping triggered!


<All keys matched successfully>

In [236]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for X_batch, y_batch in val_loader:
        logits = model(X_batch)
        # Sigmoid to get probabilities
        probs = torch.sigmoid(logits)
        print(probs)
        print(type(probs))
        print(probs.shape)
        # Convert to predicted class (threshold=0.5)
        preds = (probs >= 0.5).float()
        # Compare to ground truth
        correct += (preds == y_batch).sum().item()
        total += y_batch.size(0)

accuracy = correct / total
print(f"Validation Accuracy: {accuracy:.4f}")

tensor([[0.9385],
        [0.8440],
        [0.9693],
        [0.9715],
        [0.9914],
        [0.9846],
        [0.9181],
        [0.8434],
        [0.8877],
        [0.7065],
        [0.1418],
        [0.9476],
        [0.8413],
        [0.8831],
        [0.9553],
        [0.9658],
        [0.9550],
        [0.9891],
        [0.3160],
        [0.2305],
        [0.9390],
        [0.9626],
        [0.4737],
        [0.9338],
        [0.9691],
        [0.7628],
        [0.9480],
        [0.9656],
        [0.2833],
        [0.9314],
        [0.9724],
        [0.6444],
        [0.9639],
        [0.9814],
        [0.9276],
        [0.1053],
        [0.9927],
        [0.7850],
        [0.9571],
        [0.7740],
        [0.1799],
        [0.9794],
        [0.9872],
        [0.2485],
        [0.7727],
        [0.9010],
        [0.8816],
        [0.9651],
        [0.9716],
        [0.1996],
        [0.2129],
        [0.8442],
        [0.3395],
        [0.8663],
        [0.9733],
        [0

In [246]:
test_X = np.genfromtxt('data/test.csv', delimiter=',')
print(test_X.shape)

test_data = np.genfromtxt('data/test.csv', delimiter=',', skip_header=1)
print("\nTest data shape:", test_data.shape)

# Again, first column = ID, the rest are features
test_ids = test_data[:, 0].astype(int)
X_test = test_data[:, 1:]
X_test = scaler.fit_transform(X_test)
x_test_tensor = torch.from_numpy(X_test).float()

(731, 12)

Test data shape: (730, 12)


In [247]:
model.eval()

with torch.no_grad():
    # Get raw outputs (logits)
    logits = model(x_test_tensor)
    # Apply sigmoid to get probabilities
    probs = torch.sigmoid(logits)
    # Convert to numpy if needed
    probs_np = probs.cpu().numpy()

print("Probabilities shape:", probs_np.shape)
print("First 10 probabilities:", probs_np[:10])

Probabilities shape: (730, 1)
First 10 probabilities: [[0.9845109 ]
 [0.99056983]
 [0.9563838 ]
 [0.127108  ]
 [0.08427977]
 [0.80552214]
 [0.9055211 ]
 [0.9806857 ]
 [0.959904  ]
 [0.808076  ]]


In [249]:
submission_df = pd.DataFrame({
        'id': test_ids,
        'rainfall': probs_np.ravel()
    })
submission_df.to_csv('submission.csv', index=False)
print("\nSubmission file 'submission.csv' created!")


Submission file 'submission.csv' created!
