In [5]:
from __future__ import division
import numpy as np
import sys

# Read the three input tables from the working directory.
# The two feature files are parsed as comma-separated values; the label
# file is parsed with genfromtxt's default delimiter.
X_train = np.genfromtxt(fname='X_train.csv', delimiter=',')
y_train = np.genfromtxt(fname='y_train.csv')
X_test = np.genfromtxt(fname='X_test_all.csv', delimiter=',')



In [6]:
## can make more functions if required


def _regularized_gaussian_terms(Sigma, eps, max_tries=8):
    """Return ``(inverse, log-determinant)`` of a ridge-regularized covariance.

    ``eps * I`` is added to ``Sigma`` first. If the result is still not
    numerically positive definite, progressively larger ridges (10x the
    previous increment each time) are added until it is, up to ``max_tries``
    attempts.

    Raises:
        np.linalg.LinAlgError: if no attempt yields an invertible matrix with
            a positive determinant.
    """
    eye = np.eye(Sigma.shape[0])
    bump = eps
    Sigma_reg = Sigma + bump * eye
    for _ in range(max_tries):
        # slogdet signals a singular matrix through sign == 0 / logdet == -inf
        # instead of raising, so the result has to be checked explicitly.
        sign, logdet = np.linalg.slogdet(Sigma_reg)
        if sign > 0 and np.isfinite(logdet):
            try:
                return np.linalg.inv(Sigma_reg), logdet
            except np.linalg.LinAlgError:
                pass  # singular in floating point after all: escalate the ridge
        bump *= 10.0
        Sigma_reg = Sigma_reg + bump * eye
    raise np.linalg.LinAlgError(
        "class covariance could not be regularized into an invertible matrix"
    )


def pluginClassifier(X_train, y_train, X_test):
    """Classify ``X_test`` with a Gaussian plug-in (Bayes) classifier.

    For every class found in ``y_train`` the prior, mean and full covariance
    are estimated by maximum likelihood (covariance divisor ``Nk``), the
    covariance is ridge-regularized, and each test row is assigned the class
    maximizing

        log pi_k - 0.5 * [(x - mu_k)^T Sigma_k^{-1} (x - mu_k)
                          + log det Sigma_k + d * log(2 * pi)].

    Ties go to the smallest class label.

    Args:
        X_train: array-like of shape (n, d) with the training features.
        y_train: array-like with n class labels; any shape, it is flattened.
            Labels are cast to ``int`` in the returned predictions, so they
            are expected to be integer-valued.
        X_test: array-like of shape (m, d). A 1-D input is treated as a single
            sample (or as a column of samples when ``d == 1``).

    Returns:
        1-D integer ndarray of length m with the predicted label of each row.

    Raises:
        ValueError: if ``X_train`` is not a non-empty 2-D array, if the feature
            dimension of ``X_test`` differs from ``X_train``, or if any input
            contains NaN/inf values.
        np.linalg.LinAlgError: if a class covariance cannot be made invertible.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train).ravel()

    if X_train.ndim != 2 or X_train.shape[0] == 0:
        raise ValueError("X_train must be a non-empty 2-D array of shape (n, d)")
    n, d = X_train.shape

    # Nothing to classify: keep returning an empty integer vector.
    if X_test.size == 0:
        return np.empty(0, dtype=int)

    # genfromtxt yields a 1-D array for a one-row (or one-column) file.
    if X_test.ndim == 1:
        X_test = X_test.reshape(-1, 1) if d == 1 else X_test.reshape(1, -1)
    if X_test.ndim != 2 or X_test.shape[1] != d:
        raise ValueError(
            "X_test must have %d feature column(s) to match X_train" % d
        )

    # NaN/inf would otherwise silently knock classes out of the comparison.
    if not (np.isfinite(X_train).all() and np.isfinite(X_test).all()):
        raise ValueError("X_train and X_test must contain only finite values")
    if np.issubdtype(y_train.dtype, np.floating) and not np.isfinite(y_train).all():
        raise ValueError("y_train must contain only finite values")

    # Masks are built from the raw label values; the int cast is applied only
    # to the labels that are reported back.
    labels = np.unique(y_train)
    K = labels.shape[0]
    m = X_test.shape[0]

    eps = 1e-6  # base covariance regularization
    const_term = d * np.log(2.0 * np.pi)

    scores = np.empty((m, K))
    for j, label in enumerate(labels):
        Xk = X_train[y_train == label]
        Nk = Xk.shape[0]  # >= 1 because label came from y_train itself

        # Maximum-likelihood mean and covariance (divisor Nk, not Nk - 1).
        mu_k = Xk.mean(axis=0)
        centered = Xk - mu_k
        Sigma_k = (centered.T @ centered) / float(Nk)
        Sigma_inv_k, logdet = _regularized_gaussian_terms(Sigma_k, eps)

        # Squared Mahalanobis distance of every test row to this class at once.
        diff = X_test - mu_k
        mquad = np.sum((diff @ Sigma_inv_k) * diff, axis=1)

        log_prior = np.log(Nk / float(n))
        scores[:, j] = log_prior - 0.5 * (mquad + logdet + const_term)

    # argmax returns the first maximum, i.e. the smallest label on ties.
    return labels.astype(int)[np.argmax(scores, axis=1)]


In [7]:
# Fit the classifier on the training set and label every test row;
# final_outputs holds the value returned by pluginClassifier.
final_outputs = pluginClassifier(X_train=X_train, y_train=y_train, X_test=X_test)

In [8]:
# Write the predictions to y_pred.csv using integer formatting, then
# confirm on stdout.
np.savetxt(fname="y_pred.csv", X=final_outputs, fmt="%d", delimiter=",")
print("Predictions saved to y_pred.csv")

Predictions saved to y_pred.csv
