In [1]:
# Smoke-test cell: prints a fixed string and defines nothing used later.
print("Hello world")

Hello world


In [None]:
import polars as pl

# Input CSVs. These are absolute Windows paths, so they must be adjusted
# when the notebook runs on another machine.
MONDAY_FILE = r"D:/Dissertation 2025/Results/Monday_clean.csv"
FRIDAY_FILE = r"D:/Dissertation 2025/Results/Friday_clean.csv"

# Upper bound on the number of rows read from each file.
sample_size = 5000

# Both files are read with identical options:
#   * n_rows caps the read at `sample_size` rows;
#   * infer_schema_length=0 turns dtype inference off (per the polars docs
#     every column then arrives as a string), which is why a later cell
#     casts the feature columns to Float64;
#   * ignore_errors=True asks polars to keep going on parse errors.
monday, friday = (
    pl.read_csv(
        csv_path,
        n_rows=sample_size,
        infer_schema_length=0,
        ignore_errors=True,
    )
    for csv_path in (MONDAY_FILE, FRIDAY_FILE)
)

print(monday.shape, friday.shape)


(5000, 81) (5000, 81)


In [None]:
# Columns that are left out of the Float64 cast and of the feature matrices.
label_cols = {"Label", "y_binary", "y_family", "day"}

def convert_to_numeric(df):
    """Cast every column of ``df`` that is not in ``label_cols`` to Float64.

    The cast is non-strict (``strict=False``); per the polars documentation
    values that cannot be converted become null instead of raising.

    Parameters
    ----------
    df : polars.DataFrame
        Frame whose non-label columns should become floats.

    Returns
    -------
    polars.DataFrame
        The result of ``df.with_columns`` applied to the cast expressions;
        columns listed in ``label_cols`` are not touched.
    """
    feature_names = [name for name in df.columns if name not in label_cols]
    float_casts = [
        pl.col(name).cast(pl.Float64, strict=False) for name in feature_names
    ]
    return df.with_columns(float_casts)

import numpy as np

# Apply the same Float64 cast to both samples.
monday_num, friday_num = (
    convert_to_numeric(frame) for frame in (monday, friday)
)

# Feature columns: every column of the Monday frame that is not a label
# column. The same list is used to select from the Friday frame.
num_cols = [name for name in monday_num.columns if name not in label_cols]

# Polars -> NumPy: export the feature columns and replace NaN with 0.0.
# NOTE(review): only `nan=` is overridden, so by NumPy's documented defaults
# +/-inf are replaced by the largest finite floats rather than by 0. Such
# values would overflow when squared in a distance computation -- confirm
# that this is the intended handling.
X, Y = (
    np.nan_to_num(frame.select(num_cols).to_numpy(), nan=0.0)
    for frame in (monday_num, friday_num)
)

print("Shape X:", X.shape)
print("Shape Y:", Y.shape)


Shape X: (5000, 77)
Shape Y: (5000, 77)


In [4]:
def _rbf_kernel_sum(A, B, gamma, batch_size):
    """Sum exp(-gamma * ||a - b||^2) over every row pair (a in A, b in B)."""
    total = 0.0
    for start in range(0, A.shape[0], batch_size):
        chunk = A[start:start + batch_size]
        # Broadcasting builds a (chunk_rows, B_rows, n_features) temporary;
        # batch_size bounds its first dimension.
        sq_dists = np.sum((chunk[:, None, :] - B[None, :, :]) ** 2, axis=2)
        total += np.sum(np.exp(-gamma * sq_dists))
    return total


def rbf_mmd_safe(X, Y, gamma=1.0, batch_size=500):
    """Squared MMD between two samples under an RBF kernel, computed in batches.

    Uses k(a, b) = exp(-gamma * ||a - b||^2) and returns

        mean k(x, x') + mean k(y, y') - 2 * mean k(x, y)

    where each mean runs over *all* row pairs, self-pairs included (the sums
    are divided by n*n, m*m and n*m respectively).

    Parameters
    ----------
    X : array-like, shape (n, d)
        First sample, one observation per row.
    Y : array-like, shape (m, d)
        Second sample; must have the same number of columns as X.
    gamma : float, default 1.0
        Kernel coefficient multiplying the squared Euclidean distance.
    batch_size : int, default 500
        Number of rows processed per step. Each step materialises a
        (batch_size, rows, d) temporary, so smaller values use less memory.

    Returns
    -------
    numpy.float64
        The estimate described above.

    Raises
    ------
    ValueError
        If X or Y is not 2-D, either sample is empty, the feature counts
        differ, or batch_size is smaller than 1.
    """
    X = np.asarray(X, dtype=np.float64)
    Y = np.asarray(Y, dtype=np.float64)

    if X.ndim != 2 or Y.ndim != 2:
        raise ValueError("X and Y must be 2-D arrays of shape (rows, features)")
    if X.shape[1] != Y.shape[1]:
        # Without this check a single-column input would broadcast silently.
        raise ValueError("X and Y must have the same number of features")

    n = X.shape[0]
    m = Y.shape[0]
    if n == 0 or m == 0:
        raise ValueError("X and Y must each contain at least one row")
    if batch_size < 1:
        # A negative step would skip every batch and silently yield 0.0.
        raise ValueError("batch_size must be a positive integer")

    # Normalise each kernel sum by its number of row pairs.
    XX = _rbf_kernel_sum(X, X, gamma, batch_size) / (n * n)
    YY = _rbf_kernel_sum(Y, Y, gamma, batch_size) / (m * m)
    XY = _rbf_kernel_sum(X, Y, gamma, batch_size) / (n * m)

    mmd = XX + YY - 2 * XY
    return mmd


In [None]:
# RBF kernel coefficient: one over the number of feature columns in X.
# NOTE(review): the visible cells never rescale the features, so the kernel
# operates on raw magnitudes -- confirm this bandwidth is what is intended.
gamma = 1.0 / X.shape[1]   

# batch_size=300 overrides the function's default of 500.
mmd_value = rbf_mmd_safe(X, Y, gamma=gamma, batch_size=300)
print("\nRBF MMD between Monday and Friday (approx):", mmd_value)


  dists = np.sum((Xb[:, None, :] - X[None, :, :]) ** 2, axis=2)
  dists = np.sum((Yb[:, None, :] - Y[None, :, :]) ** 2, axis=2)
  dists = np.sum((Xb[:, None, :] - Y[None, :, :]) ** 2, axis=2)



RBF MMD between Monday and Friday (approx): 0.00047952097333077995
