In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
def adjust_intervals(intervals, margin):
    """Shrink every interval by `margin` on both sides.

    `margin` is added to the first column (lower bound) and subtracted from
    the second column (upper bound). Rows whose bounds cross as a result
    (upper < lower) get their two values swapped so they stay ordered.

    The addition produces a new array, so the caller's input is not modified.
    """
    offsets = np.array([margin, -margin])
    shrunk = intervals + offsets
    lower, upper = shrunk[:, 0], shrunk[:, 1]
    crossed = lower > upper
    # Indexing with a boolean mask on the right-hand side yields a copy, so
    # reading the reversed columns and writing them back cannot alias.
    shrunk[crossed] = shrunk[crossed, ::-1]
    return shrunk

def getting_best_mu(intervals, margin=0):
    """Pick the constant prediction mu with the lowest total squared hinge loss.

    The intervals are first passed through `adjust_intervals(intervals, margin)`.
    The candidates for mu are the distinct finite endpoints of the adjusted
    intervals. For each candidate, an interval contributes
    `max(0, lower - mu)**2 + max(0, mu - upper)**2`, i.e. zero when mu lies
    inside the interval. Infinite bounds never contribute.

    Parameters
    ----------
    intervals : array of shape (n, 2)
        Column 0 is the lower bound, column 1 the upper bound.
    margin : number, default 0
        Margin forwarded to `adjust_intervals`.

    Returns
    -------
    The candidate endpoint with the smallest summed loss. On ties the smallest
    such endpoint is returned (candidates are sorted and `np.argmin` returns
    the first minimum). If there is no finite endpoint at all, 0.0 is returned.
    """
    intervals = adjust_intervals(intervals, margin)
    endpoints = np.unique(intervals[np.isfinite(intervals)])
    if endpoints.size == 0:
        # No candidate to evaluate (empty input, or no finite bound anywhere);
        # np.argmin would raise ValueError on the empty loss vector. When every
        # interval is (-inf, inf) any value has zero loss, so use 0.0.
        return 0.0
    y_min, y_max = intervals[:, 0], intervals[:, 1]
    # Both loss matrices have shape (n_intervals, n_candidates).
    lower_loss = np.maximum(0, y_min[:, None] - endpoints)**2  # Loss when mu is below y_min
    upper_loss = np.maximum(0, endpoints - y_max[:, None])**2  # Loss when mu is above y_max
    losses = np.sum(lower_loss + upper_loss, axis=0)
    min_loss_idx = np.argmin(losses)
    mu = endpoints[min_loss_idx]
    return mu

In [3]:
# Root folder; every sub-directory in it is treated as one dataset.
folder_path = '../../data'
# os.listdir returns entries in arbitrary order; sort them so the datasets
# are always processed in the same order from run to run.
datasets = sorted(
    name for name in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, name))
)

In [4]:
# Constant baseline: for every dataset and every fold, fit a single value mu
# on the training target intervals and predict it for each test row.

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first write.
os.makedirs('predictions', exist_ok=True)

for dataset in datasets:
    # Load data from the same root folder that was scanned to build `datasets`
    folds_df    = pd.read_csv(f'{folder_path}/{dataset}/folds.csv')
    features_df = pd.read_csv(f'{folder_path}/{dataset}/features.csv')
    target_df   = pd.read_csv(f'{folder_path}/{dataset}/targets.csv')

    for test_fold in sorted(np.unique(folds_df['fold'])):

        # Row labels of the training and test parts of this split. The three
        # frames are matched through these labels (default index from read_csv).
        train_indices = folds_df[folds_df['fold'] != test_fold].index
        test_indices  = folds_df[folds_df['fold'] == test_fold].index

        # The prediction ignores the feature values; only the number of test
        # rows is taken from the features.
        X_test  = features_df.loc[test_indices].to_numpy()
        # NOTE(review): assumes targets.csv holds exactly two columns
        # (lower bound, upper bound) -- confirm against the data files.
        y_train = target_df.loc[train_indices].to_numpy()

        # Same constant for every test row
        mu = getting_best_mu(y_train)
        target_mat_pred = mu * np.ones(X_test.shape[0])

        # save to csv
        prediction = pd.DataFrame({'pred': target_mat_pred})
        prediction.to_csv(f"predictions/{dataset}.{test_fold}.csv", index=False)