# Imports

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import pickle
import os

import functions.functions as f


# Read and present data

In [31]:
# Locations of the two MATLAB exports read by this notebook.
preproc_filepath = '/users/konstantinosvosinas/Desktop/PREP2025/vals_preproc.mat'
denoised_filepath = '/users/konstantinosvosinas/Desktop/PREP2025/vals_denoised.mat'

# Hyper-parameter grids swept by the experiment helpers:
# number of modes (6..12 inclusive) and the alpha values.
Ks = range(6, 13)
alphas = [
    500,
    750,
    1000,
    1250,
    1500,
]
def preprocess_data(filepath):
    """Load the ``vals`` struct of a MATLAB file into a per-scan DataFrame.

    The file must contain a variable ``vals`` whose first field is a cell
    array of scan names and whose second field is a cell array of scan
    matrices. Scan names are split on ``'_'``:

    * token 0 is used as the subject identifier;
    * token 1 selects the sample rate: ``2`` when its fifth character is
      ``'p'``, otherwise ``0.8``;
    * token 2 carries the run number, read as the run of digits that
      starts at its fifth character (so multi-digit runs such as
      ``run-12`` are parsed in full rather than truncated to ``1``).

    Parameters
    ----------
    filepath : str
        Path of the ``.mat`` file to read.

    Returns
    -------
    pandas.DataFrame
        One row per scan with columns ``filename``, ``subject``, ``run``,
        ``sample_rate``, ``timepoints`` (length of the scan's first axis)
        and ``scans`` (the raw array), sorted by ``filename``.

    Raises
    ------
    ValueError
        If a scan name has no digits where the run number is expected.
    """
    import re  # local import keeps this notebook cell self-contained

    vals = loadmat(filepath)['vals'][0][0]
    name_cells, scan_cells = vals[0], vals[1]

    scan_names = [cell[0][0] for cell in name_cells]
    scans = [cell[0] for cell in scan_cells]
    points = [len(scan) for scan in scans]

    subjects, sample_rate, run_nos = [], [], []
    for name in scan_names:
        # Split once per name instead of once per derived column.
        tokens = name.split('_')
        subjects.append(tokens[0])
        sample_rate.append(2 if tokens[1][4] == 'p' else 0.8)

        # Take every leading digit after the 4-character prefix, not just
        # the first one, so runs >= 10 are not silently truncated.
        run_match = re.match(r'\d+', tokens[2][4:])
        if run_match is None:
            raise ValueError(f"cannot parse run number from scan name {name!r}")
        run_nos.append(int(run_match.group()))

    df = pd.DataFrame({
        'filename': scan_names,
        'subject': subjects,
        'run': run_nos,
        'sample_rate': sample_rate,
        'timepoints': points,
        'scans': scans,
    })
    return df.sort_values(by=['filename'], inplace=False)
# Build one table per export: the preprocessed scans and the denoised scans.
data_df = preprocess_data(filepath=preproc_filepath)
data_df_denoised = preprocess_data(filepath=denoised_filepath)

def remove_mean_from_scans_total(df):
    """Return a copy of ``df`` whose scans have their overall mean removed.

    Each entry of the ``scans`` column is replaced by itself minus the
    mean taken over all of its elements. The input frame is not modified.
    """
    def _subtract_global_mean(scan):
        # np.mean without an axis collapses the whole array to one scalar.
        return scan - np.mean(scan)

    centred = df.copy()
    centred['scans'] = centred['scans'].apply(_subtract_global_mean)
    return centred

def remove_mean_from_scans(df):
    """Return a copy of ``df`` with an added ``scans_0mean`` column.

    For every entry of the ``scans`` column, the mean along axis 0 is
    subtracted (``keepdims=True`` so it broadcasts back over that axis)
    and the result is stored in ``scans_0mean``. The original ``scans``
    column is kept.

    The input frame is left untouched: the work is done on a copy, matching
    ``remove_mean_from_scans_total``, so calling this never silently adds a
    column to a frame the caller still holds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``scans`` column of array-like entries.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the extra ``scans_0mean`` column.
    """
    df_cp = df.copy()
    df_cp['scans_0mean'] = df_cp['scans'].apply(
        lambda x: x - np.mean(x, axis=0, keepdims=True)
    )
    return df_cp

# Add the axis-0 mean-removed scans ('scans_0mean') to both tables.
data_df = remove_mean_from_scans(df=data_df)
data_df_denoised = remove_mean_from_scans(df=data_df_denoised)


In [19]:
# One row per subject: the lists of run numbers, timepoints and sample rates.
grouped_df = (
    data_df
    .groupby('subject')
    .agg({'run': list, 'timepoints': list, 'sample_rate': list})
    .reset_index()
    .rename(columns={'run': 'runs', 'timepoints': 'timepoints_per_run'})
)
# Lists are not hashable; tuples are, which the next groupby requires.
grouped_df['timepoints_per_run'] = grouped_df['timepoints_per_run'].map(tuple)

# One row per distinct timepoint pattern, collecting the subjects sharing it.
grouped_by_timepoints = (
    grouped_df
    .groupby('timepoints_per_run')
    .agg({'subject': list, 'sample_rate': list})
    .reset_index()
    .rename(columns={'subject': 'subjects_with_same_timepoints'})
)

# Keep a single sample rate per pattern (first run of the first subject) and
# replace the subject list by the number of subjects in it.
grouped_by_timepoints = grouped_by_timepoints.assign(
    sample_rate=lambda table: table['sample_rate'].map(lambda rates: rates[0][0]),
    subjects_with_same_timepoints=lambda table: table['subjects_with_same_timepoints'].map(len),
)

grouped_by_timepoints

Unnamed: 0,timepoints_per_run,subjects_with_same_timepoints,sample_rate
0,"(57, 750, 750, 750, 750, 750)",1,0.8
1,"(300, 300, 300, 101)",1,2.0
2,"(300, 300, 300, 300)",2,2.0
3,"(300, 300, 300, 300, 300)",11,2.0
4,"(300, 300, 300, 300, 300, 300, 300, 300)",1,2.0
5,"(375, 375, 375)",1,0.8
6,"(375, 375, 375, 375, 375)",6,0.8
7,"(750,)",1,0.8
8,"(750, 62, 750, 750, 750)",1,0.8
9,"(750, 750, 750, 750, 750)",3,0.8


# MVMD application

In [2]:
from mvmd.mvmd import mvmd
import os
import numpy as np
import scipy.io as sio
from tqdm import tqdm
# Default sweep grids: number of modes (6..12 inclusive) and alpha values.
Ks = range(6, 13)
alphas = [
    500,
    750,
    1000,
    1250,
    1500,
]

def run_experiment_K(data_df, sample_rate, run_no, Ks, results_folder, alpha = 1000, tol = 1e-7, column = 'scans'):
    """Run ``mvmd`` for each number of modes in ``Ks`` and save the outputs.

    Rows of ``data_df`` whose ``sample_rate`` and ``run`` equal the given
    values are selected. For every ``K`` a folder ``<results_folder>/K_<K>``
    is created, and for every selected row the transposed ``column`` entry
    is passed to ``mvmd``; the three returned values are written to
    ``<subject>_<timepoints>.mat`` under the keys ``u``, ``u_hat`` and
    ``omega``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Table with at least ``sample_rate``, ``run``, ``subject``,
        ``timepoints`` and ``column``.
    sample_rate, run_no
        Values the ``sample_rate`` and ``run`` columns must equal.
    Ks : iterable
        Values passed to ``mvmd`` as ``num_modes``, one sweep step each.
    results_folder : str
        Parent folder for the per-K output folders.
    alpha, tol
        Forwarded to ``mvmd`` as ``alpha`` and ``tolerance``.
    column : str
        Name of the column holding the array to decompose.
    """
    selected = (data_df['sample_rate'] == sample_rate) & (data_df['run'] == run_no)
    filtered_df = data_df[selected].reset_index(drop=True)

    for K in tqdm(Ks):
        print(f"Running for K = {K}:")
        folder_name = f'{results_folder}/K_{K}'
        os.makedirs(folder_name, exist_ok=True)

        for _, row in filtered_df.iterrows():
            signal = row[column].T
            tag = f'{row.subject}_{row.timepoints}'
            print(f"\tRunning for {tag}, K = {K}, alpha = {alpha}, size = {signal.shape}")
            # NOTE(review): `freq` receives the `sample_rate` column; folder
            # names used elsewhere in this notebook ('800ms' for 0.8) suggest
            # that column is a period in seconds - confirm what mvmd expects.
            u, u_hat, omega = mvmd(
                signal,
                num_modes=K,
                alpha=alpha,
                tolerance=tol,
                freq=row['sample_rate'],
            )
            out_path = os.path.join(folder_name, f'{tag}.mat')
            sio.savemat(out_path, {'u': u, "u_hat": u_hat, "omega": omega})

def run_experiment_alpha(data_df, sample_rate, run_no, alphas, results_folder, K=10, tol = 1e-7, column = 'scans'):
    """Run ``mvmd`` for each value in ``alphas`` and save the outputs.

    Rows of ``data_df`` whose ``sample_rate`` and ``run`` equal the given
    values are selected. For every ``alpha`` a folder
    ``<results_folder>/alpha_<alpha>`` is created, and for every selected
    row the transposed ``column`` entry is passed to ``mvmd``; the three
    returned values are written to ``<subject>_<timepoints>.mat`` under the
    keys ``u``, ``u_hat`` and ``omega``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Table with at least ``sample_rate``, ``run``, ``subject``,
        ``timepoints`` and ``column``.
    sample_rate, run_no
        Values the ``sample_rate`` and ``run`` columns must equal.
    alphas : iterable
        Values passed to ``mvmd`` as ``alpha``, one sweep step each.
    results_folder : str
        Parent folder for the per-alpha output folders.
    K, tol
        Forwarded to ``mvmd`` as ``num_modes`` and ``tolerance``.
    column : str
        Name of the column holding the array to decompose.
    """
    selected = (data_df['sample_rate'] == sample_rate) & (data_df['run'] == run_no)
    filtered_df = data_df[selected].reset_index(drop=True)

    for alpha in tqdm(alphas):
        print(f"Running for alpha = {alpha}:")
        folder_name = f'{results_folder}/alpha_{alpha}'
        os.makedirs(folder_name, exist_ok=True)

        for _, row in filtered_df.iterrows():
            signal = row[column].T
            tag = f'{row.subject}_{row.timepoints}'
            print(f"\tRunning for {tag}, K = {K}, alpha = {alpha}, size = {signal.shape}")
            # NOTE(review): `freq` receives the `sample_rate` column; folder
            # names used elsewhere in this notebook ('800ms' for 0.8) suggest
            # that column is a period in seconds - confirm what mvmd expects.
            u, u_hat, omega = mvmd(
                signal,
                num_modes=K,
                alpha=alpha,
                tolerance=tol,
                freq=row['sample_rate'],
            )
            out_path = os.path.join(folder_name, f'{tag}.mat')
            sio.savemat(out_path, {'u': u, "u_hat": u_hat, "omega": omega})

In [None]:
# Single-K sweep over runs 1-5 of the 0.8 sample-rate scans, using the
# axis-0 mean-removed data.
# NOTE(review): outputs go under 'Results_denoised' but `data_df` (not
# `data_df_denoised`) is passed, and folder indices are run number minus one
# - confirm both are intended.
Ks = [10]
run_output_folders = [
    (1, 'Run1903/Results_denoised/Results_run-0_800ms_/'),
    (2, 'Run1903/Results_denoised/Results_run-1_800ms_/'),
    (3, 'Run1903/Results_denoised/Results_run-2_800ms_/'),
    (4, 'Run1903/Results_denoised/Results_run-3_800ms_/'),
    (5, 'Run1903/Results_denoised/Results_run-4_800ms_/'),
]
for run_no, output_folder in run_output_folders:
    run_experiment_K(data_df, 0.8, run_no, Ks, output_folder, column='scans_0mean')