In [None]:
import os
import itertools
os.environ['OMP_NUM_THREADS'] = '1'
import numpy as np
import mne
from mne_connectivity import symmetric_orth
from hmmlearn import hmm
from scipy.signal import hilbert  # For Hilbert transform
from scipy.signal import resample, butter, lfilter # For downsampling
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import networkx as nx
import seaborn as sns
from scipy.optimize import fminbound

from markovian_helpers import downsample_with_filtering, apply_orthogonalization, VariationalHMM
from numba import njit, jit
# Accumulator for per-subject results; nothing in the visible code appends to it yet.
optimal_states_arr = []

# Input and output root directories (one sub-directory per subject).
files_in = '../data/in/subjects/'
files_out = '../data/out/subjects/'


# Load the list of subject names, one per line. The context manager closes
# the file handle, and blank lines (e.g. the one produced by a trailing
# newline) are dropped so that no empty subject name reaches the loop below.
# Surrounding whitespace (including a stray '\r' from CRLF files) is stripped.
with open("./names.txt", "r") as names:
    subject_list = [line.strip() for line in names.read().splitlines() if line.strip()]

# Recording conditions processed for every subject.
modes = ['EC', 'EO']
def _gaussian_hmm_n_params(n_states, n_features, covariance_type):
    """Return the number of free parameters of a Gaussian HMM.

    The count is the sum of:
      * initial state distribution: ``n_states - 1`` (probabilities sum to 1)
      * transition matrix: ``n_states * (n_states - 1)`` (each row sums to 1)
      * state means: ``n_states * n_features``
      * covariances, depending on ``covariance_type``
        ('spherical', 'diag', 'full' or 'tied').

    Raises:
        KeyError: if ``covariance_type`` is not one of the four supported names.
    """
    covariance_params = {
        'spherical': n_states,
        'diag': n_states * n_features,
        'full': n_states * n_features * (n_features + 1) // 2,
        'tied': n_features * (n_features + 1) // 2,
    }[covariance_type]
    return (
        (n_states - 1)
        + n_states * (n_states - 1)
        + n_states * n_features
        + covariance_params
    )


# Candidate numbers of hidden states to compare (3 to 15 inclusive).
state_numbers = range(3, 16)

for subject in subject_list:
    for mode in modes:
        print(subject, mode)
        # Input and output directories for this subject and mode
        dir_in = f"{files_in}{subject}/{mode}/"
        dir_out = f"{files_out}{subject}/{mode}/"

        orthogonalized_data = np.load(dir_out + "orth.npy")

        # Step 2: Determine the Optimal Number of States for the HMM

        # Variance along axis 0, and a floor set to 5% of the largest variance.
        # Neither value is used by the model selection below.
        feature_variances = np.var(orthogonalized_data, axis=0)
        fraction_of_max_variance = 0.05  # Adjust as needed
        variance_floor = fraction_of_max_variance * np.max(feature_variances)

        # Mean over axis 2 with NaN/Inf entries replaced by 0.
        # Not used by the model selection below.
        features = np.mean(orthogonalized_data, axis=2)
        features = np.ma.masked_invalid(features).filled(0)

        # Flatten everything into a single column: every value becomes one
        # observation with exactly one feature.
        # NOTE(review): the previous comment described a
        # (samples/epochs, labels * sampling frequency) layout, which
        # reshape(-1, 1) does not produce -- confirm which one is intended.
        reshaped_data = orthogonalized_data.reshape(-1, 1)

        # Keep as many principal components as needed to retain 99% of the
        # variance, then standardize the projected data.
        pca = PCA(n_components=0.99)
        pca_data = pca.fit_transform(reshaped_data)
        scaler = StandardScaler()
        pca_data = scaler.fit_transform(pca_data)

        n_samples, n_features = pca_data.shape

        # AIC and BIC for each candidate number of states
        aics = []
        bics = []

        for n_states in state_numbers:
            # Gaussian HMM with full covariance matrices. init_params='stmc'
            # initializes all parameters, but params='st' restricts the EM
            # updates to the start probabilities and the transition matrix.
            # NOTE(review): means and covariances therefore keep their initial
            # values -- confirm this is deliberate.
            model = hmm.GaussianHMM(n_components=n_states, n_iter=50, covariance_type='full', tol=1e-7, verbose=False,
                                    params='st', init_params='stmc')

            # Fit the model using the PCA-transformed data
            model.fit(pca_data)

            # Information criteria: the parameter count covers start
            # probabilities, transitions, means and covariances for the
            # covariance type actually used by the model.
            log_likelihood = model.score(pca_data)
            n_params = _gaussian_hmm_n_params(n_states, n_features, model.covariance_type)
            aic = 2 * n_params - 2 * log_likelihood
            bic = np.log(n_samples) * n_params - 2 * log_likelihood

            aics.append(aic)
            bics.append(bic)

        # Optimal number of states = the candidate with the lowest AIC / BIC
        optimal_states_aic = state_numbers[np.argmin(aics)]
        optimal_states_bic = state_numbers[np.argmin(bics)]

        print(optimal_states_aic, optimal_states_bic)



        # Plotting
#         plt.bar(state_numbers, free_energies)
#         plt.xlabel("Number of States")
#         plt.ylabel("Free Energy")
#         plt.show()

#         # Find the optimal number of states
#         optimal_states = state_numbers[np.argmin(free_energies)]
#         print(f"Optimal number of states based on Variational Bayes for Subject {subject}: {optimal_states}")
#         optimal_states_arr.append({subject:optimal_states})     



101 EC
