In [1]:
import numpy as np
from loader import load_oneIC

In [2]:
from sklearn.decomposition import PCA
from hmmlearn import hmm
import scipy.signal as signal
import pickle

In [3]:
import h5py

In [4]:
directory = "/home/INT/malfait.n/Documents/NIC_250819"
file = "FCK_LOCKED_IC_JYOTIKA_250819.mat"

path = f"{directory}/{file}"

mat_file = h5py.File(path, "r")
cells_refs = mat_file['FCK_LOCKED_IC_JYOTIKA']

n_IC = 4
n_subj = 23

In [5]:
# The embedx function copies the `x` array len(lags) times into `xe`
# with lags (i.e. time delays) between lags[0] and lags[-1] (we implement the time-delay array for the HMM).

def embedx(x, lags):
    
    Xe = np.zeros((x.shape[1], x.shape[0],  len(lags)))

    for l in range(len(lags)):
        Xe[:, :, l] = np.roll(x, lags[l], axis=0).swapaxes(0, 1)

    # Remove edges
    valid = np.ones((x.shape[0], 1), dtype=np.int8)
    valid[:np.abs(np.min(lags)), :] = 0
    valid[-np.abs(np.max(lags)):, :] = 0

    Xe = Xe[:, valid[:, 0] == 1, :]

    return Xe, valid


# The hmm_tde function finds parameters for the HMM,
# then uses them to determine the probability of presence of each found state over time.

def hmm_tde(y: np.array, n_states=3, n_iter=100, n_components=8, 
            covariance_type='full', model_type='GMMHMM', tol=0.01, n_mix=1, **kwargs):
    
    if model_type=='GMMHMM':
        model = hmm.GMMHMM(n_components=n_states, n_iter=n_iter,
                            covariance_type=covariance_type, tol=tol, n_mix=n_mix, **kwargs)
        
    elif model_type=='GaussianHMM':
        model = hmm.GaussianHMM(n_components=n_states, n_iter=n_iter,
                            covariance_type=covariance_type, tol=tol, **kwargs)
        
    elif model_type=='MultinomialHMM':
        model = hmm.MultinomialHMM(n_components=n_states, n_iter=n_iter, tol=tol, **kwargs)
    
    else: 
        return "Non-exixting model_type. Please choose 'GMMHMM' or 'GaussianHMM' or 'MultinomialHMM'. default='GMMHMM'"
        
    model.fit(y)
    gamma = model.predict_proba(y)

    return gamma, model

In [6]:
# The parameters we change to hope for some results
lags = np.arange(-29, 29)
n_lags = lags.shape[0]
n_iter=100
n_states_max=6    # for the Hidden Markov Model
n_components=40     # For the principal component analysis
covariance_type='diag'
model_type='GaussianHMM'
tol=0.01
n_mix=1

In [14]:
origin_dir = "tde-hmm/pkl_files/"
subj_list = [i for i in range(2,9)] + [i for i in range(10,13)] + [14, 23]

In [15]:
import csv

In [16]:
csvfile = open(origin_dir+'Scores.csv', 'a', newline='')
spamwriter = csv.writer(csvfile, delimiter=';',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow(["", "3 states", "4 states", "5 states", "6 states"])

for subj in subj_list:
    print(f"---- SUBJECT{subj} ----")
    # Create the input matrix for the TDE-HMM:
    print("Computing the imput matrix for the model")
    datall = []
    IC_list = []
    for IC in range(1, n_IC+1):
        try:
            file = open(origin_dir+f"su{subj}All_lg{n_lags}co{n_components}st{n_states}{n_mix}{model_type}_model.pkl", "rb")
            data, n_trials = load_oneIC(mat_file, cells_refs, subj, IC)
            big_timecourse = np.concatenate([data[f'raw_timecourse_256Hz'][i] for i in range(n_trials)])
            x = big_timecourse.reshape(-1, 1)
            xe, valid = embedx(x, lags)
            pca = PCA(n_components=n_components)
            y = pca.fit_transform(xe[0, :, :])
            datall.append(y)
            IC_list.append(IC)
            print(f"IC{IC} loaded")
        except:
            pass
    y = np.concatenate(datall, axis=1)
    # Get the scores
    row = [f"Subject {subj}"]
    for n_states in range(3, n_states_max+1):
        file = open(origin_dir+f"su{subj}All_lg{n_lags}co{n_components}st{n_states}{n_mix}{model_type}_model.pkl", "rb")
        model = pickle.load(file)
        row.append(model.score(y))
    spamwriter.writerow(row)

---- SUBJECT2 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 loaded
Loading the raw timecourse
IC2 loaded
Loading the raw timecourse
IC3 loaded
Loading the raw timecourse
IC4 loaded
---- SUBJECT3 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 loaded
Loading the raw timecourse
IC2 loaded
Loading the raw timecourse
IC3 loaded
Loading the raw timecourse
IC4 loaded
---- SUBJECT4 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 loaded
Loading the raw timecourse
IC2 loaded
Loading the raw timecourse
IC3 loaded
Loading the raw timecourse
IC4 loaded
---- SUBJECT5 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 loaded
Loading the raw timecourse
The independent component IC2 of the subject 5 is not in the .mat file.
Loading the raw timecourse
IC3 loaded
Loading the raw timecourse
IC4 loaded
---- SUBJECT6 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 

In [17]:
csvfile.close()

In [8]:
subj = 2
n_states = 4

In [None]:
csvfile = open(origin_dir+'Scores.csv', 'a', newline='')
spamwriter = csv.writer(csvfile, delimiter=';',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow(["", "3 states", "4 states", "5 states", "6 states"])

In [10]:
print(f"---- SUBJECT{subj} ----")
# Create the input matrix for the TDE-HMM:
print("Computing the imput matrix for the model")
datall = []
# xeall = []
IC_list = []
for IC in range(1, n_IC+1):
    try:
        data, n_trials = load_oneIC(mat_file, cells_refs, subj, IC)
        big_timecourse = np.concatenate([data[f'raw_timecourse_256Hz'][i] for i in range(n_trials)])
        x = big_timecourse.reshape(-1, 1)
        xe, valid = embedx(x, lags)
#         xeall.append(xe)
        pca = PCA(n_components=n_components)
        y = pca.fit_transform(xe[0, :, :])
        datall.append(y)
        IC_list.append(IC)
        print(f"IC{IC} loaded")
    except:
        pass
y = np.concatenate(datall, axis=1)
# Get the scores
for n_states in range(3, n_states_max+1):
    file = open(origin_dir+f"su{subj}All_lg{n_lags}co{n_components}st{n_states}{n_mix}{model_type}_model.pkl", "rb")
    model = pickle.load(file)
    print(model.score(y))

---- SUBJECT2 ----
Computing the imput matrix for the model
Loading the raw timecourse
IC1 loaded
Loading the raw timecourse
IC2 loaded
Loading the raw timecourse
IC3 loaded
Loading the raw timecourse
IC4 loaded
-40794760.12396281
-41171060.15604349
-41529085.88658692
