## Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.io
import itertools as it
import scipy.special as psi
plt.style.use('classic')
import seaborn as sns
import pandas as pd
import time 

from scipy.io import loadmat
from scipy import stats
from numpy.random import seed
from numpy.random import rand
from scipy.integrate import quad
from scipy.io import savemat
from tempfile import TemporaryFile
from scipy.io import loadmat
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.decomposition import KernelPCA
from mpl_toolkits import mplot3d
from mPE_fn import mPE
from scipy.spatial import distance
from scipy.stats import entropy

## Load and Clean Data

In [2]:
# loading entire high-dimensional trajectory
path = '/rds/general/user/lr4617/home/4th_Year_Project/CAPTURE_rat_multidimensional/raw_data/normal/'
trajectories = os.listdir(path)

lengths = []
nan_cols = [12,13,14]
for n, traj_n in enumerate(trajectories):
    if traj_n != "traj_2":
        print(traj_n)
        path_n = path + traj_n + '/trajectories/'
        trajs = os.listdir(path_n)
        trajectory = loadmat(path_n + trajs[0])
        trajectory = trajectory['trajectory']
        # removing invalid values (e.g. NaN)
        # input data is already normalized (z-score) but needs to get rid of non-valued datapoints
        all_trajectories = np.zeros((trajectory.shape[0]*int(len(trajs)), trajectory.shape[1]-3))
        for i, time_bin in enumerate(trajs):
            if i <= 20:
                trajectory = loadmat(path_n + time_bin)
                trajectory = trajectory['trajectory'] 
                trajectory = np.delete(trajectory, nan_cols, 1)
                idx_2 = i*trajectory.shape[0]
                all_trajectories[idx_2:idx_2+trajectory.shape[0], 0:trajectory.shape[1]] = trajectory

        lengths.append(all_trajectories.shape[0])

        if n==0:
            rats = all_trajectories
        if n>0:
            rats = np.concatenate((rats, all_trajectories), axis=0)

print(rats.shape)

traj_5
traj_3
traj_4
traj_1
(38880000, 57)


## K-S Test same dimensions

In [3]:
def probability(sequence, decimals=1):
    '''
    input: 
        - 1D sequence of rv observations
    return: 
        - probability vector
    '''
    # round input sequence to avoid sparse probability vector
    sequence = np.round(sequence, decimals)
    unique = np.unique(sequence, axis=0)
    n_triplets_1 = len(unique)
    
    # fill probability vector
    prob_vector = 2*np.ones((n_triplets_1, 1))
    # already_seen = np.zeros((n_triplets_1, 3))
    idx_ = 0
    for i, row in enumerate(sequence):
        # if row.tolist() not in already_seen.tolist():
        occurrences = np.count_nonzero(sequence==row)
        # already_seen[idx_, :] = row
        # idx_ += 1
        idx = np.where(np.all(np.isclose(unique, row), axis=1))
        if prob_vector[idx[0]] == 2:
            prob_vector[idx[0]] = occurrences/(len(sequence))
            
    return prob_vector

In [4]:
trajectories = os.listdir(path)
n_rats = len(trajectories)

dims = np.arange(3)
print("CALCULATING PROBABILITY VECTORS")
for i in range(n_rats):
    print(i)
    sequence = rats[0:lengths[i], dims]
    prob_vector = probability(sequence)
    if i == 0:
        probs = sequence
    else:
        probs = np.concatenate((probs, sequence), axis=1)

print("CALCULATING K-S MATRIX")
# "ks_matrix" is symmetric (A=A')
ks_matrix = np.zeros((len(trajectories), len(trajectories)))
significance_level = 0.05
for ii in range(n_rats):
    for jj in range(n_rats):
        _, p_value = stats.ks_2samp(probs[:, ii], probs[:, jj])
        if p_value<significance_level:
            ks_matrix[ii, jj] = 0
        elif p_value>=significance_level:
            ks_matrix[ii, jj] = 1
            
print(trajectories)
print(ks_matrix)

CALCULATING PROBABILITY VECTORS
0
0
0.0003044605255126953
300
9.062580823898315
600
9.046406269073486
900
9.046587467193604
1200
9.034730434417725
1500
9.05178189277649
1800
9.048856973648071
2100
9.053689956665039
2400
9.049997806549072
2700
9.060191631317139
3000
8.95849609375
3300
8.641016006469727
3600
8.705408096313477
3900
8.700624704360962
4200
8.791344165802002
4500
8.637744188308716
4800
8.619877338409424
5100
8.62427830696106
5400
8.618142366409302
5700
8.684651136398315
6000
8.631887674331665
6300
8.630306482315063
6600
8.611292839050293
6900
8.607669353485107
7200
8.604480266571045
7500
8.610569477081299


KeyboardInterrupt: 