##### Load required packages

In [36]:
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from cmath import nan, isinf
from sklearn.metrics import confusion_matrix
import warnings
warnings.filterwarnings("ignore")

##### Load data

In [6]:
def load_data(data_dir=None):
    """Load the recording arrays and the per-session index lists from .mat files.

    Parameters
    ----------
    data_dir : str, optional
        Directory that holds the ``.mat`` files. When omitted, the original
        hard-coded project directory is used, so ``load_data()`` reads exactly
        the same paths as before.

    Returns
    -------
    mat_load
        Variable ``allTraces`` from ``allTraces.mat`` (all data).
    roi_load
        Variable ``allROIPositions`` from ``allROIPositions.mat`` (ROI
        positions; loaded for completeness, not used by any function here).
    ind_load
        Variable ``blFinalHit`` from ``blFinalHit.mat`` (indices).
    lista
        List with one entry per session (4 in total); each entry is the list
        of first-column values of ``indSess1``, ``List2``, ``List3`` and
        ``List4`` respectively.
    """
    if data_dir is None:
        data_dir = 'C:/Users/Zephyrus/Marija/projekti/MINHEN/data'

    def read_variable(file_name, variable):
        # Forward slashes are accepted on every platform, and with the default
        # directory this yields the same path string as the original code.
        return sio.loadmat(data_dir + '/' + file_name)[variable]

    mat_load = read_variable('allTraces.mat', 'allTraces')
    roi_load = read_variable('allROIPositions.mat', 'allROIPositions')
    ind_load = read_variable('blFinalHit.mat', 'blFinalHit')

    # (file, variable) pairs of the per-session index lists, in session order.
    session_sources = [
        ('indicesSession1.mat', 'indSess1'),
        ('List2.mat', 'List2'),
        ('List3.mat', 'List3'),
        ('List4.mat', 'List4'),
    ]
    # Keep only the first element of every row of each loaded list.
    lista = [
        [row[0] for row in read_variable(file_name, variable)]
        for file_name, variable in session_sources
    ]

    return mat_load, roi_load, ind_load, lista

"index" is a list of beginnings for all rois

In [25]:
# Number of neurons recorded in each ROI, in storage order.
roi_sizes = (118, 88, 100, 98, 109, 78, 121, 76, 113, 97, 112, 118)

# index[k] is the running total of the first k ROI sizes, i.e. the offset at
# which ROI k+1 starts (index[0] == 0, index[-1] == total neuron count).
index = [0] * (len(roi_sizes) + 1)
for roi, size in enumerate(roi_sizes, start=1):
    index[roi] = index[roi - 1] + size

In [26]:
# Echo the cumulative ROI boundary list so the notebook displays it.
index

[0, 118, 206, 306, 404, 513, 591, 712, 788, 901, 998, 1110, 1228]

## Data preparation and understanding

---
- 1 region     
- all info about speakers (a few speakers from different sides)   
- 4 sessions - recording in different time points
- different trials with pauses between them
- white noise - that we use  
- 50 info per neuron (per speaker)  
- an and aw states (2 states) 
- 100 neurons per region => image 100x250(5x50)     
- 8 of them (2 states x 4 sessions) x 30 (sampling points over time)     
- final 240 images (120 per state)
- oversampling input images with NaNs (maybe random oversampling) - some neurons move and disappear     
- representing input images as different stimulus (100 x 50)
- combination of 2 different regions in one image / mice????
- including bandpass noise into data representation (the second dimension)
- representing input images as different stimulus (100 x 50)
---
INFO about roi_load    
dim - (1228, 2, 2, 4)      
positions of neurons; they can be used for correlation analysis      

- 0 - same     
- 1 - y and then x dimension     
- 2 - anest and awake     
- 3 - sessions
---
INFO about mat_load    
dim - (1228, 2, 4, 2, 5, 50, 31)    
- 0 - all the ROIs concatenated together (the 1st and 2nd ROI belong to the 1st mouse, the 3rd and 4th ROI belong to the 2nd mouse, etc.)      
- 1 - 2 states   
- 2 - 4 sessions     
- 3 - the white noise (0th index) and the bandpass noise (1st index) - we always use the white noise          
- 4 - 5 speakers
- 5 - 50 trials (for 1 session each speaker is presented 50 times); certain trials are also wrong
- 6 - 31 samples, framerate of 31 Hz (5s of data), but the stimulus arrives at 11th sample (10 index in python)

blFinalHit.mat - if the measurement was correct or not

In [93]:
def data_per_roi(mat_load, ind_load, lista):
    for roi in range(1, len(index)):
        # data for the specific roi
        dat_tr = mat_load[index[roi-1] : index[roi]+1, :, :, 0, :, :, :]
        ind_tr = ind_load[index[roi-1] : index[roi]+1, :, :, 0, :, :]

        # divide data into two categories based on the state - an and aw
        dat_stan, ind_stan = dat_tr[:, 0, :, :, :, :], ind_tr[:, 0, :, :, :], 
        dat_staw, ind_staw = dat_tr[:, 1, :, :, :, :], ind_tr[:, 1, :, :, :]

        for sesion in range(4):
            
            lista_ses = lista[sesion]
            # choose session
            dat_sean, ind_sean = dat_stan[:, sesion, :, :, :], ind_stan[:, sesion, :, :]
            dat_seaw, ind_seaw = dat_staw[:, sesion, :, :, :], ind_staw[:, sesion, :, :]

            for trial in range(50):
                # data for the specific neuron
                dat_sean_df, ind_sean_df = dat_sean[:, :, trial, :], ind_sean[:, :, trial].astype('float')
                dat_seaw_df, ind_seaw_df = dat_seaw[:, :, trial, :], ind_seaw[:, :, trial].astype('float')
                # if activation is 0, then it is an invalid recording (maybe the neuron disappeared)
                ind_sean_df[ind_sean_df == 0] = 'nan' 
                ind_seaw_df[ind_seaw_df == 0] = 'nan' 

                # inicialization for the next step
                dat_sean_df_, dat_seaw_df_, ind_sean_df_, ind_seaw_df_ = dat_sean_df[0:1,:,:], dat_seaw_df[0:1,:,:], ind_sean_df[0:1,:], ind_seaw_df[0:1,:]
                # checking if the data for a specific roi is valid (in the list_ses)
                for matrix in range(0,np.shape(dat_sean_df)[0]-1):
                    if(index[roi-1]+matrix in lista_ses):
                        dat_sean_df_ = np.append(dat_sean_df_, dat_sean[matrix][:, trial, :].reshape(1,dat_sean[matrix][:, trial, :].shape[0],dat_sean[matrix][:, trial, :].shape[1]), axis=0)
                        dat_seaw_df_ = np.append(dat_seaw_df_, dat_seaw[matrix][:, trial, :].reshape(1,dat_seaw[matrix][:, trial, :].shape[0],dat_seaw[matrix][:, trial, :].shape[1]), axis=0)
                        ind_sean_df_ = np.append(ind_sean_df_, ind_sean[matrix][:, trial].reshape(1, ind_sean[matrix][:, trial].shape[0]), axis=0)
                        ind_seaw_df_ = np.append(ind_seaw_df_, ind_seaw[matrix][:, trial].reshape(1, ind_seaw[matrix][:, trial].shape[0]), axis=0)
                # del the first row of the matrix
                dat_sean_df, dat_seaw_df, ind_sean_df, ind_seaw_df = dat_sean_df_[1:,:,:], dat_seaw_df_[1:,:,:], ind_sean_df_[1:,:], ind_seaw_df_[1:,:]
                
                # in different time points we have the same indices so we set 0 to the inactive ones
                for time in range(31):
                    dat_sean_df[:,:,time] = np.multiply(np.array(dat_sean_df[:,:,time]),ind_sean_df)
                    dat_seaw_df[:,:,time] = np.multiply(np.array(dat_seaw_df[:,:,time]),ind_seaw_df)

                # (98 - neurons, 5 - speakers, 31 - time points)
                # 5 speakers x 31 time points - so in total we have matrix with NUM trials x 155
                #dat_sean_df = dat_sean_df.reshape(5,dat_sean_df.shape[0]*31)
                #dat_seaw_df = dat_seaw_df.reshape(5,dat_seaw_df.shape[0]*31)

                # exclude any nan recordings
                delete_ind = []
                for i in range(len(dat_sean_df)):
                    neuron = dat_sean_df[i] 
                    for j in range(len(neuron)):
                        if np.any(np.isnan(neuron[j])): 
                            delete_ind.append(i)
                            break
                dat_sean_df = np.delete(dat_sean_df, delete_ind, axis=0)
                delete_ind = []
                for i in range(len(dat_seaw_df)):
                    neuron = dat_seaw_df[i] 
                    for j in range(len(neuron)):
                        if np.any(np.isnan(neuron[j])): 
                            delete_ind.append(i)
                            break
                dat_seaw_df = np.delete(dat_seaw_df, delete_ind, axis=0)

                # this is some checking that I don't remember, but it didn't occure at any point
                dat_sean_df[dat_sean_df == np.inf], dat_seaw_df[dat_seaw_df == np.inf] = 0, 0
                if(np.any(dat_seaw_df == np.inf)==True or np.any(dat_sean_df == np.inf)==True or np.any(dat_sean_df == -np.inf)==True or np.any(dat_seaw_df == -np.inf)==True): print(1)
                
                #print(len(dat_sean_df), len(dat_seaw_df))
                # print if theres any nan
                if np.any(np.isnan(dat_sean_df)) or np.any(np.isnan(dat_seaw_df)): print('NAN')

                np.save("C:/Users/Zephyrus/Marija/projekti/MINHEN/an_aw/data/an" + "_mouse" + str((roi-1)//2+1) + "_roi" + str((roi-1)%2+1) + "_ses" + str(sesion+1) + "_tr" + str(trial+1) + ".npy", dat_sean_df)
                np.save("C:/Users/Zephyrus/Marija/projekti/MINHEN/an_aw/data/aw" + "_mouse" + str((roi-1)//2+1) + "_roi" + str((roi-1)%2+1) + "_ses" + str(sesion+1) + "_tr" + str(trial+1) + ".npy", dat_seaw_df)          

In [94]:
#mat_load, roi_load, ind_load, lista = load_data()

In [95]:
# Run the export. mat_load, ind_load and lista must already be defined; the
# load_data() call that produces them is commented out in the previous cell.
data_per_roi(mat_load, ind_load, lista)