# Large snippets of code from test_notebooks folder, for archiving and reference

### Load data for eye position and a CEBRA-Time model 

In [None]:
## Load data for eye position and a CEBRA-Time model ##
# Reads a window of left/right eye position and dF/F data for a single fish
# from the HDF5 file, caches both as .npz files inside data_folder, then
# reloads the cached files through cebra.load_data.
# Relies on FILENAME, TIMESTEPS and ROIS, and on h5py / np / cebra, being
# defined earlier in the notebook.

# for a single fish
filename = FILENAME
filename_trunc = filename.split('/')[-1][:-3] # fish and date only
data_folder = 'data/'

# choose where in dataset to sample
start, stop = 0, 0+TIMESTEPS

# names of the cached .npz files: the 9 characters preceding the 3-character
# extension of filename, plus a suffix
filename_eye_pos = f'{filename[-12:-3]}_eye_pos.npz'
filename_dfof = f'{filename[-12:-3]}_dfof.npz'


# extract eye position and neural data
# do not attempt to load the entire file 
print("Accessing data...")
with h5py.File(filename, 'r') as f:

    # eye position
    eye_pos_l = f['visuomotor']['eye_pos']['Left']
    eye_pos_r = f['visuomotor']['eye_pos']['Right']
    print(f"Full eye position dataset shape is: {eye_pos_l.shape}")

    # neural 
    neural = f['rois']['dfof']
    print(f"Full neural dataset shape is: {neural.shape}")

    # select first TIMESTEPS timesteps and random ROIS rois
    # neural
    # ROIS distinct column indices, sorted ascending (h5py fancy indexing
    # requires the index list to be in increasing order)
    neural_indexes = np.sort(
        np.random.choice(np.arange(neural.shape[1]), size=ROIS, replace=False)
    )
    neural = np.array(neural[start:stop, neural_indexes])

    # eye position
    eye_pos_l = np.array(eye_pos_l[start:stop])
    eye_pos_r = np.array(eye_pos_r[start:stop])

    print(f"Truncated dataset shapes are:\n \
            eye_pos_l: {eye_pos_l.shape}\n \
            eye_pos_r: {eye_pos_r.shape}\n \
            neural: {neural.shape}")

    assert neural.shape == (TIMESTEPS, ROIS)


    # save datasets
    # the path is data_folder directly followed by the file name, i.e. the
    # same path the loading code below reads from
    np.savez(f'{data_folder}{filename_eye_pos}', eye_pos_l=eye_pos_l, eye_pos_r=eye_pos_r)
    np.savez(f'{data_folder}{filename_dfof}', neural=neural)

print("Data accessed.")

# reload through CEBRA: both eye traces live in the eye-position file, the
# neural data in the dfof file
eye_pos_l = cebra.load_data(f'{data_folder}{filename_eye_pos}', key="eye_pos_l")
print(f"{filename_eye_pos}_left loaded.")
eye_pos_r = cebra.load_data(f'{data_folder}{filename_eye_pos}', key="eye_pos_r")
print(f"{filename_eye_pos}_right loaded.")
neural = cebra.load_data(f'{data_folder}{filename_dfof}', key="neural")
print(f"{filename_dfof} loaded.")




### truncate neural data to be a random selection of ROIs and a consecutive sequence of timesteps, with predefined shape


In [None]:
## truncate neural data to be a random selection of ROIs and a consecutive sequence of timesteps, with predefined shape ##
# Expects neural, start, stop and ROIS to already exist in the notebook.

import numpy as np

# draw ROIS distinct column indices out of all columns of neural,
# then put them in ascending order
neural_indexes = np.random.choice(
    np.arange(neural.shape[1]),
    size=ROIS,
    replace=False,
)
neural_indexes.sort()

# keep rows start:stop and only the sampled columns
neural = np.array(neural[start:stop, neural_indexes])
print(f'Truncated neural dataset shape is: {neural.shape}')

### singular (non-loop) implementation of creating binary masks for stimulus 'on' frames


In [None]:
## singular (non-loop) implementation of creating binary masks for stimulus 'on' frames ##
# Builds one 0/1 mask per spot type (left, right) with a 1 on every frame
# from a presentation's onset frame through its end frame, plus the
# per-presentation durations in frames.

import h5py
import numpy as np

filepath = '/media/storage/DATA/lfads_export/f1_221027.h5'
stim_types = {'left_spot':0, 'right_spot':1,  \
              'open_loop_grating':2, 'closed_loop_grating':3}
# placeholder with 100000 rows; only its first dimension is used, to size the masks
neural = np.zeros((100000,1))

with h5py.File(filepath, 'r') as f:

    # get stimulus presentations
    stimuli = f['visuomotor']['presentations']
    stim_type = stimuli['stim_type'].astype(int)
    stim_onset_fr = stimuli['onset_frame'].astype(int)
    stim_end_fr = stimuli['offset_frame'].astype(int)


    # find the presentation indexes with left or right spots
    # NOTE(review): the file codes 1/2 are presumably the stim_types values
    # shifted by one (1-based codes in the file) — confirm
    stim_pres_idx_l = np.where(np.isin(stim_type, 1))[0]    # left spots
    stim_pres_idx_r = np.where(np.isin(stim_type, 2))[0]    # right spots

    # index stim onset frames with the presentation indexes
    stim_pres_fr_l = stim_onset_fr[stim_pres_idx_l]
    stim_pres_fr_r = stim_onset_fr[stim_pres_idx_r]

    # index stim end frames with the presentation indexes
    stim_end_fr_l = stim_end_fr[stim_pres_idx_l]
    stim_end_fr_r = stim_end_fr[stim_pres_idx_r]

    # create masks of stim onset/stim end
    # Step 1: put a 1 on every onset frame and every end frame. The two index
    # arrays are joined into a single 1-D index so the assignment does not
    # depend on how the NumPy version interprets a list of arrays as an index.
    # Step 2: the cumulative XOR switches to 1 at each onset marker and back
    # to 0 at each end marker; OR-ing with the markers adds the end frame
    # itself. The result must be assigned back, otherwise the mask keeps only
    # the markers.
    # NOTE(review): assumes presentations of one stimulus do not overlap and
    # that onset and end frames are distinct — confirm
    # left spot
    stim_on_l = np.zeros(neural.shape[0]).astype(int)
    stim_on_l[np.concatenate((stim_pres_fr_l, stim_end_fr_l))] = 1
    stim_on_l = np.bitwise_xor.accumulate(stim_on_l) | stim_on_l
    # right spot
    stim_on_r = np.zeros(neural.shape[0]).astype(int)
    stim_on_r[np.concatenate((stim_pres_fr_r, stim_end_fr_r))] = 1
    stim_on_r = np.bitwise_xor.accumulate(stim_on_r) | stim_on_r

    # find duration (in frames) of each presentation
    # (neural recording is at 5Hz)
    stim_dur_l = stim_end_fr_l - stim_pres_fr_l
    stim_dur_r = stim_end_fr_r - stim_pres_fr_r

### pre-loop version of the stimulus on frame mask 


In [None]:
## TESTING: Here is the full structure as a template for the cell above. Delete when above is implemented
# # load data (single fish)
# With LOAD set, reloads previously saved .npz files through CEBRA.
# Otherwise builds left/right spot 'on' masks from the HDF5 file, saves them
# together with the dF/F data as .npz, and reloads both through CEBRA.
# Relies on FILEPATH, TIMESTEPS, ROIS and LOAD, and on h5py / np / cebra,
# being defined earlier in the notebook.

# paths
filepath = FILEPATH
filename = filepath.split('/')[-1][:-3] # fish and date only
data_folder = 'data/'
filename_spot_pres_fr = f'{filename[-9:]}_spot_pres_fr.npz'
filename_dfof = f'{filename[-9:]}_dfof_stim_decode.npz'

# choose where in dataset to sample
# NOTE(review): start/stop are defined here but not applied to any dataset
# in this cell
start, stop = 0, 0+TIMESTEPS

# extract eye position and neural data
# do not attempt to load the entire file 
print("Accessing data...")

# load data if it is already saved, and LOAD is set
if LOAD:
    try:
        # both mask columns are stored under one key, so a single load suffices
        spot_pres_fr = cebra.load_data(f'{data_folder}{filename_spot_pres_fr}', key="spot_pres_fr")
        print(f"{filename_spot_pres_fr} loaded.")
        neural = cebra.load_data(f'{data_folder}{filename_dfof}', key="neural")
        print(f"{filename_dfof} loaded.")

    except Exception as err:
        # best effort: report why loading failed instead of hiding it, and
        # leave spot_pres_fr bound so the final print still runs
        print(f"Couldn't load data into CEBRA: {err}")
        spot_pres_fr = None

else:
    with h5py.File(filepath, 'r') as f:

        # neural
        neural = f['rois']['dfof']
        print(f"Full neural dataset shape is: {neural.shape}")


        # get stimulus presentations
        stimuli = f['visuomotor']['presentations']
        stim_type = stimuli['stim_type'].astype(int)

        # NOTE(review): an unfinished `for stim in STIMS:` header with no body
        # stood here and made the cell unparsable; left and right spots are
        # handled explicitly below instead

        # find the presentation indexes with left or right spots
        stim_pres_idx_l = np.where(np.isin(stim_type, 1))[0]    # left spots
        stim_pres_idx_r = np.where(np.isin(stim_type, 2))[0]    # right spots

        # print spot information
        print(f'Out of a total {stim_type.size} stimulus presentations:\n \
        {stim_pres_idx_l.size} left spots\n \
        {stim_pres_idx_r.size} right spots')

        # index stim onset frames with the presentation indexes
        stim_onset_fr = stimuli['onset_frame'].astype(int)
        stim_pres_fr_l = stim_onset_fr[stim_pres_idx_l]
        stim_pres_fr_r = stim_onset_fr[stim_pres_idx_r]

        # index stim end frames with the presentation indexes
        stim_end_fr = stimuli['offset_frame'].astype(int)
        stim_end_fr_l = stim_end_fr[stim_pres_idx_l]
        stim_end_fr_r = stim_end_fr[stim_pres_idx_r]

        # create masks of stim onset/stim end
        # Integer dtype is required for the bitwise operations. Markers are
        # set on every onset and end frame; the cumulative XOR switches to 1
        # at each onset marker and back to 0 at each end marker, and OR-ing
        # with the markers adds the end frame itself.
        # NOTE(review): assumes presentations of one stimulus do not overlap
        # and that onset and end frames are distinct — confirm
        # left spot
        stim_on_l = np.zeros(neural.shape[0], dtype=int)
        stim_on_l[np.concatenate((stim_pres_fr_l, stim_end_fr_l))] = 1
        stim_on_l = np.bitwise_xor.accumulate(stim_on_l) | stim_on_l
        # right spot
        stim_on_r = np.zeros(neural.shape[0], dtype=int)
        stim_on_r[np.concatenate((stim_pres_fr_r, stim_end_fr_r))] = 1
        stim_on_r = np.bitwise_xor.accumulate(stim_on_r) | stim_on_r

        # find duration (in frames) of each presentation
        # (neural recording is at 5Hz)
        stim_dur_l = stim_end_fr_l - stim_pres_fr_l
        stim_dur_r = stim_end_fr_r - stim_pres_fr_r

        # one column per spot type (left, right), one row per frame
        # NOTE(review): the names originally stacked here were never defined;
        # stacking the two 'on' masks is presumed to be the intent — confirm
        spot_pres_fr = np.column_stack((stim_on_l, stim_on_r))

        # assert shapes
        # NOTE(review): neural is still the full dataset at this point, so
        # this only holds when (TIMESTEPS, ROIS) equals its full shape — confirm
        assert neural.shape == (TIMESTEPS, ROIS)
        assert spot_pres_fr.shape == (stim_on_l.size, 2)

        # save data
        np.savez(f'{data_folder}{filename_spot_pres_fr}', spot_pres_fr=spot_pres_fr)
        np.savez(f'{data_folder}{filename_dfof}', neural=neural)

        # load data
        spot_pres_fr = cebra.load_data(f'{data_folder}{filename_spot_pres_fr}', key="spot_pres_fr")
        print(f"{filename_spot_pres_fr} loaded.")
        neural = cebra.load_data(f'{data_folder}{filename_dfof}', key="neural")
        print(f"{filename_dfof} loaded.")


print(spot_pres_fr)
        

### Older, more convoluted loading step before deciding to load dfof from HDD

In [None]:
### load data for a single fish ###
# With LOAD set, reloads the saved stimulus masks and the neural data through
# CEBRA. Otherwise builds one 'on' mask per chosen stimulus from the HDF5
# file, optionally saves masks and neural data as .npz (SAVE), and reloads
# both through CEBRA.
# Relies on STIM_TYPES, STIMS, FILEPATH, LOAD and SAVE, and on h5py / np /
# cebra, being defined earlier in the notebook.

##  params ##

# variables
stim_types = STIM_TYPES     # dict of all possible stims
stims = STIMS               # stim types chosen for analysis

# paths
filepath = FILEPATH
filename = filepath.split('/')[-1][:-3] # fish and date only
data_folder = 'data/'
data_folder_HDD = '/media/storage/DATA/tom/'
filename_stim_pres_frames = f'{filename[-9:]}_stim_pres_frames.npz'
filename_dfof = f'{filename[-9:]}_dfof_stim_decode.npz'

## loading ##

# only generate new data files if LOAD is not specified
if LOAD:
    try:
        # load data
        # default to loading directly from hdf5 for large datasets
        stim_pres_frames = cebra.load_data(f'{data_folder}{filename_stim_pres_frames}', key="stim_pres_frames")
        print("Stimulus presentation frames loaded.")
        with h5py.File(filepath, 'r') as f:
            try:
                # NOTE(review): this reads 'rois/data' while every other
                # access uses 'rois/dfof'; if 'data' is absent the lookup
                # raises and the .npz fallback below is used — confirm the
                # key is intended
                neural = f['rois']['data']
                neural = cebra.load_data(filepath, key='rois/dfof')
                print("Neural data loaded")

            except Exception as e:
                print(e)
                try:
                    neural = cebra.load_data(f'{data_folder_HDD}{filename_dfof}', key="neural")
                    print("Loaded all datasets")

                except Exception as fallback_err:
                    # report the reason rather than hiding it
                    print("Could not load data.")
                    print(fallback_err)
    except Exception as load_err:
        print("Could not load data.")
        print(load_err)

else:
    with h5py.File(filepath, 'r') as f:

        ## neural ##

        neural = f['rois']['dfof']
        print(f"Full neural dataset shape is: {neural.shape}")

        ## stimuli ##

        # get stimulus presentations
        stimuli = f['visuomotor']['presentations']
        stim_type = stimuli['stim_type'].astype(int)
        stim_on_fr = stimuli['onset_frame'].astype(int)
        stim_end_fr = stimuli['offset_frame'].astype(int)

        # initialise lists for the chosen stimuli
        stim_pres_idx_list, stim_on_fr_list, stim_end_fr_list = [], [], []
        stim_on_mask_list, stim_dur_list = [], []


        # loop through chosen stimuli and find boolean masks for their 'on' frames
        for stim in stims:

            # convert stim name to stim number
            stim_num = stim_types[stim] 
            print(f'Attempting to parse stim: {stim}') 

            # find the presentation indexes for the specified stim type
            # must account for data index starting at 1
            this_stim_pres_indexes = np.where(np.isin(stim_type, stim_num + 1))[0]
            stim_pres_idx_list.append(this_stim_pres_indexes)

            # index stim onset frame numbers with the presentation indexes
            this_stim_on_frames = stim_on_fr[this_stim_pres_indexes]
            stim_on_fr_list.append(this_stim_on_frames)

            # index stim end frame numbers with the presentation indexes
            this_stim_end_frames = stim_end_fr[this_stim_pres_indexes]
            stim_end_fr_list.append(this_stim_end_frames)

            # create a boolean mask of stimulus presentation frames (1 == stimulus on, 0 == stimulus off)
            # First mark every onset frame and every end frame. The two index
            # arrays are joined into one 1-D index so the assignment does not
            # depend on how the NumPy version interprets a list of arrays.
            this_stim_on_mask = np.zeros(neural.shape[0]).astype(int)
            this_stim_on_mask[np.concatenate((this_stim_on_frames, this_stim_end_frames))] = 1
            # perform bitwise XOR operation on consecutive elements of stim_on_mask. This will convert all 
            # but stim_off frame to 1s. Combining with "OR stim_on_mask" will also include the stim_off frame
            # NOTE(review): assumes presentations of one stimulus do not
            # overlap and that onset and end frames are distinct — confirm
            stim_on_mask = np.bitwise_xor.accumulate(this_stim_on_mask) | this_stim_on_mask
            # store the filled mask, not the marker-only array
            stim_on_mask_list.append(stim_on_mask)

            # find duration (in frames) of each presentation of the stimulus
            # recording rate is 5 Hz
            stim_dur_list.append(this_stim_end_frames - this_stim_on_frames)

            # assert shapes (check the mask added in this iteration)
            assert stim_on_mask_list[-1].size == neural.shape[0]

            print(f'Stim type {stim} parsed successfully.')

        if SAVE:
            # save all data as .npz 
            # (large datasets saved to HDD)
            # one column per chosen stimulus, one row per frame
            stim_on_mask_dataset = np.column_stack(stim_on_mask_list)
            np.savez(f'{data_folder}{filename_stim_pres_frames}', stim_pres_frames=stim_on_mask_dataset)
            print("Stim presentation dataset saved.")
            np.savez(f'{data_folder_HDD}{filename_dfof}', neural=neural)
            print("Neural dataset saved.")
            print("All datasets saved.")


        # load data
        # assume all data is saved as .npz
        # bound under the same name as in the LOAD branch; the trailing-underscore
        # name is kept as an alias for any code that still refers to it
        stim_pres_frames = cebra.load_data(f'{data_folder}{filename_stim_pres_frames}', key="stim_pres_frames")
        stim_pres_frames_ = stim_pres_frames
        print("Stimulus presentation frames loaded.")
        # the key is the name the array was saved under, i.e. the string "neural"
        neural = cebra.load_data(f'{data_folder_HDD}{filename_dfof}', key="neural")
        print("Neural data loaded")
        print("All data loaded.")