In [2]:
# initial imports

import os
import pickle

import h5py
import matplotlib as mpl
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torchvision
from matplotlib import pyplot as plt
from scipy import stats
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from torchvision.io import read_image
from torchvision.models import resnet50, ResNet50_Weights

In [131]:
# Session to analyse. Kept as zero-padded strings because they are spliced
# directly into the HDF5 file name and the output folder/file names.
year, month, day = '2027', '04', '03'

# Edges of the peristimulus window, in seconds relative to the trigger; they
# are handed to h5_2_df as time_window = [start_bin, end_bin].
start_bin, end_bin = -0.1, 0.301

# NOTE(review): not referenced anywhere in the visible part of the notebook —
# confirm whether it is still needed.
blank_or_nonblank = 'blank'

In [132]:
# Load the data
# Open this session's PSTH file (name built from year/month/day) and list its
# top-level keys. The file is opened explicitly read-only so the analysis can
# never modify the recording, whatever the installed h5py's default mode is.
# The handle `f` is deliberately left open: a later cell reads
# f['site_coordinates'] from it.
# NOTE(review): the path is an absolute, machine-specific location.

pathname = f'/Users/parsatalaie/Desktop/Marmoset Datasets/{year}{month}{day}_all_psth.h5'

f = h5py.File(pathname, 'r')
list(f.keys())

['data',
 'imro_table',
 'stim_indices',
 'trial_params',
 'trial_params_short',
 'zero_coordinates']

In [133]:
# Add path to and import mkturk analysis tools

from sys import path

print(path)

# Append only when missing: an unconditional append adds the same directory to
# sys.path again every time this cell is re-run in a live kernel.
mkturk_tools_dir = '/Users/parsatalaie/Desktop/Issa Data'
if mkturk_tools_dir not in path:
    path.append(mkturk_tools_dir)

from data_analysis_tools_mkTurk.utils_meta import get_recording_path
from data_analysis_tools_mkTurk.general import df_2_psth_mat
from data_analysis_tools_mkTurk.IO import ch_dicts_2_h5, h5_2_trial_df, h5_2_df

['/Users/parsatalaie/Desktop/Marmoset Data Analysis', '/opt/anaconda3/lib/python312.zip', '/opt/anaconda3/lib/python3.12', '/opt/anaconda3/lib/python3.12/lib-dynload', '', '/opt/anaconda3/lib/python3.12/site-packages', '/opt/anaconda3/lib/python3.12/site-packages/aeosa', '/Users/parsatalaie/Desktop/Issa Data', '/Users/parsatalaie/Desktop/Issa Data', '/Users/parsatalaie/Desktop/Issa Data', '/Users/parsatalaie/Desktop/Issa Data', '/Users/parsatalaie/Desktop/Issa Data', '/Users/parsatalaie/Desktop/Issa Data']


In [134]:
# Create trial_params: the per-trial parameter table for this session, read
# from the HDF5 file by the mkTurk helper h5_2_trial_df (presumably a pandas
# DataFrame — later cells index its 'scenefile', 'trial_num' and 'rsvp_num'
# columns). Nothing is printed by this cell itself.

trial_params = h5_2_trial_df(pathname)

  value = self._g_getattr(self._v_node, name)


In [135]:
# List scenefiles
# Shows the distinct scenefile paths that occur in this session's trials, so
# the `scenefiles` selection can be taken from values that are actually present.

trial_params['scenefile'].unique()

array(['/mkturkfiles/scenebags/West/BlankStim_300ms.json',
       '/mkturkfiles/scenebags/West/neural_stim_4_0ABCDEFGHIJ.json',
       '/mkturkfiles/scenebags/West/20231025_Rust_NaturalImages300_300ms.json',
       '/mkturkfiles/scenebags/West/20231025_Var6vbslir_set0_im151_elias_dur300ms_lab_updated.json',
       '/mkturkfiles/scenebags/West/20231025_Var6vbslir_set0_im151_neptune_dur300ms_lab_updated.json'],
      dtype=object)

In [136]:
# Rust
# Scenefile(s) whose trials are selected by the `isin(scenefiles)` filter.
# NOTE(review): the "HvM" cell that follows assigns the same `scenefiles`
# name, so whichever of the two cells ran last decides the selection.
scenefiles = ['/mkturkfiles/scenebags/West/20231025_Rust_NaturalImages300_300ms.json']

In [24]:
# HvM
# Alternative selection: the ten hvm10 scenefiles, one per object, listed in
# the same order as before.
# NOTE(review): this assigns the same `scenefiles` name as the "Rust" cell, so
# running it replaces that selection — run only one of the two.
scenefiles = [
    f'/mkturkfiles/scenebags/West/hvm10_{hvm_object}_45_20240906.json'
    for hvm_object in ('table', 'elephant', 'dog', 'bear', 'chair',
                       'car', 'turtle', 'plane', 'apple', 'head')
]

In [137]:
# Select stimulus presentations associated with requested scenefiles.
# The mask is named `scenefile_filter` (not `filter`) so the Python builtin
# filter() is not shadowed for the rest of the kernel session.
scenefile_filter = trial_params.scenefile.isin(scenefiles)
rust_trials = trial_params[scenefile_filter]
# One (trial_num, rsvp_num) row per selected presentation; passed to h5_2_df
# as its `trials` argument.
array_filter = np.array(rust_trials[['trial_num', 'rsvp_num']])

# Read spike count data from HDF5 for requested trials:
time_window = [start_bin, end_bin] # Beginning and end of peristimulus time window for each stim, relative to trigger in seconds

# Sort rust_data by index to match trial_params, rust_trials. The sort is
# chained onto the load instead of using inplace=True, so `rust_data` is
# produced in a single assignment and never mutated afterwards.
rust_data = h5_2_df(pathname, trials=array_filter, time_window=time_window).sort_index()

Fetching trial parameters...
... done (0.021474599838256836 sec).
inds_df.shape = (4050, 2)


  value = self._g_getattr(self._v_node, name)


Pre-fetching PSTHs from HDF5...
... done.
Duration=0.011935516198476156 minutes
Fancy slicing numpy array...
... done.
Duration=0.00904009739557902 minutes


In [138]:
# Create + apply mask to remove nans
# `mask` is True for a row only when every value in that row's psth is NaN.
mask = rust_data.apply(lambda x : np.all(np.isnan(x.psth)), axis=1)
# Invert with `~`, the boolean NOT for masks. Unary minus is arithmetic
# negation: NumPy rejects it on boolean arrays, and it only happened to work
# here because pandas special-cases boolean data.
final_df = rust_data[~mask]
final_df.shape

(3364, 12)

In [139]:
# Stack the per-row psth arrays of final_df into a single ndarray
# (one leading entry per remaining row).
final_spike_arr = np.array(final_df['psth'].tolist())
final_spike_arr.shape

(3364, 384, 40)

In [140]:
# Count the NaN entries still present in the spike matrix. The mask above only
# drops rows that are entirely NaN, so any partial NaNs would show up here.
np.sum(np.isnan(final_spike_arr))

0

In [141]:
# Average over the last axis of the spike matrix (presumably the time bins of
# the [start_bin, end_bin] window — confirm), leaving one value per row and
# per second-axis entry.
avg_spikes = final_spike_arr.mean(axis=2)
avg_spikes.shape

(3364, 384)

In [142]:
# Build one local image path per remaining presentation, in final_df row order.
# NOTE(review): the file number is stim_idx + 1, i.e. this assumes stim_idx is
# 0-based while the Nat300_*.png file names are 1-based — confirm against the
# image folder, since a wrong offset would pair responses with the wrong image.
path_series = final_df['stim_idx'].map(
    lambda stim_idx: f'/Users/parsatalaie/Downloads/rust_natimgs/Nat300_{stim_idx+1}.png'
)
natimg_path_list = path_series.tolist()
len(natimg_path_list)

3364

In [31]:
# HVM NATIMG_PATH_LIST
# NOTE(review): this assigns the same `natimg_path_list` name as the Rust cell,
# so running it replaces that list — run only the variant matching the
# selected scenefiles.

# Number of leading characters of img_full_path to discard.
# NOTE(review): magic number — presumably the length of the remote storage
# prefix; confirm against an actual img_full_path value.
start_idx = 51


def alter_hvm_path(hvm_path):
    """Return hvm_path with its first `start_idx` characters replaced by the local dataset folder."""
    return '/Users/parsatalaie/Desktop/Marmoset Datasets/' + hvm_path[start_idx:]


natimg_paths = final_df['img_full_path'].map(alter_hvm_path)
natimg_path_list = natimg_paths.tolist()
len(natimg_path_list)

8585

In [143]:
# NOTE(review): re-assigns `year` to the value the session-config cell already
# sets ('2027'); looks redundant — presumably left over from switching
# sessions. The save cells below build their output paths from it.
year = '2027'

In [144]:
# Reorient the averaged responses so the second axis of final_spike_arr comes
# first (presumably channels x presentations — confirm).
# The value is recomputed from final_spike_arr rather than written as
# `avg_spikes = avg_spikes.transpose()`: that self-referential form flips the
# array back on every second run of the cell, so the orientation that gets
# saved depended on how many times the cell had been executed.
avg_spikes = np.mean(final_spike_arr, axis=2).transpose()
avg_spikes.shape

(384, 3364)

In [145]:
# Save avg_spikes
# The session folder is created first because np.save raises when the target
# directory does not exist. np.save appends ".npy" to the given file name.
session_dir = f'./{year}_{month}_{day}'
os.makedirs(session_dir, exist_ok=True)
np.save(f'{session_dir}/marm_avg_spikes_{year}{month}{day}', avg_spikes)

In [146]:
# pickle it
# Writes natimg_path_list into the session folder, next to the saved spike
# array. The folder is created first so open() cannot fail on a missing
# directory. `pickle` is already imported in the notebook's import cell, so it
# is not imported again here.

session_dir = f'./{year}_{month}_{day}'
os.makedirs(session_dir, exist_ok=True)

with open(f'{session_dir}/natimg_path_list_{year}{month}{day}', 'wb') as fp:   #Pickling
    pickle.dump(natimg_path_list, fp)

In [None]:
'''
Debugging check: is the problem caused by how the DataFrame is filtered?
Start again from all trials and apply the filters afterwards.
'''

In [41]:
# h5_2_df the whole file (same time_window)
# No `trials=` argument is passed here, so presumably every presentation in
# the file is loaded (confirm against h5_2_df). The window is the same
# [start_bin, end_bin] used for the filtered load, so both paths are comparable.

time_window = [start_bin, end_bin]
all_data = h5_2_df(pathname, time_window=time_window)

Fetching trial parameters...
... done (0.02410578727722168 sec).
Pre-fetching PSTHs from HDF5...


  value = self._g_getattr(self._v_node, name)


... done.
Duration=0.01466134786605835 minutes
Fancy slicing numpy array...
... done.
Duration=0.01987988551457723 minutes


In [47]:
# Keep only the rows of the full load that belong to the Rust natural-image
# scenefile (no NaN filtering at this point).
scenefile_mask = all_data['scenefile'].eq(
    '/mkturkfiles/scenebags/West/20231025_Rust_NaturalImages300_300ms.json'
)
test_all_data = all_data.loc[scenefile_mask]

In [57]:
# Debug aid: stim_idx values ordered by rsvp_num, then glued together into one
# string with no separator between the numbers.
x = test_all_data.sort_values(by='rsvp_num')['stim_idx'].tolist()
y = ''.join(map(str, x))

In [42]:
# Apply masks
# Same two filters as the filtered-load path, applied in the opposite order:
# first drop rows whose psth is entirely NaN, then keep the Rust scenefile.

nan_mask = all_data.apply(lambda x : np.all(np.isnan(x.psth)), axis=1)
# `~` is the boolean NOT for masks; unary minus is arithmetic negation, which
# NumPy rejects on boolean arrays and which only worked here because pandas
# special-cases boolean data.
fix_all_data = all_data[~nan_mask]
scenefile_mask = fix_all_data['scenefile'] == '/mkturkfiles/scenebags/West/20231025_Rust_NaturalImages300_300ms.json'
final_all_data = fix_all_data[scenefile_mask]
final_all_data.shape

(3364, 11)

In [43]:
# Re-run the spike-matrix / average / image-path steps on final_all_data.
# A notebook cell only displays its LAST bare expression, so the shape and the
# NaN count are printed explicitly; as bare mid-cell expressions they were
# evaluated and silently discarded, which made the NaN check a no-op.

# Convert psth to spike matrix
final_spike_arr = np.array(list(final_all_data.psth))
print(final_spike_arr.shape)

# Check for remaining nan
print(np.isnan(final_spike_arr).sum())

# Create avg_spikes
avg_spikes = np.mean(final_spike_arr, axis=2)
print(avg_spikes.shape)

# Create natimg_path_list
# NOTE(review): the file number is stim_idx + 1, i.e. this assumes stim_idx is
# 0-based while the Nat300_*.png file names are 1-based — confirm.
path_series = final_all_data['stim_idx'].apply(lambda x : 
                                           f'/Users/parsatalaie/Downloads/rust_natimgs/Nat300_{x+1}.png')
natimg_path_list = path_series.tolist()
len(natimg_path_list)

(3364, 384)


3364

In [45]:
# Preview the first rows of the NaN-filtered, Rust-only frame built from the
# full load.
final_all_data.head()

Unnamed: 0,monkey,date,trial_num,rsvp_num,stim_id,stim_idx,scenefile,img_full_path,psth_bins,source_path,psth
12,West,20240403,372,0,RustDiCarlo/1_sz_0.8_light00_posX_0_posY_1_pos...,1,/mkturkfiles/scenebags/West/20231025_Rust_Natu...,/mnt/smb/locker/issa-locker/Data/West/Saved_Im...,"[-0.1, -0.09000000000000001, -0.08, -0.07, -0....",/Users/parsatalaie/Desktop/Marmoset Datasets/2...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
451,West,20240403,372,1,RustDiCarlo/34_sz_0.8_light00_posX_0_posY_1_po...,34,/mkturkfiles/scenebags/West/20231025_Rust_Natu...,/mnt/smb/locker/issa-locker/Data/West/Saved_Im...,"[-0.1, -0.09000000000000001, -0.08, -0.07, -0....",/Users/parsatalaie/Desktop/Marmoset Datasets/2...,"[[1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0,..."
543,West,20240403,372,2,RustDiCarlo/41_sz_0.8_light00_posX_0_posY_1_po...,41,/mkturkfiles/scenebags/West/20231025_Rust_Natu...,/mnt/smb/locker/issa-locker/Data/West/Saved_Im...,"[-0.1, -0.09000000000000001, -0.08, -0.07, -0....",/Users/parsatalaie/Desktop/Marmoset Datasets/2...,"[[1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,..."
3002,West,20240403,373,0,RustDiCarlo/226_sz_0.8_light00_posX_0_posY_1_p...,226,/mkturkfiles/scenebags/West/20231025_Rust_Natu...,/mnt/smb/locker/issa-locker/Data/West/Saved_Im...,"[-0.1, -0.09000000000000001, -0.08, -0.07, -0....",/Users/parsatalaie/Desktop/Marmoset Datasets/2...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,..."
1764,West,20240403,373,1,RustDiCarlo/133_sz_0.8_light00_posX_0_posY_1_p...,133,/mkturkfiles/scenebags/West/20231025_Rust_Natu...,/mnt/smb/locker/issa-locker/Data/West/Saved_Im...,"[-0.1, -0.09000000000000001, -0.08, -0.07, -0....",/Users/parsatalaie/Desktop/Marmoset Datasets/2...,"[[1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0,..."


In [44]:
# Check if equivalent
# Compares the frame built from the full load (index-sorted) with final_df
# from the filtered load.
# NOTE(review): the shapes displayed earlier differ in column count —
# (3364, 11) for final_all_data vs (3364, 12) for final_df — so presumably the
# columns need aligning before this can report "identical"; confirm.

try:
    pd.testing.assert_frame_equal(final_all_data.sort_index(), final_df)
    print("DataFrames are identical")
except AssertionError as err:
    print("DataFrames are different")
    # Show what differs instead of discarding the assertion detail; a bare
    # "different" gives no hint whether shape, columns or values disagree.
    print(err)

NameError: name 'final_df' is not defined

In [7]:
# NOTE(review): overrides `year`, which is set to '2027' earlier in the
# notebook. The ch_depth save below builds its output path from this value, so
# with month/day unchanged it is written under ./2026_04_03/ — confirm this is
# intended and not left over from another session.
year = '2026'

In [5]:
# Create new_ch_depth
# For each of the 384 entries, look up column 3 of 'block0_values' at the
# position given by 'axis1' (presumably column 3 holds the site depth and
# 'axis1' the channel ordering — confirm against how the table was written).
# NOTE(review): 'site_coordinates' is not among the keys listed when the file
# was opened at the top of the notebook — confirm `f` points at the right file.

coords = f['site_coordinates']
depth_ch_idx = coords['axis1'][:]

# Read the dataset from HDF5 once. Indexing coords['block0_values'][:] inside
# the loop re-read the whole dataset from the file on every iteration.
depth_values = coords['block0_values'][:][:, 3]

new_ch_depth = np.zeros((384))

for i in range(384):
    new_ch_depth[i] = depth_values[depth_ch_idx[i]]

In [8]:
# Save list of ch depths
# The session folder is created first because np.save raises when the target
# directory does not exist. np.save appends ".npy" to the given file name.

session_dir = f'./{year}_{month}_{day}'
os.makedirs(session_dir, exist_ok=True)
np.save(f'{session_dir}/ch_depth_{year}{month}{day}', new_ch_depth)