In [5]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter
from neo.io.neuroscopeio import NeuroScopeIO
from scipy.signal import resample
from scipy.signal import welch
from scipy.stats import zscore

%matplotlib inline

# we are using data from: https://crcns.org/data-sets/hc/hc-11/about-hc-11
# Grosmark, A.D., and Buzsáki, G. (2016). Diversity in neural firing dynamics supports both rigid and learned hippocampal sequences. Science 351, 1440–1443.
# Chen, Z., Grosmark, A.D., Penagos, H., and Wilson, M.A. (2016). Uncovering representations of sleep-associated hippocampal ensemble spike activity. Sci. Rep. 6, 32193.

# We are also using code from: https://github.com/zhd96/pi-vae/blob/main/examples/pi-vae_rat_data.ipynb
# Zhou, D., Wei, X.
# Learning identifiable and interpretable latent models of high-dimensional neural activity using pi-VAE.
# NeurIPS 2020. https://arxiv.org/abs/2011.04798

In [6]:
# Path to the preprocessed MATLAB session file shared by the pi-VAE authors:
# https://drive.google.com/drive/folders/1lUVX1IvKZmw-uL2UWLxgx4NJ62YbCwMo?usp=sharing
# It is opened with h5py further down to read spike, position and epoch fields.

path_mat = "../../data_untracked/Achilles_10252013_sessInfo.mat"

# Path to the raw .eeg recording of the same session, from the hc-11 data set:
# https://crcns.org/data-sets/hc/hc-11/about-hc-11
path_lfp = "../../../Achilles_10252013/Achilles_10252013.eeg"

In [None]:
# Down-sample every channel of the raw recording to LFP_fs, store the result on
# disk, then z-score the in-memory copy that the following cells use.
LFP_fs = 40  # target sampling rate of the down-sampled LFP
# NOTE(review): 1250 is taken to be the sampling rate of the .eeg file -- confirm
# against the data set description before reusing this cell on other recordings.
resample_rate = 1250 / LFP_fs  # down-sampling factor (raw samples per output sample)

reader = NeuroScopeIO(filename=path_lfp)
seg = reader.read_segment(lazy=False)
t, c = np.shape(seg.analogsignals[0])

# All channels have the same number of samples, so the output length is
# computed once instead of once per channel.
n_samples = int(t / resample_rate)
print(f"resampling {c} channels: {t} -> {n_samples} samples each")

ds = []
for i in range(c):
    lfp = np.array(seg.analogsignals[0][:, i])
    # Fourier-domain resampling of one channel to n_samples points
    lfp_ds = resample(lfp, n_samples)
    ds.append(lfp_ds)
LFP = np.array(ds)

# The array is written to disk before the squeeze and z-scoring below are applied.
np.save("../../data_untracked/Achilles_10252013_lfp_40Hz.npy", LFP)

# drop singleton dimensions left over from the per-channel slicing
LFP = LFP.squeeze()
# z-score over all channels and samples jointly (axis=None)
LFP = zscore(LFP, axis=None)

In [8]:
# Note: this is assuming LFP_fs and the fs of the data in the mat file are the same

# Read the spike, position and epoch arrays out of the session file. Everything
# is copied into numpy arrays so nothing refers to the file once it is closed.
with h5py.File(path_mat, "r") as mat_file:
    session = mat_file["sessInfo"]
    spikes_grp = session["Spikes"]
    position_grp = session["Position"]

    # spike info: time stamp and cell id of every spike, plus the pyramidal cell ids
    spikes_times = np.array(spikes_grp["SpikeTimes"])[0]
    spikes_cells = np.array(spikes_grp["SpikeIDs"])[0]
    pyr_cells = np.array(spikes_grp["PyrIDs"])[0]

    # location info: 1-D position samples and their time stamps
    locations = np.array(position_grp["OneDLocation"])[0]
    locations_times = np.array(position_grp["TimeStamps"])[:, 0]

    # maze epoch range: element 0 is used as start, element 1 as end
    maze_epoch = np.array(session["Epochs"]["MazeEpoch"])[:, 0]

# Restrict the spike train to the maze epoch: drop everything before the rat
# enters the maze and after it leaves.
maze_start = maze_epoch[0]
maze_end = maze_epoch[1]
time_in_maze = (spikes_times >= maze_start) & (spikes_times <= maze_end)
spikes_times = spikes_times[time_in_maze]
spikes_cells = spikes_cells[time_in_maze]

# Of the remaining spikes, keep those whose cell id is listed in pyr_cells.
cell_mask = np.isin(spikes_cells, pyr_cells)
spikes_times = spikes_times[cell_mask]
spikes_cells = spikes_cells[cell_mask]
# Build the spike-count matrix: one row per time bin, one column per entry of
# pyr_cells, each element the number of spikes of that cell in that bin.
bin_size = 25  # bin width in ms; change bin size to whatever you want here
# NOTE(review): the location vector further down is binned at 1 / LFP_fs seconds,
# so the two only stay row-aligned while bin_size == 1000 / LFP_fs.

# spike times are multiplied by 1000 before dividing by the bin width in ms
binned_spike_times = np.array(np.floor(spikes_times * 1000 / bin_size), dtype='int')

# The first occupied bin is computed once; evaluating .min() for every spike
# made the counting loop quadratic in the number of spikes.
first_bin = binned_spike_times.min()
n_bins = binned_spike_times.max() - first_bin + 1

spike_by_neuron = np.zeros((n_bins, pyr_cells.shape[0]))

# map each pyramidal cell id to its column in spike_by_neuron
cell_dic = {int(v): i for i, v in enumerate(pyr_cells)}

rows = binned_spike_times - first_bin
cols = np.array([cell_dic[int(cell_id)] for cell_id in spikes_cells], dtype='int')
# np.add.at accumulates repeated (row, col) pairs, so several spikes of the
# same cell in the same bin are all counted (plain fancy-index += would not).
np.add.at(spike_by_neuron, (rows, cols), 1)
    
# Bin spike times at LFP_fs bins per unit time and get the first and last occupied bin.
binned_spike_times = np.floor(spikes_times * LFP_fs).astype("int")
first_spike_t = binned_spike_times.min()
last_spike_t = binned_spike_times.max()

# every bin index between the first and the last spike (inclusive)
tph_binned_time = np.arange(first_spike_t, last_spike_t + 1)
# bin the position time stamps on the same grid
binned_locations_times = np.floor(locations_times * LFP_fs).astype("int")

# Location at each binned time point; bins without a position sample stay NaN.
# When several samples fall into one bin, the last one in file order is kept.
locations_vec = np.full(last_spike_t - first_spike_t + 1, np.nan)
n_location_bins = locations_vec.shape[0]
for loc_bin, loc in zip(binned_locations_times, locations):
    offset = loc_bin - first_spike_t
    # Skip samples outside the spike range: a negative offset would otherwise
    # wrap around and silently overwrite bins at the end of the vector.
    if 0 <= offset < n_location_bins:
        locations_vec[offset] = loc

In [None]:
# Sanity check of the spike-count matrix: rows are time bins, columns are the entries of pyr_cells.
spike_by_neuron.shape

In [None]:
# Window, in bin indices, shown in this figure; t1 and t2 are reused by the next cell.
t1 = 55000
t2 = 60000

# Top panel: location trace. Bottom panel: spike counts per neuron (clipped at 1).
fig, ax = plt.subplots(2, figsize=(3, 2), sharex=True)
location_ax, raster_ax = ax

location_ax.plot(locations_vec[t1:t2])
raster_ax.imshow(
    spike_by_neuron[t1:t2].T,
    cmap='Greys',
    aspect='auto',
    vmax=1,
)
raster_ax.set_xlabel('Time (dt = 25 ms)')
raster_ax.set_ylabel('Neuron')
location_ax.set_ylabel('Location')

In [None]:
# Same window as the previous figure, but showing only the bins that have a location sample.
subset = locations_vec[t1:t2]
has_location = ~np.isnan(subset)

# fraction of bins in the window for which a location is available
ratio = np.count_nonzero(has_location) / len(subset)

fig, ax = plt.subplots(2, figsize=(3, 2), sharex=True)
location_ax, raster_ax = ax

location_ax.plot(subset[has_location])
raster_ax.imshow(
    spike_by_neuron[t1:t2][has_location].T,
    cmap='Greys',
    aspect='auto',
    vmax=1,
)
raster_ax.set_xlabel('Time (dt = 25 ms)')
raster_ax.set_ylabel('Neuron')
location_ax.set_ylabel('Location')

In [None]:

# Position over the first `ll` time bins, with the tick labels rewritten from
# bin indices (0, 2000, ...) to the values 0, 50, ... shown on the time axis.
fsz = 14   # font size for axis labels and tick labels
ll = 4000  # number of leading bins to draw

fig, ax1 = plt.subplots(figsize=(4, 4))
ax1.plot(locations_vec[:ll])

ax1.set_xlabel('Time (s)', fontsize=fsz, fontweight='normal')
ax1.set_ylabel('Position (m)', fontsize=fsz, fontweight='normal')

# open frame: hide the top and right spines
for side in ('top', 'right'):
    ax1.spines[side].set_visible(False)

plt.setp(ax1.get_xticklabels(), fontsize=fsz)
plt.setp(ax1.get_yticklabels(), fontsize=fsz)
ax1.set_xticks((0, 2000, 4000, 6000))
ax1.set_xticklabels((0, 50, 100, 150));

In [None]:

# Position over the first `ll` time bins with the split points in idx_split
# marked in red. FormatStrFormatter comes from matplotlib.ticker and is imported
# in the notebook's import cell.
fsz = 14   # font size for axis labels and tick labels
ll = 4000  # number of leading bins to draw

fig = plt.figure(figsize=(4, 4))
ax1 = plt.subplot(111)
ax1.plot(locations_vec[:ll])

ax1.set_xlabel('Time (s)', fontsize=fsz, fontweight='normal')
ax1.set_ylabel('Position (m)', fontsize=fsz, fontweight='normal')
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)

plt.setp(ax1.get_xticklabels(), fontsize=fsz)
plt.setp(ax1.get_yticklabels(), fontsize=fsz)
ax1.set_xticks((0, 2000, 4000, 6000))
ax1.set_xticklabels((0, 50, 100, 150))
# show position ticks with one decimal place
ax1.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# NOTE(review): idx_split is not defined in any cell visible in this notebook;
# it has to be computed before this cell, otherwise the lines below raise
# NameError on a fresh kernel. It is used here as an integer index array.
shown_splits = idx_split[idx_split < ll]
plt.scatter(shown_splits, locations_vec[shown_splits], c='red')

In [None]:
# Keep only the rows (time bins) of the spike-count matrix that have a location sample.
location_known = np.logical_not(np.isnan(locations_vec))
spike_by_neuron_use = spike_by_neuron[location_known]

In [None]:
# Pick, for every LFP channel, the samples at the bin indices spanned by the
# spikes, then put time on the first axis.
maze_bins = np.unique(tph_binned_time)
mazeLFP = np.array([channel[maze_bins] for channel in LFP]).T

# this effectively takes the LFP values only when the location data is available
# if one trains the model using spikes of full maze epoch rather than sampling the spikes
# when location is available, this step is not relevant
mazeLFP = mazeLFP[~np.isnan(locations_vec)]

In [None]:
# Write the maze-epoch LFP to disk in .npy format.
maze_lfp_path = "../../data_untracked/mazeLFP.npy"
np.save(maze_lfp_path, np.asarray(mazeLFP))

In [None]:
# Power spectral density of the channel-averaged maze LFP (Welch's method).
nperseg = 1024  # samples per Welch segment

# taking the mean along the channels is fine since they're highly correlated
# NOTE(review): rows without location data were removed from mazeLFP earlier, so
# this series is not contiguous in time -- keep that in mind when reading the spectrum.
mean_lfp = np.mean(mazeLFP, axis=1)
frequencies0, psd0 = welch(mean_lfp, fs=LFP_fs, nperseg=nperseg)

fig, ax = plt.subplots(figsize=(10, 6))
ax.semilogy(frequencies0, psd0, alpha=0.8, zorder=0, label="lfp")
# welch returns frequencies in the unit of fs (LFP_fs is taken to be in Hz)
ax.set_xlabel("Frequency (Hz)", labelpad=10, fontsize=14)
ax.set_ylabel("PSD of LFP", labelpad=10, fontsize=14)
ax.set_title("Welch PSD of the channel-averaged maze LFP")
fig.subplots_adjust(bottom=0.3)
ax.legend()
plt.show()