In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from pathlib import Path
from neo.io.klustakwikio import KlustaKwikIO

# download the data from the CRCNS website
# https://crcns.org/data-sets/hc/hc-2/about-hc-2
# Mizuseki K, Sirota A, Pastalkova E, Buzsáki G., Neuron. 2009 Oct 29;64(2):267-80.
# (http://www.ncbi.nlm.nih.gov/pubmed/19874793).

In [None]:
# location of the hc2 session on disk and the sampling rate handed to the reader
fs = 20_000
datafile_dir = "../../../Documents/crcns/hc2/ec013527/ec013527"

# read the KlustaKwik-sorted session into a neo block
reader = KlustaKwikIO(dirname=str(datafile_dir), sampling_rate=fs)
block = reader.read_block()

# collect the spike times of every unit as plain numpy arrays, keeping only
# trains whose "cluster" annotation is above 1 (i.e. only clustered units)
spiketrains = block.segments[0].spiketrains
n_units_pre = len(spiketrains)
spikes = [
    np.array(train) for train in spiketrains if train.annotations["cluster"] > 1
]
n_units = len(spikes)

# raster plot of all selected units
cmap = mpl.colormaps["Pastel1"]
color = cmap(1)
plt.figure(figsize=(10, 5))
plt.eventplot(spikes, linewidths=0.5, colors=color)
plt.xlabel("Time (s)")
plt.ylabel("Neuron")
# remove all four axis spines for a frameless raster
ax = plt.gca()
for side in ("right", "top", "left", "bottom"):
    ax.spines[side].set_visible(False)
plt.tight_layout()

In [None]:
# in seconds. this is approximately 5 seconds longer than the actual recording,
# so last bits are full of zeros which we'll cut later
# NOTE(review): no trimming of that zero tail is visible in these cells —
# confirm it happens downstream.
duration = 1065
fs = 20000
dt = 1 / fs

# Build the sample times from an integer index rather than
# np.arange(0, duration, dt): with a fractional step the number of elements
# np.arange returns depends on floating-point rounding, and a one-off length
# would break the later reshape into bins. Here the axis always has exactly
# duration * fs samples and t[k] is k / fs.
t = np.arange(duration * fs) / fs
num_neurons = len(spikes)

In [None]:
# convert spike times into a binary matrix
def spike_times_to_binary(spike_times, time_points, dtype=float):
    """Rasterise per-neuron spike times onto a shared time grid.

    Parameters
    ----------
    spike_times : sequence of array-like
        One entry per neuron, each holding that neuron's spike times.
    time_points : array-like
        1-D grid of time stamps in ascending order (required by
        ``np.searchsorted``), in the same unit as ``spike_times``.
    dtype : data-type, optional
        dtype of the returned matrix. Defaults to ``float``, which is what
        ``np.zeros`` produces without an explicit dtype; a smaller type such
        as ``np.uint8`` reduces memory for long, finely sampled recordings.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(spike_times), len(time_points))``. Entry
        ``[i, k]`` is 1 when neuron ``i`` has at least one spike whose first
        grid point not smaller than the spike time is ``time_points[k]``,
        and 0 otherwise. Spikes later than the last grid point are ignored.
    """
    num_neurons = len(spike_times)
    num_time_points = len(time_points)
    binary_matrix = np.zeros((num_neurons, num_time_points), dtype=dtype)

    for i, times in enumerate(spike_times):
        # atleast_1d keeps the masking below valid for a scalar spike time
        time_indices = np.atleast_1d(np.searchsorted(time_points, times))
        # searchsorted yields len(time_points) for spikes past the end of the
        # grid; using that as an index would raise IndexError, so drop them
        time_indices = time_indices[time_indices < num_time_points]
        binary_matrix[i, time_indices] = 1

    return binary_matrix


# rasterise the spike times of every selected unit onto the time axis t
# (result has one row per entry of spikes and one column per sample of t)
binary_matrix = spike_times_to_binary(spikes, t)

In [None]:
# converting binary matrix to count matrix
def bin_spike_data(binary_matrix, dt_original, dt_new):
    """Downsample a (neurons x time) matrix by summing consecutive samples.

    Parameters
    ----------
    binary_matrix : numpy.ndarray
        2-D array with time along axis 1, sampled every ``dt_original``.
    dt_original : float
        Time resolution of ``binary_matrix``.
    dt_new : float
        Desired bin width; must be an integer multiple of ``dt_original``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(binary_matrix.shape[0], n_samples // factor)``
        where each entry is the sum of ``factor = dt_new / dt_original``
        consecutive samples.

    Raises
    ------
    ValueError
        If ``dt_new`` is not a positive integer multiple of ``dt_original``,
        or if the number of time points is not divisible by the factor.
    """
    ratio = dt_new / dt_original
    # Round instead of truncating: a ratio such as 0.3 / 0.1 evaluates to
    # 2.9999999999999996, which an exact `% 1` test rejects and int() would
    # truncate to 2.
    factor = int(round(ratio)) if np.isfinite(ratio) else 0

    # accept only ratios that are an integer up to floating-point noise
    if factor < 1 or not np.isclose(ratio, factor, rtol=1e-9, atol=0.0):
        raise ValueError(
            "New time resolution should be a multiple of the original time resolution."
        )

    num_rows, num_samples = binary_matrix.shape[0], binary_matrix.shape[1]
    if num_samples % factor != 0:
        raise ValueError(
            f"Number of time points ({num_samples}) is not divisible by the "
            f"downsampling factor ({factor})."
        )

    # add a third axis of length `factor` holding the samples of each new bin;
    # explicit dimensions (rather than -1) also work for a matrix with no rows
    reshaped_matrix = binary_matrix.reshape(num_rows, num_samples // factor, factor)

    # collapse that axis to obtain the per-bin counts
    count_matrix = np.sum(reshaped_matrix, axis=2)

    return count_matrix


# sum the binary matrix into bins of width 0.01 (same unit as dt, i.e. seconds)
count_matrix = bin_spike_data(binary_matrix, dt, 0.01)

In [None]:
# discard neurons whose spiking rate is too low
threshold = 0.1

# total spike count of each neuron (sum over the time axis)
row_sum = count_matrix.sum(axis=1)
# NOTE(review): the divisor 1000 presumably approximates the recording length
# in seconds so that the comparison is in spikes per second — confirm.
thresholded = row_sum / 1000 > threshold

# report the share of neurons that fail the threshold
n_kept = np.sum(thresholded)
dropped_pct = (1 - n_kept / len(thresholded)) * 100
print("dropping {:.2f}% of neurons".format(dropped_pct))

# boolean-mask the rows so only neurons above the threshold remain
count_matrix_filtered = count_matrix[thresholded]

In [None]:
# split the data along the time axis: the first 80% of the bins form the
# training set, the remaining bins the test set
train_size = int(0.8 * count_matrix_filtered.shape[1])

# np.save does not create missing folders, so make sure the output directory
# exists (it may be absent, e.g. on a fresh checkout)
out_dir = Path("../../data_untracked")
out_dir.mkdir(parents=True, exist_ok=True)

np.save(out_dir / "train_hpc2.npy", count_matrix_filtered[:, :train_size])
np.save(out_dir / "test_hpc2.npy", count_matrix_filtered[:, train_size:])