### Check WD (change if necessary) and file loading

In [4]:
# Show current directory
import os
curr_dir = os.getcwd()
print(curr_dir)

# Path fragment that identifies the data folder. It is built with the platform
# separator so the check below does not rely on "/" being the path separator.
data_dir_marker = os.sep + os.path.join("src", "lab_data")

# Only move when we are not already inside the data folder; this keeps the cell
# safe to re-run after the working directory has been changed.
if data_dir_marker not in curr_dir:
    # Data folder location, relative to the directory the kernel started in
    file_location = os.path.join(curr_dir, "thesis-lava", "src", "lab_data")
    print("File Location: ", file_location)

    # Change the current working directory
    os.chdir(file_location)

    # New working directory
    print("New Working Directory: ", os.getcwd())

/home/monkin/Desktop/feup/thesis
File Location:  /home/monkin/Desktop/feup/thesis/thesis-lava/src/lab_data
New Working Directory:  /home/monkin/Desktop/feup/thesis/thesis-lava/src/lab_data


## Import lab data from .mat files and process it

In [5]:
import scipy.io as sio

# Read the contents of the MATLAB file
mat_file = 'spike_data.mat'
data = sio.loadmat(mat_file)

# List the keys that were found in the loaded object
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'spike_times_elec_ms', 'total_recording_time_ms'])


# Total Recording Time in ms

In [19]:
# The value is nested two levels deep, so unwrap it once and reuse it
total_recording_ms = data['total_recording_time_ms'][0][0]
print(f"Total recording time (ms): {total_recording_ms} ms")

# Convert to minutes (60 * 1000 ms per minute), rounded to 4 decimal places
recording_minutes = round(total_recording_ms / (60 * 1000), 4)
print(f"Recording time in minutes: {recording_minutes} minutes")

Total recording time (ms): 299922.8 ms
Recording time in minutes: 4.9987 minutes


## See the shape of the spike times in the .mat file

In [20]:
# Pull the per-channel spike times out of the loaded .mat contents
spike_times = data['spike_times_elec_ms']

# Report the shape of the outer container, then the nesting of its first entry
outer_shape = spike_times.shape
first_entry = spike_times[0]
print(f"Shape of the outer object: {outer_shape}")

print(f"Shape of the first element: {first_entry.shape} -> {first_entry[0].shape}")

# Show the raw entries of the first 5 channels
print(spike_times[:5])

Shape of the outer object: (252, 1)
Shape of the first element: (1,) -> (1, 6)
[[array([[ 69486.8, 173984.7, 193738.7, 210319.3, 269287.5, 270162.6]])]
 [array([[  1427.1,   1430.4,   1433.3, ..., 299314.5, 299903.1, 299912.1]])]
 [array([[203210.7]])]
 [array([[24641.9, 62274.3]])]
 [array([[  2794.2,   5002.2,   5016.6,   5904.6,   5908. ,   5910.3,
            5912.9,   5915.4,   5918. ,   5921.5,   5924.5,   5926.9,
            5929.8,   5933.5,   5935.8,   5938.1,   5946.9,   5954.6,
            5959.6,   5962.1,   5964.3,   5966.5,   5970.6,   5974. ,
            6014.4,   9839.3,  13765.5,  22071.5,  22075. ,  22179.6,
           22190.7,  22198.9,  26725.7,  29601. ,  29605.1,  30832. ,
           30839.8,  30843.6,  33404.5,  33406.7,  33408.8,  33418. ,
           33420.6,  33422.8,  33426.3,  33430.2,  33432.7,  33434.9,
           33438.1,  33440.4,  33442.8,  33445.1,  33449.8,  33452.8,
           33458.2,  33461.8,  33464.9,  33470.1,  33475.2,  33479. ,
           33489

So the spike times are stored in a `(252, 1)` object array, i.e. one entry per channel. Each entry is itself a `(1, n)` array holding the `n` spike times recorded on that channel. The spike times are in ms.

# Change the structure of the data

Let's change the structure of the data to a 2D array that is ordered by time. This way, we can use the input data of various channels together by following the time order.

Thus, each row will represent a spiking event and contain 2 columns for the spike time and the channel index respectively. The structure is exemplified below:

| Spike Time (ms) | Channel Index |
|-----------------|---------------|
| 3               | 1             |
| 8               | 0             |
| 12              | 2             |
| 13              | 3             |
| 13              | 6             |
| 14              | 5             |


## Select the channels to be used
For the sake of simplicity, we can define the set of channels to be used. If the set is left empty, all the channels are used.

In [27]:
# Number of channel indices to select (indices 0 .. N_CHANNELS - 1)
N_CHANNELS = 252

# Set of channel indices to keep.
# To restrict the selection to a hand-picked subset, assign it directly instead,
# e.g. channels_used = {1, 2, 3, 4, 5, 6, 7, 8}
channels_used = set(range(N_CHANNELS))

In [28]:
import numpy as np

# Collect one (time, channel index) record per spike, channel by channel
spike_records = [
    (spike_time, idx)
    for idx, channel in enumerate(spike_times)
    # An empty channels_used set means "keep every channel"
    if len(channels_used) == 0 or idx in channels_used
    # Drop the extra wrapping dimension (when present) and flatten the spike times
    for spike_time in (channel[0] if len(channel) > 0 else channel).flatten()
]

# Structured dtype: a float 'time' field and an int 'channel' field
dtype = [('time', float), ('channel', int)]

# Turn the list of tuples into a structured numpy array
all_spike_times = np.array(spike_records, dtype=dtype)

# Preview the first 10 records
print(all_spike_times[:10])

# Report how many records were collected
print(all_spike_times.shape)


[( 69486.8, 0) (173984.7, 0) (193738.7, 0) (210319.3, 0) (269287.5, 0)
 (270162.6, 0) (  1427.1, 1) (  1430.4, 1) (  1433.3, 1) (  1462.8, 1)]
(40020,)


As we can see, we now have a 1-D structured `numpy array` with one record per spike, so its length equals the total number of spikes in the dataset. Each record has two fields: `time`, which contains the spike time, and `channel`, which contains the channel index.

The next step is to sort the array by the spike times.

In [29]:
# Sort the records by the 'time' field. np.sort returns a new sorted array and
# leaves all_spike_times untouched, so no explicit .copy() is needed.
ordered_spike_times = np.sort(all_spike_times, order='time')

# Print the first 10 spike times
print(ordered_spike_times[:10])

# Print the shape of the ordered spike times
print(ordered_spike_times.shape)

[( 99.5, 229) (303.6,   7) (502.5, 229) (510.6,  71) (528. ,  54)
 (540.9,   7) (589.3, 225) (631.6, 100) (633.8, 100) (758.3, 229)]
(40020,)


## Validate that the data is sorted correctly

In [30]:
# Select channel 229's records from the unsorted array and show the first 5
channel_229_spikes = all_spike_times[all_spike_times['channel'] == 229]
print("First 5 spikes of channel 229: ", channel_229_spikes[:5])

# Same selection for channel 7
channel_7_spikes = all_spike_times[all_spike_times['channel'] == 7]
print("First 5 spikes of channel 7: ", channel_7_spikes[:5])

First 5 spikes of channel 229:  [(  99.5, 229) ( 502.5, 229) ( 758.3, 229) ( 802.4, 229) (1326.8, 229)]
First 5 spikes of channel 7:  [( 303.6, 7) ( 540.9, 7) ( 782.6, 7) (1063.8, 7) (1434.7, 7)]


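Indeed, the pairing between each spike time and its channel index is preserved after the sorting: the first records of `ordered_spike_times` match the first spikes of channels 229 and 7 in the unsorted data. So, we now have a structured array of (time, channel) records that is ordered by time: `ordered_spike_times`.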

# Write the processed data to a .csv file

Finally, we write the processed data to a .csv file. This way, we can use it in the Spiking Neural Networks (SNN) model.

The .csv file will have the following structure (the header row written to the file is `time,channel_idx`, and the times are in ms):

| time (ms)       | channel_idx   |
|-----------------|---------------|
| 3               | 1             |
| 8               | 0             |
| 12              | 2             |
| 13              | 3             |

In [31]:
import csv

csv_cols = ['time', 'channel_idx']
file_name = "lab_data_all_channels.csv"

# newline='' leaves line-ending handling to the csv module
with open(file_name, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_cols)

    # Header row first, then one row per record in ordered_spike_times
    writer.writeheader()
    writer.writerows(
        {'time': spike[0], 'channel_idx': spike[1]}
        for spike in ordered_spike_times
    )