In [1]:
# Align times from annotated data, imaging data, and audio data
# Output: time stamps in the same units
#         audio data, imaging data, annotated data

# First open all three files
# Then find the time stamps for each of the three files
# Then align the time stamps
# Then output the aligned time stamps and the data

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import wavfile

Load in all data

In [2]:
from datetime import datetime

# Load in data
labeled = pd.read_csv('2023_03_31_7_03_30_annotated.txt', sep='\t', 
                                                          header=None, 
                                                          names=['start', 'end', 'note'])
calcium = pd.read_csv('2023_03_31_undirected_song_joint_spkfn.csv')
fs, audio = wavfile.read('2023_03_31_7_03_30.wav')

Transform calcium data for later use

In [3]:
# Convert calcium datastruct to matrix
# the columns that have the word 'neuron' in it
col_names = calcium.columns
neuron_cols = [col for col in col_names if 'neuron' in col]
total_neurons = len(neuron_cols)
calcium_mat = calcium[neuron_cols].to_numpy()

#dF = np.diff(calcium_mat, axis=0)
f5 = np.percentile(calcium_mat, 5, axis=0)
dF_f0 = (calcium_mat-f5) / f5

Align all time data

In [13]:
# Assign time stamps to each data set
calcium_ms = calcium['stamp_msSinceEpoch']
audio_t0 = np.arange(len(audio))/fs

# Base time from manually entered data (EVERYTHING SHOULD BE SYNCED TO THIS TIME)
base_dt = datetime(year=2023, month=3, day=31, hour=7, minute=1, second=35, microsecond=239000)
base_ms = 1680271295239
ca_fs = 1/15

# Start time of wav file
base_wav = datetime(year=2023, month=3, day=31, hour=7, minute=3, second=30)

# Calcium time relative to base time
calcium_t = (calcium_ms - base_ms)/1000

# Audio data time relative to base time
audio_delay = (base_wav - base_dt).total_seconds()
audio_t = audio_t0 - audio_delay

# Align start and end time stamps
labeled['start_t'] = labeled['start']-audio_delay
labeled['end_t'] = labeled['end']-audio_delay

# Limit the data to only positive start_t and reindex
labeled = labeled[labeled['start_t'] > 0]
labeled = labeled.reset_index(drop=True)

start_idx = np.zeros(len(labeled))
# add the labeled start index to the calcium data
for i in range(len(labeled)):
    idx = np.argmin(np.abs(calcium_t - labeled['start_t'][i]))
    #if(idx <= 3*ca_fs):
    start_idx[i] = idx

# add the start index to the labeled data
labeled['start_idx'] = start_idx.astype(int)

Align all notes for one neuron

In [17]:
# Show only the labeled data for a certain syllable
for neuron in range(total_neurons):
    for syl in labeled['note'].unique():
        offset = 30

        labeled_syl = labeled[labeled['note'] == syl]
        labeled_syl = labeled_syl.reset_index(drop=True)

        # Plot the labeled data for a certain syllable
        traces = np.zeros((len(labeled_syl), total_neurons, offset))

        # Fill in the calcium traces for each syllable
        for i in range(len(labeled_syl)):
            idx = labeled_syl['start_idx'][i]
            traces[i] = dF_f0[idx:idx+offset].T

        # Plot all traces above each other
        plt.plot(np.arange(offset)*ca_fs, np.mean(traces[:,neuron,:], axis=0))

    plt.xlabel('Time (s)')
    plt.legend(labeled['note'].unique())
    plt.title('Neuron ' + str(neuron))
    plt.savefig('Figures/neuron_' + str(neuron) + '.png')
    plt.close()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


In [21]:
labeled['note'].unique()

array(['i', 'x', 'y', 'j', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'k', 'l',
       nan], dtype=object)