## Import Packages

In [1]:
import numpy as np
import nexfile # a .py file
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Optimizer
import matplotlib.pyplot as plt
import copy
import scipy
import scipy.io
import scipy.linalg
import scipy.stats

## Data Loading Functions

In [2]:
def loadnpz(name, allow_pickle=False):
    """
    loadnpz loads compressed files
    Args:
       name (str): directory of npz file
       allow_pickle (bool): argument to allow pickle
    Returns:
       data (np array): np array from compressed npz file
    """
    if allow_pickle:
        data = np.load(name, allow_pickle=True)  # Over-rule default False (loading pickled data can execute arbitrary code)
    else:
        data = np.load(name)
    data = data.f.arr_0  # Gets np array from data, which is currently an instance of class NpzFile, which has f attribute (numpy.lib.npyio.NpzFile)
    return data

def loadFileNames(data_dir):
    """
    Gives the names of the files in the folder folderName1, which is simply the numeric data folders from /Rodent WFU DNMS
    Args:
        data_dir (str): local directory that stores the folder for neuron data
    Returns:
        fileNames (list): list of all .nex filenames as full directory name as str (./Rodent_WFU_DNMS/1193/1193u044merge-clean.nex)
    """
    dir_name =  data_dir + '/driveNeuron' # Local directory as string for Rodent WFU DNMS
    folderNames = os.listdir(dir_name) # Get list for directory contents
    rat_nums = [] # List to append folders with integer name as rats' id
    
    # If subdirectory in list is integer then add to list rat_nums
    for num in folderNames: 
        try:
            int(num)
            rat_nums.append(num)
        except:
            True    
    
    fileNames = [] # the full directory of each event for all the rats
    
    # Generate list of all WFU Rat DMS file names with full directory name
    for num in rat_nums: 
        folder = dir_name + "/" + num + "/" # folder directory for each rat
        events = os.listdir(folder) # list of one rat's .nex file name for each event
        for a in range(len(events)): 
            events[a] = folder + events[a] # concatenate the full directory name for each event .nex file
        fileNames = fileNames + events 

    return fileNames


In [3]:
# ***Change this according to local directory***
data_dir = '/Volumes/TOSHIBA/data'

## Event Data Preprocessing (save data to .npz files) 

In [4]:
def saveEventData(data_dir, eventNames, fileLabel):
    """
    This function saves: 
    the spike trains around the events, 
    input names (the wire/cell names of each input neuron spike channel), 
    the index of the event being predicted,
    the index label of valid sessions
    Args:
       data_dir (str): local directory that stores the folder for neuron data
       eventNames (list of str): names of events
       fileLabel (str): label for saved files
    Returns:
       None
    """
    
    #"validArgs" will be the set of sessions with valid data that can be read with nexfile reader.
    validArgs = []

    #This gives the file names of the sessions.
    fileNames = loadFileNames(data_dir)
        
    # loop over all the sessions (of all the rats), total 519 sessions
    for a in range(len(fileNames)): 
        fileName = fileNames[a] # full directory of one file
        folderName1 = fileName.split('/')[3] # folder name, int id num of rat
        
        #There is an issue with the data in the folder labeled 1127, that causes it to be different from all the other sessions.
        if folderName1 != '1127':
            try:
                #This loads the data
                reader = nexfile.Reader()
                fileData = reader.ReadNexFile(fileName)
                validArgs.append(a)
            except:
                True

            #This gets the names of all the variables which have timestamps data
            variableNames = []
            for b in range(len(fileData['Variables'])):
                if 'Timestamps' in fileData['Variables'][b].keys():
                    varName = fileData['Variables'][b]['Header']["Name"]
                    variableNames.append(varName)                  
            variableNames = np.array(variableNames)
                        
            #This gets the neuron spike data and the event time data.
            inputNums = []
            inputNames = []
            outputNums = []
            outputNames = []
            timeData = []
            b = 0
            for b0 in range(len(fileData['Variables'])):
                if 'Timestamps' in fileData['Variables'][b].keys():
                    #This gives the timing data for the variable
                    times1 = fileData['Variables'][b]['Timestamps']
                    times1 = np.array(times1)
                    timeData.append(np.copy(times1))

                    #This puts the variable number in "spikeData" for each relevent variable
                    name = variableNames[b]
                    #This appends the arg in "fileData['Variables']" which corresponds to the variable "name"
                    if ('wire' in name) and ('cell' in name):
                        inputNums.append(b) 
                        inputNames.append(name)
                        
                    #This appends the arg in "fileData['Variables']" which corresponds to the variable "name"
                    if name in events:
                        outputNums.append(b) 
                        outputNames.append(name)
                        
                    b += 1
            outputNums = np.array(outputNums)[np.argsort(np.array(outputNames))]
            
            #outputType will give the number of the event in "events" for each time of each event.
            numOutput = 0 
            outputType = np.array([])
            
            for b0 in range(len(outputNums)):
                b = outputNums[b0]
                times0 = timeData[b] #This gives the timing of events.
                numOutput += times0.shape[0]

                b1 = np.argwhere(events == outputNames[b0])[0, 0] #This gives the argument in "eventss" of this variable.

                outputType = np.concatenate(( outputType, np.zeros(times0.shape) + b1  ))

            #spikeTimer will include the spike trains in the 10 second interval around each event.
            #spikeTimer = np.zeros((numOutput, len(inputNums), 2000 ))
            spikeTimer = np.zeros((numOutput, len(inputNums), 5000 ))          
            b0 = 0
            for b in outputNums: #Iterating through output events
                times0 = timeData[b] #This is the times of this event
                for c in range(times0.shape[0]):
                    timeNow = times0[c] #This is a particular time of a particular event
                    d0 = 0
                    for d in inputNums:
                        spikes = timeData[d] - timeNow #This gives the timing of neuron spikes relative to the event.
                        spikes = spikes[np.abs(spikes) < 5] #This gives only neuron spikes within 5 seconds of the event.
                        spikes = spikes + 5 #This gives the timing of these neuron spikes relative to a 10 second window around the event
                        spikes = np.floor(spikes * 500).astype(int) #This rounds the time to the nearest 500th of a second.
                        spikeTimer[b0, d0, spikes] = 1 #This converts the spike times to a binary spike train, and adds it to the full spike train array.
                        d0 += 1
                    b0 += 1
        
            np.savez_compressed(data_dir+'/eventData/seperate/data_' + fileLabel + '_' + str(a) + '.npz', spikeTimer) #This saves the neuron spike data 
            inputNames = np.array(inputNames)
            np.savez_compressed(data_dir+'/eventData/seperate/input_' + fileLabel + '_' + str(a) + '.npz', inputNames) #This saves the wire/cell names of each input neuron spike channel.
            np.savez_compressed(data_dir+'/eventData/seperate/output_' + fileLabel + '_' + str(a) + '.npz', outputType) #This saves the index of the event being predicted

    validArgs = np.array(validArgs)
    np.savez_compressed(data_dir+'/eventData/seperate/validArgs_' + fileLabel + '.npz', validArgs) #This saves which sessions have valid data.


In [5]:
def combineEventData(data_dir, eventNames, fileLabel):
    """
    This function saves: 
        the neuron spike data,
        the index of the events,
        the valid session index,
        the binary of which neurons exist in each session
    Args:
       data_dir (str): local directory that stores the folder for neuron data
       eventNames (list of str): names of events
       fileLabel (str): label for saved files
    Returns:
       None
    """

    M = 50
    #M = 25

    fileNames = loadFileNames(data_dir)

    validArgs = loadnpz(data_dir + '/eventData/seperate/validArgs_' + fileLabel + '.npz')
    inputNamesAll = np.array([])
    outputTypeAll = np.array([])
    keyAll = np.array([])
    for a0 in range(len(validArgs)):
        a = validArgs[a0]
        inputNames = loadnpz(data_dir + '/eventData/seperate/input_' + fileLabel + '_' + str(a) + '.npz')
        outputType = loadnpz(data_dir + '/eventData/seperate/output_' + fileLabel + '_' + str(a) + '.npz')

        #print (np.unique(outputType))

        #print (fileNames[a])
        #print (inputNames)

        #This loop removes '_ver_0' from names
        for b in range(len(inputNames)):
            if inputNames[b][-len('_ver_0'):] == '_ver_0':
                inputNames[b] = inputNames[b][:-len('_ver_0')]

        #This combines the data from sessions
        inputNamesAll = np.concatenate((inputNamesAll, inputNames))
        outputTypeAll = np.concatenate((outputTypeAll, outputType))
        keyAll = np.concatenate((keyAll, np.zeros(outputType.shape[0]) + a0  ))

    keyAll = keyAll.astype(int)

    inputNamesUnique = np.unique(inputNamesAll) #This is a list of unique neuron channel names

    np.savez_compressed(data_dir + '/eventData/general/uniqueNames.npz', inputNamesUnique)



    #This array will contain all the combined neuron spike data.
    dataAll = np.zeros((outputTypeAll.shape[0], inputNamesUnique.shape[0], 100 ))
    #dataAll = np.zeros((outputTypeAll.shape[0], inputNamesUnique.shape[0], 200 ))

    #sensorLocation is a binary array showing which neuron channels exist in this session
    sensorLocation = np.zeros(( validArgs.shape[0], inputNamesUnique.shape[0] ))
    count1 = 0
    for a0 in range(len(validArgs)):
        a = validArgs[a0]
        
        print(a0 , '/' , len(validArgs))

        inputNames = loadnpz(data_dir + '/eventData/seperate/input_' + fileLabel + '_' + str(a) + '.npz')
        inputArgs = []
        for b in range(len(inputNames)):
            if inputNames[b][-len('_ver_0'):] == '_ver_0': #Removing "ver_0" from name
                inputNames[b] = inputNames[b][:-len('_ver_0')] #Removing "ver_0" from name
            #This finds the number corresponding to the neuron channel name of inputNames[b]
            arg1 = np.argwhere(inputNamesUnique == inputNames[b])[0, 0]
            inputArgs.append(arg1)
        inputArgs = np.array(inputArgs).astype(int) #inputArgs is the arguments of the subset of inputNamesUnique which is equal to 'inputNames'


        sensorLocation[a0, inputArgs] = 1

        data = loadnpz(data_dir + '/eventData/seperate/data_' + fileLabel + '_' + str(a) + '.npz')
        data = data.reshape((data.shape[0], data.shape[1], data.shape[2] // M, M ))
        data = np.sum(data, axis=3) #This modifies the timing to measure how many spikes have occured in a time period of M/500 seconds. For M = 50, it is one 10th of a second.

        shape1 = data.shape
        #data1 = data.reshape((shape1[0]*shape1[1]*shape1[2],))
        #plt.hist(data1, bins=100)
        #plt.show()
        #quit()

        #data[data > 1] = 1

        #data[data > 2] = 2
        #data = data / 2

        #data[data > 5] = 5
        #data = data / 3

        #data[data > 3] = 3
        #data = data / 3

        data = np.log(data + 1) #This is a numerical transformation of the number of spikes which occur. This transformation prevents the values in "data" from being overly large in cases where many spikes occur rapidly.
        size1 = data.shape[0]

        #plt.plot(np.sum(np.sum(data, axis=1), axis=1))
        #plt.show()

        dataAll[count1:count1+size1, inputArgs] = np.copy(data) #This adds the spike data to the array of all spike data.

        count1 += size1


    #quit()

    #plt.plot(np.sum(np.sum(dataAll, axis=1), axis=1))
    #plt.show()


    np.savez_compressed(data_dir + '/eventData/combined/data_' + fileLabel + '.npz', dataAll ) #This saves the neuron spike data
    np.savez_compressed(data_dir + '/eventData/combined/outputType_' + fileLabel + '.npz', outputTypeAll ) #This saves the event labels
    np.savez_compressed(data_dir + '/eventData/combined/keys_' + fileLabel + '.npz', keyAll ) #This saves the session numbers
    np.savez_compressed(data_dir + '/eventData/combined/inputLocation_' + fileLabel + '.npz', sensorLocation ) #This saves the binary of which neurons exist in each session.


In [6]:
# "includeName" specifies which events are being predicted. It saves the neuron spike trains around these events.
# 'ALL_A_NM_M' 'ALL_BPs' 'ALL_B_NM_M' 'ALL_NM_PHASE' 'ALL_S_PHASE'
# 'A_MATCH' 'A_NONMATCH' 'A_SAMPLES' 'AllFile' 'B_MATCH' 'B_NONMATCH'
# 'B_SAMPLES' 'DELAY' 'FAILURE' 'LASTNP' 'NOSEPOKE' 'NP' 'OFFER2' 'OFFERA'
# 'OFFERB' 'RETRACT' 'REWARDCOUNT' 'STARTSYNC' 'STOPSYNC' 'SUCCESS' 'TRIAL'
# eventNames = ['A_MATCH', 'A_NONMATCH', 'B_MATCH', 'B_NONMATCH', 'A_SAMPLES', 'B_SAMPLES']
# eventNames = np.array(eventNames)

# fileLabel_dict = {"AM&S": [0,4], "ANM&S": [1,4], "BM&S": [2,5], "BNM&S": [3,5], "AM&NM&S": [0,1,4], "BM&NM&S": [2,3,5]}

# for i in fileLabel_dict:
#     print(i, fileLabel_dict[i]) # i = fileLabel
#     print(eventNames[fileLabel_dict[i]]) # eventNames
    
#     saveEventData(data_dir, eventNames[fileLabel_dict[i]], i)
# #     investigateEventData(data_dir, eventNames[fileLabel_dict[i]], i)
#     combineEventData(data_dir, eventNames[fileLabel_dict[i]], i)
#     print("Complete " + i)

In [1]:
def add_events(outputNames, outputNums, timeData, new_events, included_events):
    '''
    This function sort the included_events events by timestamp and extract the events that are valid (Sample event followed by Match or Nonmatch event) 
    Args:
        outputNames (list of str): the name of events used as output
        outputNums (list of int): the index of events for output in timeData
        timeData: the Timestamps of all variables
        new_events (list of str): ['A_S|M', 'A_S|NM'] or ['B_S|M', 'B_S|NM'] (Match must be before NM events) ***coding structure needs fixing
        included_events: (list of str) events needed to find the specific event given the other event happened (All A or B events)
    Returns:
        updated timeData, outputNums, outputNames
    '''
    # find the index of included_events in outputNames & outputNums
    for i in included_events:
        if "_MATCH" in i:
            if i in outputNames: # check if i is outputNames
                M_idx = outputNames.index(i)
                _M = timeData[outputNums[M_idx]] # '_MATCH' timestamps
                _M = [(j, i) for j in _M]
            else: # if not, the list of timestamps is empty
                _M = [] 
                
        if "_NON" in i:
            if i in outputNames:
                NM_idx = outputNames.index(i)
                _NM = timeData[outputNums[NM_idx]] # '_NONMATCH' timestamps
                _NM = [(j, i) for j in _NM]
            else:
                _NM = []
            
        if "_SAMPLE" in i:
            if i in outputNames:
                S_idx = outputNames.index(i)        
                _S = timeData[outputNums[S_idx]] # '_SAMPLES' timestamps
                _S = [(j, i) for j in _S]
            else:
                _S = []
    
    _all = sorted(_S + _NM + _M) # sort all events of included events (one position of A or B) 
#     print('_S', len(_S))
#     print('_M', len(_M))
#     print('_NM', len(_NM))
#     print('_all', len(_all))
#     print(_all)
    
    # to store the index of events timestamps
    t_idx_SM = [] 
    t_idx_SNM = []
    t_idx_M = []
    t_idx_NM = []
    # loop over all the ordered timestamps 
    for i in np.arange(0, len(_all)-1):
        # record the timestamp index of Sample given Match event and the following Match event
        if '_SAMPLES' in _all[i][1] and '_MATCH' in _all[i+1][1]: 
            t_idx_SM.append(i)
            t_idx_M.append(i+1)
        # record the timestamp index of Sample given NM event and the following NM event
        if '_SAMPLES' in _all[i][1] and '_NONMATCH' in _all[i+1][1]: 
            t_idx_SNM.append(i)
            t_idx_NM.append(i+1)
    
    S_M_timestamps = np.array([_all[i][0] for i in t_idx_SM]) # stores the Timestamps of Sample given Match events
    S_NM_timestamps = np.array([_all[i][0] for i in t_idx_SNM]) # stores the Timestamps of Sample given NM events   
    M_timestamps = np.array([_all[i][0] for i in t_idx_M]) # stores the Timestamps of Match events
    NM_timestamps = np.array([_all[i][0] for i in t_idx_NM]) # stores the Timestamps of NM events
    
    # if the events are not in correct sequence, update the timeData with corrected timestamps
    if (len(S_M_timestamps) != len(_M)) or (len(S_NM_timestamps) != len(_NM)):
#         print('Target Timestamps SM', len(S_M_timestamps))
#         print('Target Timestamps SNM', len(S_NM_timestamps))
#         print('Target Timestamps M', len(M_timestamps))
#         print('Target Timestamps NM', len(NM_timestamps))
#         print('_M', len(_M))
#         print('_NM', len(_NM))

#         print(len(timeData[outputNums[M_idx]]), len(M_timestamps))
        timeData[outputNums[M_idx]] = M_timestamps

#         print(len(timeData[outputNums[NM_idx]]), len(NM_timestamps))
        timeData[outputNums[NM_idx]] = NM_timestamps
#         print(timeData[outputNums[NM_idx]])
#         print(S_NM_timestamps)

    # append the filtered sample timestamps at the end of the timeData
    timeData.append(S_M_timestamps)
    # append the index of timeData to outputNums
    outputNums = np.append(outputNums, len(timeData)-1)

    timeData.append(S_NM_timestamps)
    outputNums = np.append(outputNums, len(timeData)-1)
    
    for i in new_events:
        outputNames.append(i)
    
    return timeData, outputNums, outputNames



In [149]:
def saveEventData2(data_dir, eventNames, fileLabel, rm_groups = []):
    """
    This function saves: 
        the spike trains around the events, 
        input names (the wire/cell names of each input neuron spike channel), 
        the index of the event being predicted,
        the index label of valid sessions
        the names of output events 
    Args:
       data_dir (str): local directory that stores the folder for neuron data
       eventNames (list of str): names of all events
       fileLabel (str): label for saving files
       rm_groups (list of str): names of events to be removed from eventNames to form outputNames
    Returns:
       None
    """

#     output = []
    
    #"validArgs" will be the set of sessions with valid data that can be read with nexfile reader.
    validArgs = []

    #This gives the file names of the sessions.
    fileNames = loadFileNames(data_dir)
    
    print(fileLabel)
    
    # loop over all the sessions (of all the rats), total 519 sessions
    for a in np.arange(len(fileNames)): 
        fileName = fileNames[a] # full directory of one file
        folderName1 = fileName.split('/')[3] # folder name, int id num of rat
        
        #There is an issue with the data in the folder labeled 1127, that causes it to be different from all the other sessions.
        if folderName1 != '1127':
            try:
                #This loads the data
                reader = nexfile.Reader()
                fileData = reader.ReadNexFile(fileName)
                validArgs.append(a)
            except:
                True

            #This gets the names of all the variables which have timestamps data
            variableNames = []
            for b in range(len(fileData['Variables'])):
                if 'Timestamps' in fileData['Variables'][b].keys():
                    varName = fileData['Variables'][b]['Header']["Name"]
                    variableNames.append(varName)                  
            variableNames = np.array(variableNames)
            
                        
            # This gets the neuron spike data and the event time data.
            inputNums = [] # index of inputs in timeData
            inputNames = [] # input names
            outputNums = [] # index of output events in timeData (same size ad outputNames)
            outputNames = [] # output event names
            timeData = [] # list of Timestamps for each variable with Timestamps
            b = 0 # to record index of timeData (keep count of timeData)
            for b0 in np.arange(len(fileData['Variables'])):
                if 'Timestamps' in fileData['Variables'][b0].keys():
                    #This gives the timestamps for all the variables with 'Timestamps'
                    times = fileData['Variables'][b0]['Timestamps']
                    times = np.array(times)
                    timeData.append(np.copy(times))

                    #This puts the variable number in "spikeData" for each relevent variable
                    name = fileData['Variables'][b0]['Header']["Name"]

                    #This appends the arg in "fileData['Variables']" which corresponds to the neuron signal input
                    if ('wire' in name) and ('cell' in name):
                        inputNums.append(b) 
                        inputNames.append(name)

                    #This appends the arg in "fileData['Variables']" which corresponds to the output events
                    if name in eventNames:
                        outputNums.append(b) 
                        outputNames.append(name)

                    b+=1

            outputNums = np.array(outputNums)[np.argsort(np.array(outputNames))]
            
            # adding new groups of Sample events given conditions
            timeData, outputNums, outputNames = add_events(outputNames, outputNums, timeData, new_events = ['A_S|M', 'A_S|NM'], included_events = ['A_MATCH', 'A_NONMATCH', 'A_SAMPLES'])
            timeData, outputNums, outputNames = add_events(outputNames, outputNums, timeData, new_events = ['B_S|M', 'B_S|NM'], included_events = ['B_MATCH', 'B_NONMATCH', 'B_SAMPLES'])
#             print("outputNames", outputNames)
#             print("outputNums", outputNums)
#             print("timeData", len(timeData))
            
            # remove the events that are not needed 
            if len(rm_groups)!= 0:
                for e in rm_groups:
                    idx = outputNames.index(e)
                    outputNames.pop(idx)
                    outputNums = np.delete(outputNums, idx)

            #outputType will give the number of the event in "events" for each time of each event.
            numOutput = 0 
            outputType = np.array([]) # index of each output events in eventNames

            for b0 in np.arange(len(outputNums)):
                times = timeData[outputNums[b0]] # This gives the Timestamps of an output event.
                numOutput += times.shape[0]

#                 events_idx = eventNames.index(outputNames[b0]) # This gives the argument in "events" of this variable.
#                 print(eventNames)
#                 print(outputNames[b0])
                events_idx = b0
                outputType = np.concatenate((outputType, np.zeros(times.shape) + events_idx))
#             print("outputType", outputType)

            # spikeTimer will include the spike trains in the 10 second interval around each event.
            #spikeTimer = np.zeros((numOutput, len(inputNums), 2000 ))
            spikeTimer = np.zeros((numOutput, len(inputNums), 5000 ))          
            b0 = 0
            for b in outputNums: #Iterating through output events
                times0 = timeData[b] #This is the times of this event
                for c in range(times0.shape[0]):
                    timeNow = times0[c] #This is a particular time of a particular event
                    d0 = 0
                    for d in inputNums:
                        spikes = timeData[d] - timeNow #This gives the timing of neuron spikes relative to the event.
                        spikes = spikes[np.abs(spikes) < 5] #This gives only neuron spikes within 5 seconds of the event.
                        spikes = spikes + 5 #This gives the timing of these neuron spikes relative to a 10 second window around the event
                        spikes = np.floor(spikes * 500).astype(int) #This rounds the time to the nearest 500th of a second.
                        spikeTimer[b0, d0, spikes] = 1 #This converts the spike times to a binary spike train, and adds it to the full spike train array.
                        d0 += 1
                    b0 += 1
                    
            np.savez_compressed(data_dir+'/eventData/seperate/data_' + fileLabel + '_' + str(a) + '.npz', spikeTimer) #This saves the neuron spike data (8--8th version of minor modification)
            inputNames = np.array(inputNames)
            np.savez_compressed(data_dir+'/eventData/seperate/input_' + fileLabel + '_' + str(a) + '.npz', inputNames) #This saves the wire/cell names of each input neuron spike channel.
            np.savez_compressed(data_dir+'/eventData/seperate/output_' + fileLabel + '_' + str(a) + '.npz', outputType) #This saves the number of the type of event being predicted
            print("Saved " + fileLabel + '_' + str(a))

#     # to check the number of each saved events         
#     output = np.concatenate((output, outputType))
#     a, c = np.unique(output, return_counts=True)
#     for i in np.arange(len(a)):
#     print(outputNames[i], c[i])
        
    validArgs = np.array(validArgs)
    np.savez_compressed(data_dir+'/eventData/seperate/validArgs_' + fileLabel + '.npz', validArgs) #This saves which sessions have valid data.
    np.savez_compressed(data_dir+'/eventData/seperate/outputNames_' + fileLabel + '.npz', outputNames) # This saves the outputNames for reference of the index in output
    
    

In [150]:
eventNames = ['A_MATCH', 'A_NONMATCH', 'A_SAMPLES', 'B_MATCH', 'B_NONMATCH', 'B_SAMPLES', 'A_S|M', 'A_S|NM', 'B_S|M', 'B_S|NM']
fileLabel = "6" # 'S|M & S|NM'


In [151]:
saveEventData2(data_dir, eventNames, fileLabel, rm_groups = ['A_S|M', 'A_S|NM', 'B_S|M', 'B_S|NM'])  # ['A_SAMPLES', 'B_SAMPLES'])


6
Saved 6_0
Saved 6_1
Saved 6_2
Saved 6_3
Saved 6_4
Saved 6_5
Saved 6_6
Saved 6_7
Saved 6_8
Saved 6_9
Saved 6_10
Saved 6_11
Saved 6_12
Saved 6_13
Saved 6_14
Saved 6_15
Saved 6_16
Saved 6_17
Saved 6_18
Saved 6_19
4 4
40 37
11 11
25 24
Saved 6_20
Saved 6_21
Saved 6_22
Saved 6_23
Saved 6_24
Saved 6_25
Saved 6_26
Saved 6_27
Saved 6_28
Saved 6_29
Saved 6_30
Saved 6_31
Saved 6_32
Saved 6_33
Saved 6_34
Saved 6_35
Saved 6_36
Saved 6_37
Saved 6_38
Saved 6_39
Saved 6_40
Saved 6_41
Saved 6_42
Saved 6_43
Saved 6_44
Saved 6_45
Saved 6_46
Saved 6_47
Saved 6_48
Saved 6_49
Saved 6_50
Saved 6_51
Saved 6_52
Saved 6_53
Saved 6_54
Saved 6_55
Saved 6_56
Saved 6_57
Saved 6_58
Saved 6_59
Saved 6_60
Saved 6_61
Saved 6_62
Saved 6_63
Saved 6_64
Saved 6_65
22 19
24 23
Saved 6_66
Saved 6_67
43 38
6 6
Saved 6_68
Saved 6_69
Saved 6_70
Saved 6_71
35 31
14 13
5 4
26 25
Saved 6_72
Saved 6_73
Saved 6_74
Saved 6_75
Saved 6_76
Saved 6_77
Saved 6_78
Saved 6_79
Saved 6_80
Saved 6_81
Saved 6_82
Saved 6_83
Saved 6_84
Saved 6

In [153]:
fileLabel = '6' # 'S|M & S|NM(NS)'
combineEventData(data_dir, eventNames, fileLabel)


0 / 519
1 / 519
2 / 519
3 / 519
4 / 519
5 / 519
6 / 519
7 / 519
8 / 519
9 / 519
10 / 519
11 / 519
12 / 519
13 / 519
14 / 519
15 / 519
16 / 519
17 / 519
18 / 519
19 / 519
20 / 519
21 / 519
22 / 519
23 / 519
24 / 519
25 / 519
26 / 519
27 / 519
28 / 519
29 / 519
30 / 519
31 / 519
32 / 519
33 / 519
34 / 519
35 / 519
36 / 519
37 / 519
38 / 519
39 / 519
40 / 519
41 / 519
42 / 519
43 / 519
44 / 519
45 / 519
46 / 519
47 / 519
48 / 519
49 / 519
50 / 519
51 / 519
52 / 519
53 / 519
54 / 519
55 / 519
56 / 519
57 / 519
58 / 519
59 / 519
60 / 519
61 / 519
62 / 519
63 / 519
64 / 519
65 / 519
66 / 519
67 / 519
68 / 519
69 / 519
70 / 519
71 / 519
72 / 519
73 / 519
74 / 519
75 / 519
76 / 519
77 / 519
78 / 519
79 / 519
80 / 519
81 / 519
82 / 519
83 / 519
84 / 519
85 / 519
86 / 519
87 / 519
88 / 519
89 / 519
90 / 519
91 / 519
92 / 519
93 / 519
94 / 519
95 / 519
96 / 519
97 / 519
98 / 519
99 / 519
100 / 519
101 / 519
102 / 519
103 / 519
104 / 519
105 / 519
106 / 519
107 / 519
108 / 519
109 / 519
110 / 519


## Data Checking

In [154]:
data_all = loadnpz(data_dir+'/eventData/combined/data_6.npz') # S|M & S|NM(NS).npz')
data_all.shape

(95786, 152, 100)

In [155]:
output_all = loadnpz(data_dir+'/eventData/combined/outputType_6.npz') # _S|M & S|NM(NS).npz')
output_all.shape

(95786,)

In [156]:
np.unique(output_all)

array([0., 1., 2., 3., 4., 5.])

In [157]:
a, c = np.unique(output_all, return_counts=True)
eventNames = loadnpz(data_dir+'/eventData/seperate/outputNames_6.npz') #_S|M & S|NM(NS).npz')
# ['A_MATCH', 'A_NONMATCH', 'B_MATCH', 'B_NONMATCH', 'A_S|M', 'A_S|NM', 'B_S|M', 'B_S|NM'] 

for i in np.arange(len(a)):
    print(eventNames[i], c[i])

print("A_MATCH + A_NONMATCH: ", c[0] + c[1])
print("B_MATCH + B_NONMATCH: ",c[2] + c[3])
# print("A diff: ", (c[0] + c[1]) - (c[4] + c[5])) # 23719
# print("B_diff: ", (c[2] + c[3]) - (c[6] + c[7])) # 23914

# some Sample events are followed by another Sample events (Not always in order of Sample & Match/ Nonmatch events)
# thus the difference in extracted Sample events given different conditions

A_MATCH 5321
A_NONMATCH 18571
A_SAMPLES 23896
B_MATCH 5850
B_NONMATCH 18355
B_SAMPLES 23793
A_MATCH + A_NONMATCH:  23892
B_MATCH + B_NONMATCH:  29746


In [112]:
key_all = loadnpz(data_dir+'/eventData/combined/keys_S|M & S|NM(NS).npz')
key_all

array([  0,   0,   0, ..., 518, 518, 518])

In [79]:
inputLocation = loadnpz(data_dir+'/eventData/combined/inputLocation_S|M & S|NM(NS).npz')
inputLocation.shape

(519, 152)

## Unused Functions

In [16]:
def investigateEventData(data_dir, eventNames, fileLabel):
    """
    This function saves *******
    """
    
    eventNames = ['A_MATCH', 'A_NONMATCH', 'B_MATCH', 'B_NONMATCH', 'A_SAMPLES', 'B_SAMPLES', 'NOSEPOKE']
    
    #"validArgs" will be the set of sessions index with valid data that are saved from the function saveEventData().
    validArgs = loadnpz(data_dir+'/eventData/seperate/validArgs_' + fileLabel + '.npz') 

    #full directory of each session
    fileNames = loadFileNames(data_dir)

    # loop over all the sessions 
    for a0 in range(len(validArgs)): 
        a = validArgs[a0]
        fileName = fileNames[a] 
        reader = nexfile.Reader()
        fileData = reader.ReadNexFile(fileName)

        #This gets the names of all the variables which have timestamps data
        variableNames = []
        for b in range(len(fileData['Variables'])):
            if 'Timestamps' in fileData['Variables'][b].keys():
                varName = fileData['Variables'][b]['Header']["Name"]
                variableNames.append(varName)
#             else: 
#                 print("b", b)
#                 print("no timestamp", fileData['Variables'][b]['Header']["Name"])
                
        variableNames = np.array(variableNames)

#         print (len(fileData['Variables']))
#         print("1size", len(variableNames))
        
        #This gets the neuron spike data and the event time data.
        variableNames2 = []
        inputNums = []
        inputNames = []
        outputNums = []
        outputNames = []
        spikeData = []
        b = 0
        for b0 in range(len(fileData['Variables'])):
            if 'Timestamps' in fileData['Variables'][b].keys():       
                #This gives the timing data for the variable
                spikes = fileData['Variables'][b]['Timestamps']
                spikes = np.array(spikes)
                spikeData.append(np.copy(spikes))

                #This puts the variable number in "spikeData" for each relevent variable
                name = variableNames[b]
                variableNames2.append(name)
                
                #This appends the arg in "fileData['Variables']" which corresponds to the variable "name"
                if ('wire' in name) and ('cell' in name):
                    inputNums.append(b)
                    inputNames.append(name)
                if name in eventNames:
                    outputNums.append(b)
                    outputNames.append(name)
                b += 1
#             else:
#                 print("b0", b0)
#                 print("no timestamp", fileData['Variables'][b0]['Header']["Name"])

        variableNames2 = np.array(variableNames2)
        
#         print ("2", variableNames2)
#         print("2size", len(variableNames2))

        #ALL_S_PHASE: A_SAMPLES, B_SAMPLES
        #NOSEPOKE
        #TRIAL
        existSpike = np.concatenate((spikeData[outputNums[0]] , spikeData[outputNums[1]] , spikeData[outputNums[2]] , spikeData[outputNums[3]]))
#         if False:
#             for b in range(len(variableNames2)):
#                 if not 'cell' in variableNames2[b]:
#                     print (variableNames2[b])
#                     spike1 = spikeData[b]
#                     print (spike1.shape)
#                     print (np.intersect1d(spike1, existSpike).shape)

        #quit()

        #arg1 = np.argwhere(variableNames2 == 'NOSEPOKE')[0, 0] #NOSEPOKE, REWARDCOUNT
        #arg2 = np.argwhere(variableNames2 == 'TRIAL')[0, 0]
        #spike1 = spikeData[arg1]
        #spike2 = spikeData[arg2]

        #print (spike1.shape)
        #print (spike2.shape)
        #print (np.intersect1d(spike1, spike2).shape)
        #print (variableNames2)
        #quit()


        outputNums = np.array(outputNums)[np.argsort(np.array(outputNames))]

        numOutput = 0
        outputType = np.array([])
        for b0 in range(len(outputNums)):
            b = outputNums[b0]
            spikes0 = spikeData[b]
            numOutput += spikes0.shape[0]

            b1 = np.argwhere(eventNames == outputNames[b0])[0, 0]

            outputType = np.concatenate(( outputType, np.zeros(spikes0.shape) + b1  ))


        spikeTimer = np.zeros((numOutput, len(inputNums), 2000 ))

        b0 = 0
        for b in outputNums:
            spikes0 = spikeData[b]
            for c in range(spikes0.shape[0]):
                timeNow = spikes0[c]

                d0 = 0
                for d in inputNums:
                    spikes = spikeData[d] - timeNow
                    spikes = spikes[np.abs(spikes) < 2]
                    spikes = spikes + 2
                    spikes = np.floor(spikes * 500).astype(int)

                    spikeTimer[b0, d0, spikes] = 1

                    #print (timeNow)
                    #print (spikes)
                    #quit()
                    d0 += 1
                #print (len(spikes))
                #quit()

                b0 += 1

        print (np.unique(outputType))


#         plt.plot(np.sum(np.sum(spikeTimer, axis=1), axis=1))
#         plt.show()

        np.savez_compressed(data_dir + '/eventData/seperate/investigate/data_' + fileLabel + '_' + str(a) + '.npz', spikeTimer)
        inputNames = np.array(inputNames)
        np.savez_compressed(data_dir + '/eventData/seperate/investigate/input_' + fileLabel + '_' + str(a) + '.npz', inputNames)
        np.savez_compressed(data_dir + '/eventData/seperate/investigate/output_' + fileLabel + '_' + str(a) + '.npz', outputType)

    validArgs = np.array(validArgs)
    np.savez_compressed(data_dir + '/eventData/seperate/investigate/validArgs_' + fileLabel + '.npz', validArgs)


In [72]:
def saveLocationNum(data_dir):

    #This loads the list of all neuron channel names
    inputNamesUnique = loadnpz(data_dir + '/eventData/general/uniqueNames.npz')

    locNames = [] #This list will include the brain location for all input neuron channels
    hemNames = [] #This list will include the hemisphere for all input neuron channels
    for a in range(inputNamesUnique.shape[0]):
        name = inputNamesUnique[a]
        name = name.split('_')
        #name = name[1:-4]


        hemName = name[1]
        locName = name[2]

        locNames.append(locName)
        hemNames.append(hemName)


    locNames = np.array(locNames)
    hemNames = np.array(hemNames)
    locNamesUnique, locNames = np.unique(locNames, return_inverse=True) #Converts brain location to number
    hemNamesUnique, hemNames = np.unique(hemNames, return_inverse=True) #Converts hemisphere to number

    #Saves information
    np.savez_compressed(data_dir + '/eventData/general/brainLocationNamesUnique.npz', locNamesUnique)
    np.savez_compressed(data_dir + '/eventData/general/brainLocationNames.npz', locNames)
    np.savez_compressed(data_dir + '/eventData/general/brainHemisphereNames.npz', hemNames)


In [73]:
saveLocationNum(data_dir)

In [74]:
def saveWireNum(data_dir):

    inputNamesUnique = loadnpz(data_dir + '/eventData/general/uniqueNames.npz') #This loads the names of the neuron channels

    wireName = [] #This list includes the wire names for all neuron input channels
    for a in range(inputNamesUnique.shape[0]):
        name = inputNamesUnique[a]
        name = name.split('_')
        name = name[1:-2]
        name = '_'.join(name)
        wireName.append(name)

    wireName = np.array(wireName)
    wireNameUnique, wireName = np.unique(wireName, return_inverse=True) #This converts the wire name to a number


    np.savez_compressed(data_dir + '/eventData/general/wireNames.npz', wireName) #This saves the wire number for each neuron input channel


In [75]:
saveWireNum(data_dir)