# Generate Mock Data

In [98]:
import os
import random
from pathlib import Path
from collections import Counter
from datetime import datetime
import numpy as np
import sys

import database.config as config
import mock_data.mock_data_utils as utils

In [8]:
# Identifiers of the mock patient/session generated below.
patient_id, session_nr = 1, 1

In [201]:
class GenerateData:
    """
    Create mock neural data (spike trains) and the corresponding meta-data
    (channel names, anatomical location) for one "patient" session.

    Also creates the stimulus-side files that accompany a recording:
    the events array, the DAQ log and a pause-free movie watchlog.

    Time bases used by the attributes:
      * ``rectime_on`` / ``rectime_off`` / ``rec_length``: milliseconds.
      * ``stim_on_time`` / ``stim_off_time``: microseconds.

    Parameters
    ----------
    patient_id : identifier used in the output directory name.
    session_nr : session number used in the output directory name.
    stimulus_len : length of the stimulus in minutes.
    data_root : optional directory under which
        ``<patient_id>/session_<session_nr>/`` is created. Defaults to
        ``DEFAULT_DATA_ROOT`` so existing callers keep their output location.
    """

    # Default output root, used when no ``data_root`` is passed.
    DEFAULT_DATA_ROOT = Path("/home/alana/Documents/phd/code/epiphyte/project/data/patient_data")

    def __init__(self, patient_id, session_nr, stimulus_len=83.33, data_root=None):
        # Local import: only this method needs it.
        from datetime import timezone

        self.patient_id = patient_id
        self.session_nr = session_nr
        self.stimulus_len = stimulus_len
        self.data_root = Path(data_root) if data_root is not None else self.DEFAULT_DATA_ROOT

        self.nr_channels = 80
        self.nr_units = random.randint(20, 100)
        self.nr_channels_per_region = 8
        self.unit_types = ["MU", "SU"]
        # "RPHC" mirrors "LPHC" (was misspelled "RPCH").
        self.brain_regions = ["LA", "LAH", "LEC", "LMH", "LPHC",
                              "RA", "RAH", "REC", "RMH", "RPHC"]

        self.rec_length = 5400000
        self.rectime_on = random.randint(1347982266000, 1695051066000)
        self.rectime_off = self.rectime_on + self.rec_length + random.randint(300000, 900000)

        self.spike_trains = self.generate_spike_trains()
        self.channel_dict = self.generate_channelwise_unit_distribution()

        ## stimulus data

        # number of entries shared by the events file and the DAQ log
        self.len_context_files = random.randint(4000, 5400)
        # rectime_on is in milliseconds; format the recording start as a UTC label
        self.datetime = datetime.fromtimestamp(
            int(self.rectime_on) / 1000, tz=timezone.utc
        ).strftime('%Y-%m-%d_%Hh%Mm%Ss')

        self.signal_tile = self.generate_pings()
        # Draw onset and offset together: the method is random, so calling it
        # once per value would pair an onset with an unrelated offset.
        self.stim_on_time, self.stim_off_time = self.generate_stimulus_onsets()

    def format_save_dir(self, subdir=None):
        """
        Build (and create if needed) the session output directory.

        Returns ``<data_root>/<patient_id>/session_<session_nr>[/<subdir>]``
        as a ``Path``.
        """
        save_dir = self.data_root / str(self.patient_id) / f"session_{self.session_nr}"

        if subdir:
            save_dir = save_dir / subdir

        save_dir.mkdir(parents=True, exist_ok=True)

        return save_dir

    def _resolve_save_dir(self, save_dir, subdir=None):
        """Return the caller-supplied ``save_dir`` (created if needed), else the default."""
        if save_dir is None:
            return self.format_save_dir(subdir=subdir)

        target = Path(save_dir)
        target.mkdir(parents=True, exist_ok=True)
        return target

    def generate_spike_trains(self):
        """
        Generate mock spike trains for a "patient".

        Returns a list of ``nr_units`` sorted numpy arrays; each holds a
        random number of spike times drawn uniformly between ``rectime_on``
        and ``rectime_off``.
        """
        spike_trains = []
        for _ in range(self.nr_units):
            nr_spikes = int(random.uniform(50, 5000))
            spike_times = [
                random.uniform(self.rectime_on, self.rectime_off)
                for _ in range(nr_spikes)
            ]
            spike_trains.append(np.sort(spike_times))

        return spike_trains

    def generate_channelwise_unit_distribution(self):
        """
        Distribute the units across "channels".

        Returns a dict mapping channel number (1..nr_channels) to a list with
        one randomly chosen unit type per unit assigned to that channel.
        """
        # randint is inclusive on both ends, so every draw is a valid channel
        channel_units = [
            random.randint(1, self.nr_channels) for _ in range(self.nr_units)
        ]

        channel_dict = {
            csc: [random.choice(self.unit_types) for _ in range(repeats)]
            for (csc, repeats) in Counter(channel_units).items()
        }

        return channel_dict

    def generate_channel_list(self):
        """
        Create a list of channel names to resemble that of an actual surgical output.
        Each entry consists of "<hemisphere abbr><brain region><channel number>".
        """
        channel_list = [
            f"{region}{i+1}"
            for region in self.brain_regions
            for i in range(self.nr_channels_per_region)
        ]

        return channel_list

    def save_spike_trains(self, save_dir=None):
        """
        Save each generated spike train as ``CSC<channel>_<type><n>.npy``.

        Files go to ``save_dir`` when given, otherwise to the session's
        "spiking_data" directory. ``<n>`` counts units of the same type
        within a channel, starting at 1.
        """
        save_dir = self._resolve_save_dir(save_dir, subdir="spiking_data")

        # spike_trains is consumed in the order units appear in channel_dict
        trains = iter(self.spike_trains)
        for csc, unit_types in self.channel_dict.items():
            per_type_count = Counter()

            for unit_type in unit_types:
                per_type_count[unit_type] += 1
                filename = f"CSC{csc}_{unit_type}{per_type_count[unit_type]}.npy"
                np.save(save_dir / filename, next(trains))

    def save_channel_names(self, save_dir=None):
        """
        Write ``ChannelNames.txt`` listing one "<channel name>.ncs" per line.

        The file goes to ``save_dir`` when given, otherwise to the session
        directory.
        """
        save_dir = self._resolve_save_dir(save_dir)

        channel_names = self.generate_channel_list()

        with open(save_dir / "ChannelNames.txt", "w") as f1:
            f1.writelines(f"{csc_name}.ncs\n" for csc_name in channel_names)

    ##############
    ## stimulus data generation
    ##############

    def generate_pings(self):
        """
        Recreate how Neuralynx interfaces with a local computer.

        Returns an array of length ``len_context_files`` that cycles through
        1, 2, 4, ..., 128.
        """
        # ceil(len / 8) repetitions, then trim to the exact length
        reps = -(-self.len_context_files // 8)

        signal_tile = np.tile([1, 2, 4, 8, 16, 32, 64, 128], reps)

        return signal_tile[:self.len_context_files]

    def generate_events(self):
        """
        Generate the mock Events.nev content.

        Returns ``(events, events_mat)``: evenly spaced timestamps between
        ``rectime_on`` and ``rectime_off``, and the same timestamps paired
        row-wise with the ping signal.
        """
        # recreate event timestamps
        events = np.linspace(self.rectime_on, self.rectime_off, num=self.len_context_files)
        events_mat = np.array(list(zip(events, self.signal_tile)))

        return events, events_mat

    def save_events(self, save_dir=None):
        """
        Save the generated mock events matrix as ``Events.npy``.

        The file goes to ``save_dir`` when given, otherwise to the session's
        "event_file" directory.
        """
        _, events_mat = self.generate_events()

        save_dir = self._resolve_save_dir(save_dir, subdir="event_file")

        np.save(save_dir / "Events.npy", events_mat)

    def generate_stimulus_onsets(self):
        """
        Generate the onset and offset timestamps for the stimulus.

        Returns ``(stim_on_time, stim_off_time)`` as integers in unix time
        microseconds. The onset is a random 120-180 s after ``rectime_on``;
        the offset is the onset plus ``stimulus_len`` minutes.
        """
        # rectime_on and the random delay are in ms; convert the sum to usec
        stim_on_time = (self.rectime_on + random.randint(120000, 180000)) * 1000
        # stimulus length: minutes -> usec. Added to the usec onset directly;
        # rescaling the sum would multiply the onset by 1000 a second time.
        stimulus_len_usec = int(round(self.stimulus_len * 60 * 1000 * 1000))
        stim_off_time = stim_on_time + stimulus_len_usec

        return stim_on_time, stim_off_time

    def seed_and_interval(self):
        """
        Return ``(add_interval, seed)`` used to lay out log timestamps.

        ``add_interval`` is the stimulus duration divided by the number of
        log entries (usec); ``seed`` is the first timestamp, 1.25 intervals
        after stimulus onset.
        """
        # Use the duration, not the absolute offset timestamp, so that the
        # entries are spread across the stimulus period.
        stimulus_duration = self.stim_off_time - self.stim_on_time
        add_interval = int(stimulus_duration / self.len_context_files)
        seed = int(self.stim_on_time + add_interval*1.25)
        return add_interval, seed

    def generate_daq_log(self):
        """
        Generate the DAQ log.

        Returns a list of ``(signal, index, pre, post)`` tuples, one per
        entry; ``pre``/``post`` bracket each entry's timestamp by a randomly
        jittered margin.
        """
        add_interval, seed = self.seed_and_interval()

        pre = []
        post = []

        for _ in range(self.len_context_files):
            # half-width of the pre/post window around the current timestamp
            interval_diff = (np.random.normal(1000, 200) / 2)

            pre.append(int(seed - interval_diff))
            post.append(int(seed + interval_diff))
            seed += add_interval

        return list(zip(self.signal_tile, np.arange(self.len_context_files), pre, post))

    def save_daq_log(self, save_dir=None):
        """
        Save the generated DAQ log as ``timedDAQ-log-<datetime>.log``.

        The file goes to ``save_dir`` when given, otherwise to the session's
        "daq_files" directory. An existing file of the same name is
        overwritten so the header appears exactly once.
        """
        log_lines = self.generate_daq_log()

        save_dir = self._resolve_save_dir(save_dir, subdir="daq_files")
        log_loc = save_dir / f"timedDAQ-log-{self.datetime}.log"

        with open(log_loc, 'w') as file:
            file.write("Initial signature: 255\t255\n255\t255\t\ndata\tStamp\tpre\tpost\n")
            for datum in log_lines:
                file.write("{}\t{}\t{}\t{}\n".format(datum[0], datum[1], datum[2], datum[3]))

    def generate_perfect_watchlog(self):
        """
        Generate a movie watchlog without pauses or skips.

        Returns ``(nr_movie_frames, perfect_pts, cpu_time)``: the frame count
        at 0.04 s per frame, the presentation timestamp of every frame, and a
        matching list of cpu timestamps advancing by 41000 per frame.
        """
        _, seed = self.seed_and_interval()

        nr_movie_frames = int(self.stimulus_len * 60 / 0.04)
        perfect_pts = [round((x * 0.04), 2) for x in range(1, nr_movie_frames+1)]

        cpu_time = [seed + 41000 * frame for frame in range(1, nr_movie_frames+1)]

        return nr_movie_frames, perfect_pts, cpu_time

    def save_perfect_watchlog(self, save_dir=None):
        """
        Save the pause-free watchlog as ``ffplay-watchlog-<datetime>.log``.

        The file goes to ``save_dir`` when given, otherwise to the session's
        "watchlogs" directory. An existing file of the same name is
        overwritten.
        """
        _, perfect_pts, cpu_time = self.generate_perfect_watchlog()

        save_dir = self._resolve_save_dir(save_dir, subdir="watchlogs")

        wl_name = f"ffplay-watchlog-{self.datetime}.log"

        with open(save_dir / wl_name, 'w') as file:
            file.write("movie_stimulus.avi\n")
            for pts, cpu in zip(perfect_pts, cpu_time):
                file.write("pts\t{}\ttime\t{}\n".format(pts, cpu))
        

In [202]:
# Build one mock session for patient 1 and write all of its files to disk.
pat1_neural_data = GenerateData(patient_id, session_nr)

# Neural side: spike trains and the channel-name listing.
pat1_neural_data.save_spike_trains(save_dir=None)
pat1_neural_data.save_channel_names(save_dir=None)

# Stimulus side: events file, DAQ log and watchlog, in that order.
for save_stimulus_file in (
    pat1_neural_data.save_events,
    pat1_neural_data.save_daq_log,
    pat1_neural_data.save_perfect_watchlog,
):
    save_stimulus_file()

### (1) Generate Spike Times

Specify seed information for mock spike trains:

In [2]:
# Seed values for the first mock patient.
patient_id, session_nr = 1, 1

# Units per brain region; the entries add up to nr_units_patient_1.
nr_units_per_brain_region_1 = [7, 12, 9, 10, 15, 13, 8, 11, 12, 3]
nr_units_patient_1 = 100

# Recording window handed to the generators below.
begin_recording_time = 449_860_058_000
stop_recording_time = 455_889_058_000

Generate mock spikes for one "patient":

In [3]:
# Generate mock spikes for patient 1 via the project helper, using the unit
# count and recording window defined in the seed cell.
utils.generate_spikes(patient_id, session_nr, nr_units_patient_1, begin_recording_time, stop_recording_time)

### (2) Create Channel Names File

Channel names indicate which brain region a given unit was recorded from. 

Here, channel files consist of a look-up table used to determine where a spike train originates. Once in the database, the region labels allow for dynamically switching between region-specific analyses.

In [4]:
# Create the channel-names file for patient 1 from the total unit count and
# the per-brain-region unit counts.
utils.generate_channel_file(patient_id, session_nr, nr_units_patient_1, nr_units_per_brain_region_1)

### (3) Create Events File and timedDAQ Log

Generates mock Events.nev file, a proprietary file format from Neuralynx. 

Necessary for matching spike train data to stimulus information. 

In conjunction with the DAQ file, this allows a linear mapping between the two different timescales (neural recording device time and local computer time), so that neural events can be matched to stimulus events and vice versa.

In [5]:
# Create the mock events file and timedDAQ log for patient 1 over the same
# recording window used for the spikes.
utils.make_events_and_daq(patient_id, session_nr, begin_recording_time, stop_recording_time)

Recording length, in usec:  6029000000
End time of recording, in epoch time:  1590534256608515
Length of interval iteratively added:  1359413


### (4) Create Watch Log File

Using the same seed time as used for the timedDAQ file, generate a watchlog file for a given "patient":

In [6]:
# Generate the watchlog file for patient 1.
# NOTE(review): the commented-out call is presumably the artifact-free
# alternative to the generator used here -- confirm against mock_data_utils.
#utils.generate_perfect_watchlog(patient_id, session_nr)
utils.generate_playback_artifacts(patient_id, session_nr)

Length of the stimulus, in usec:  4728999960.0
End of stimulus, in epoch time:  1590532956608475.0
Length of interval iteratively added :  37613


### Repeat (1 - 4) for more "patients":

In [7]:
# Seed values for the second mock patient.
patient_id, session_nr = 2, 1
nr_units_per_brain_region_2 = [12, 9, 10, 5, 7, 11, 8, 7, 12, 3]
nr_units_patient_2 = 84
begin_recording_time = 349_871_349_000
stop_recording_time = 355_900_349_000

# Run the four generators in the same order as for patient 1.
recording_window = (begin_recording_time, stop_recording_time)
utils.generate_spikes(patient_id, session_nr, nr_units_patient_2, *recording_window)
utils.generate_channel_file(
    patient_id, session_nr, nr_units_patient_2, nr_units_per_brain_region_2
)
utils.make_events_and_daq(patient_id, session_nr, *recording_window)
utils.generate_playback_artifacts(patient_id, session_nr)

Recording length, in usec:  6029000000
End time of recording, in epoch time:  1590534256608515
Length of interval iteratively added:  1277601
Length of the stimulus, in usec:  4728999960.0
End of stimulus, in epoch time:  1590532956608475.0
Length of interval iteratively added :  37613


In [8]:
# Set seed info
patient_id = 3
session_nr = 1
nr_units_patient_3 = 59
begin_recording_time = 248860058000
stop_recording_time = 254889058000
# entries add up to nr_units_patient_3
nr_units_per_brain_region_3 = [6, 5, 4, 6, 7, 10, 3, 7, 5, 6]

# Run generators
# Pass patient 3's own unit counts; the patient 2 values were used here by
# mistake, which left the variables defined above unused.
utils.generate_spikes(patient_id, session_nr, nr_units_patient_3, begin_recording_time, stop_recording_time)
utils.generate_channel_file(patient_id, session_nr, nr_units_patient_3, nr_units_per_brain_region_3)
utils.make_events_and_daq(patient_id, session_nr, begin_recording_time, stop_recording_time)
utils.generate_playback_artifacts(patient_id, session_nr)

Recording length, in usec:  6029000000
End time of recording, in epoch time:  1590534256608515
Length of interval iteratively added:  1154980
Length of the stimulus, in usec:  4728999960.0
End of stimulus, in epoch time:  1590532956608475.0
Length of interval iteratively added :  37613


### (5) Create Stimulus Meta-Data 

For the purposes of demonstration, the meta-data consists of frames/movie time points in which a movie character was on-screen. 

Since the stimulus meta-data is taken only from the stimulus, this information only needs to be generated once per "experimental" paradigm. 

In [9]:
nr_movie_frames = 125725      # movie length: 5029 seconds (AVI file); 5029/0.04 = 125725
perfect_pts = [round((x * 0.04), 2) for x in range(1, nr_movie_frames+1)]

# IDs of all configured annotators; one is picked at random per annotation file
annotator_ids = [annotator['annotator_id'] for annotator in config.annotators]

path_to_movie_annotations = "mock_data/movie_annotation"

# exist_ok avoids the separate existence check (and the race it leaves open)
os.makedirs(os.path.join(config.PATH_TO_REPO, path_to_movie_annotations), exist_ok=True)

nr_character_labels = 2

# Each annotation is a 3-row array: label values, segment start times,
# segment stop times (one column per segment).
start_times_1 = [0, 5000.04, 7000.04, 12000.04]
stop_times_1 = [5000,7000,12000,12575]
values_1 = [1,0,1,0]
character1 = np.array([values_1, start_times_1, stop_times_1])
np.save("{}/{}/{}_character1_{}_20191212_character.npy".format(
    config.PATH_TO_REPO, path_to_movie_annotations, 1, random.choice(annotator_ids)), character1)


start_times_2 = [0, 400.04, 4000.04, 10000.04, 10500.04]
stop_times_2 = [400,4000,10000,10500,12575]
values_2 = [0,1,0,1,0]
character2 = np.array([values_2, start_times_2, stop_times_2])
np.save("{}/{}/{}_character2_{}_20191010_character.npy".format(
    config.PATH_TO_REPO, path_to_movie_annotations, 2, random.choice(annotator_ids)), character2)

# location annotation (saved under the "location1" label)
start_times_3 = [0, 100.04, 500.04]
stop_times_3 = [100, 500, 12575]
values_3 = [0,1,0]
annot4 = np.array([values_3, start_times_3, stop_times_3])
np.save("{}/{}/{}_location1_{}_20200101_location.npy".format(
    config.PATH_TO_REPO, path_to_movie_annotations, 3, random.choice(annotator_ids)), annot4)
