In [None]:
#the different to "aligned_transitions_allROIs" is that here the interpolation (optional) is included + grouper.class

In [1]:
# Import libraries
import ast
import csv
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as ticker
import pandas as pd
import glob, os
import re
import math
from tqdm import tqdm
from scipy.stats import stats
from scipy.stats import mannwhitneyu
import itertools
import seaborn as sns

# To make the plot in the notebook and not in an extra window
%matplotlib notebook 

# Implement error messages (Default should be True)
error_on_missing_timestamps = False
error_on_time_light_mismatch = False
error_on_time_behavior_mismatch = False
error_on_missing_behaviors = False
error_on_invalid_behavior_range = False

Part 1: Generation of single data frame per sample, including behavior annotation, timestamp and dff

In [2]:
#Open multiple .csv from single directory. Define existing behaviors. Define sample_ID and experiment_ID.
# Chris's FRAN is zero based!!, whereas old annotations are 1 based
#directory for behavior data

behavior_directories = [r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/behavior_csv_cl_A4/',
                        r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/behavior_csv_cl_A9/',
                        r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/behavior_csv_ol/',
                        #r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/behavior_csv_stim_artefact/'
                       ] 

behavior_files = []
for d in behavior_directories:
    behavior_files.extend(
        glob.glob(os.path.join(d, "*.csv"))) #join pathname with filename, 

# Behavior columns available in CSV files
available_behaviors = ('fw', 'bw', 'stim', 'hunch', 'turn', 'other', 'HP', 'left turn', 'right turn')

# Regular expression (define the expression filenames are searched for)
# '.' single character, matched everything, '*' 0>> occurences, '/' path delimiter, '\d' 0-9 digit,
# '+' 1>> occurences, 'L' here character from filename
# () outcome here: 2 groups, useful for extraction
# [] optional list, eg 1 --> 1
# ? character non or once 

# Behavior reg-ex (regular expression)
behavior_sample_re = re.compile('.*/(\d\d-\d\d-\d\dL\d+(-\d+)?)-behavior-(.+).csv')

# Function: readall_behavior iterates through all csv (sorted) 
# and appends the files into the list (ls) and returns dictionary
def readall_behavior(all_files, printit=False):
    data = {}
    for filename in sorted(all_files):
        # Find sample ID, file name pattern: YY-MM-DDLXDETAIL.csv,
        # exp_id = DETAIL: several measurements of same sample 
        # (cl (closeloop, RGECO/ Chronos), ol (openloop, RGECO/ Chronos), 
        # blocks (Raghav: GCaMP/Chrimson))
        # Larva ID: YY-MM-DDLX
        # Look for filename_components, which are true for pattern
        match = behavior_sample_re.match(filename)
        if not match:
            raise ValueError('Unexpected filename format: {}'.format(filename))
        filename_components = match.groups()
        #define filename_components sample_id (first group), and exp_id (sec group)
        part_sample_id, _, exp_id = filename_components         
        sample_id = "{}-{}".format(part_sample_id, exp_id)
        
        df = pd.read_csv(filename, index_col=None, header=0, delimiter = ';')
        df.fillna(0, inplace=True) #replace NaN with zero
        df['sample_id'] = sample_id  #add sample_id column
        df['exp_id'] = exp_id #add exp_id column
        data[sample_id] = df
        #Count 'True' for each column ('behavior') in each single behavior.csv)
        #print(filename, df[df == 1].count()) 
        #print(df)
    return data

behavior_data = readall_behavior(behavior_files)
#print(behavior_data['17-11-06L2-cl'])



In [3]:
# Frequency of each behavior in all imported behavior.csv by using the returned 'ls' from 
# the function readAll: concatenate the 'behavior_files' (global variable). 'True' for each 
# column ('behavior_type') in the concatenated file (df_behavior).
# Sorting has to be = False (warning message without 'sort')
df_behavior = pd.concat(behavior_data.values(), axis = 0, ignore_index = True, sort = False) #add sorting
print(df_behavior[df_behavior == 1].count()) 

START            11
END               0
fw             4779
bw              831
stim            339
hunch           411
turn           1968
other           173
HP              728
left turn      1003
right turn      965
sample_id         0
exp_id            0
Unnamed: 11       0
dtype: int64


In [4]:
# Import and merge fluorescence data: Several LM files for the same sample_id exists, but differ in cell_id).
# List of LM data with two extra columns: sample_id and cell_id
# Open LM files from different directories
lightmicroscope_directories = [r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/Basin_traces/', 
                               r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/Handle-like_Traces',
                               r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/a00c_traces',
                               r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/candidate_neuron_traces',
                               r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/All_ROI/26082017L6_allROIs_Claire/traces'
                              ] 

# Iterate through LM data and extend files in a list from within and between directory and 
# build a list of files from all directories
# (Note: append would 'extend' lists and not single files)
lightmicroscope_files = []
for d in lightmicroscope_directories:
    lightmicroscope_files.extend(
        glob.glob(os.path.join(d, "*.csv"))) #join pathname with filename

# Lightmicroscopic data reg-ex (regular expression)
lightmicroscope_sample_re = re.compile('.*/(\d\d-\d\d-\d\dL\d+(-\d+)?)-(.*)-(.*).csv')

# Function: readall_lm iterates through all LM_csv (sorted) 
# and returns a dictionary{key:value} 
# samples = {sample_id:cell-id}
def readall_lm(all_files):
    samples = {}
    for filename in sorted(all_files):
        # Find sample ID, file name pattern: YY-MM-DDLXDETAIL.csv,
        # Larva ID: YY-MM-DDLX, DETAIL = cell_id
        # Look for filename_components, which are true for pattern
        match = lightmicroscope_sample_re.match(filename)
        if not match:
            raise ValueError('Unexpected filename format: {}'.format(filename))
        filename_components = match.groups()
        part_sample_id, _, cell_id, exp_id = filename_components
        
        sample_id = "{}-{}".format(part_sample_id, exp_id)
        
        # Read LM.files 
        df = pd.read_csv(filename, index_col=None, header=0, delimiter = ',')
        # Replace NaN with zero
        df.fillna(0, inplace=True)
        
        # Add cellname to each column as prefix
        # lambda is a non defined function (longer version: def lambda(x):)
        # Rename of columns after the format cell_id, name) eg: Basin A9
        # inplace = True: column names are overwritten (if False: new dataframe)
        df.rename(lambda x: '{}_{}'.format(cell_id, x), axis = 'columns', inplace = True)
        # Get the sample_id (key) from the dictionary? to make a list [sample_cells] and 
        # if sample_id exists, append the list
        # if sample_id does not exists, start a new list
        # reminder: there can be several cell_id per sample_id
        sample_cells = samples.get(sample_id)
        if not sample_cells:
            samples[sample_id] = sample_cells = {
                'data': [],
                'exp_id': exp_id,
            }
        sample_cells['data'].append(df)
        
    return samples

lm_samples = readall_lm(lightmicroscope_files)

# New dictionary: lm_data{} to build a single dataframe with all cell_ids combined 
# for a single sample. Iterate over dict from same sample in one dataframe. 
# df.items iterate over pairs and build a list

lm_data = {}

# Iterate over all light samples and merge all found files
# for each sample into a single data frame (per sample)
for sample_id, sample_info in lm_samples.items():
    cells_dataframes = sample_info['data']
    #check if number of cells >= 1
    if not cells_dataframes:
        raise ValueError('No cells found for sample {}'.format(sample_id))
    #first element in the list
    lm_df = None

    #iteration through other df
    for cdf in cells_dataframes:
        if lm_df is None:
            lm_df = cdf
        else:
            if len(lm_df.index) != len(cdf.index):
                raise ValueError('Data frame frame to merge has not same row count as target', sample_id)
            lm_df = pd.merge(lm_df, cdf, left_index = True, right_index = True)
            
    lm_df['sample_id'] = sample_id  #add sample_id column
    lm_df['exp_id'] = sample_info['exp_id']
    lm_data[sample_id] = lm_df
#print(list(lm_data.keys()))
#print(lm_samples)

In [5]:
# Import txt-files from of the absolute time/frame from the Ca-imaging (lm-data). 
# All txt-files have to be transposed, which is a memory intensive step. After the 
# data are complete, the transposed files should be exported (ToDo). Time-data are 
# combined with sample-ID and experiment-ID.

timelapse_directory =(r'/Users/nadine/Documents/Zlatic_lab/close-loop/Notes/timelapse/') 
timelapse_files = glob.glob(os.path.join(timelapse_directory, "*.txt")) #join pathname with filename

# Behavior reg-ex (regular expression)
time_sample_re = re.compile('.*/(\d\d-\d\d-\d\dL\d+(-\d+)?)-time-(.+).txt')

# Function: readall_timelapse iterates through all txt (sorted) and appends the 
# files into the dict (data) and returns ls
def readall_time(all_files, printit=False):
    data = {}
    for filename in sorted(all_files):
        # Find sample ID, file name pattern: YY-MM-DDLXDETAIL.csv,
        # exp_id = DETAIL: several measurements of same sample (cl (closeloop), ol (openloop), blocks (Raghav))
        # Larva ID: YY-MM-DDLX
        #look for filename_components, which are true for pattern
        match = time_sample_re.match(filename)
        if not match:
            raise ValueError('Unexpected filename format: {}'.format(filename))
        filename_components = match.groups()
        part_sample_id, _, exp_id = filename_components #define filename_components sample_id (first group), and exp_id (sec group)  
        sample_id = "{}-{}".format(part_sample_id, exp_id)
        
        df = pd.read_csv(filename, header=1, index_col=None, delim_whitespace = True)
        df = df.T #transposing because read_csv imports as row
        df = df.reset_index() #transpose function sets data as index
        df.rename(columns={'index':'time'}, inplace=True) #rename reset index column to time
        df['time'] = df.time.astype(float)
        data[sample_id] = df
        
    return data

In [6]:
# Keep in mind that some of the files has to be changed because of the discrepancy in timestamps
#Will fixed this in vsc
timelapse_cache = 'timelapse.cache'

try:
    with open(timelapse_cache, 'r') as timelapse_cache_file:
        # TODO
        cache_data = timelapse_cache_file.read()
        time_data = ast.literal_eval(cache_data)
except FileNotFoundError as e:
    print('No cache file found, recomputing')
    # No cache file found, recompute
    time_data = readall_time(timelapse_files)
    # Write cache
    

No cache file found, recomputing


In [7]:
sample_data = {}

# Time data are merged into light data and checked if number length of lm = timestamp.  
# Due to technical conditions, some time.txt-file have too many or not enough time data compared
# to the corresponding LM data. The discrepancy is fixed by either dropping the extra timepoints or 
# by taking the average of the difference between each timepoint and extend the dataframe. 
# The first 10 timepoints are not included to account for instability of the microscope in 
# the beginning due to the moving parts. 
# Maximal difference between timepoints fyi.

for sample_id, sample_df in lm_data.items():
    # Add time stamps to data frame of current sample by merging
    # The time data frame for the current sample, which is expected
    # to match the light data (based on index).
    timestamp_df = time_data.get(sample_id)
    if timestamp_df is None:
        msg = '{}: could not find timestamp data for sample'.format(sample_id)
        if error_on_missing_timestamps:
            raise ValueError(msg)
        # Ignore, if missing data shouldn't cancel the whole process.
        print(msg)
        continue
        
    n_timestamps = len(timestamp_df)
    n_lightdata = len(sample_df)
    
    # The timestamp and light recordings are done by different systems.
    # This can cause the existence of additional time points/ or missing time points in a
    # dataset, which will be filtered out in the merge operation below.
    if n_lightdata != n_timestamps:
        msg = '{}: time data ({} entries) doesn\'t match light data ({} entries)'.format(
                sample_id, n_timestamps, n_lightdata)
        if error_on_time_light_mismatch:
            raise ValueError(msg)
        print(msg)
        diffs = np.diff(timestamp_df['time'])[10:] #from 10th row onwards
        diffs_avg = diffs.mean(axis=0)
        #diff between timedata and lightdata
        missing_data = len(sample_df) - len(timestamp_df)
        
        #add 'diffs_avg' to fill in missing_timedata
        if missing_data > 0:
            last_valid_index = len(timestamp_df) - 1
            last_timestamp = timestamp_df.iloc[last_valid_index]['time']
            if pd.isna(last_timestamp):
                raise ValueError('Unexpected last valid timestamp for sample {} at index {}'.format(
                        sample_id, last_valid_index))
            for i in range(0, missing_data):
                last_valid_index += 1
                timestamp_df.loc[last_valid_index] = timestamp_df.iloc[last_valid_index - 1]['time'] + diffs_avg
        elif missing_data < 0:
            drop_start = len(timestamp_df) + missing_data
            drop_end = len(timestamp_df)
            timestamp_df.drop(list(range(drop_start, drop_end)))

    # Merge timedata into light data
    # Use an 'inner' join/merge to exclude time points that don't have matching light data.
    new_sample_df = pd.merge(sample_df, timestamp_df, left_index = True, right_index = True, how='inner')
    
    # Store newly created data frame for sample (dictionary)
    sample_data[sample_id] = new_sample_df
    
print('Matched {} light data sets with their respective time points'.format(len(sample_data)))

# Max.diffs for timestamps
# diffs defined earlier
#mx = diffs.max()
#print('max-diff', mx)

#plt.hist(diffs, bins=10, alpha=0.5)
#plt.show() 

17-08-24L5-cl: time data (6686 entries) doesn't match light data (6685 entries)
17-08-26L1-cl: time data (6990 entries) doesn't match light data (6989 entries)
17-08-26L3-cl: time data (2935 entries) doesn't match light data (4110 entries)
17-08-27L2-cl: time data (6470 entries) doesn't match light data (6469 entries)
17-08-28L3-cl: time data (6228 entries) doesn't match light data (6225 entries)
17-08-29L2-cl: time data (6817 entries) doesn't match light data (6805 entries)
17-11-03L7-cl: time data (1399 entries) doesn't match light data (2657 entries)
17-11-04L1-cl: time data (6325 entries) doesn't match light data (6324 entries)
17-11-06L1-cl: time data (1923 entries) doesn't match light data (6493 entries)
17-11-08L3-cl: time data (3240 entries) doesn't match light data (6474 entries)
17-11-26L1-cl: time data (6487 entries) doesn't match light data (6469 entries)
17-11-29L3-cl: time data (6567 entries) doesn't match light data (6561 entries)
17-11-30L2-cl: time data (6640 entries) 

In [8]:
# Combine behavior data with light data into a single data frame
# per sample ID. To do so, add behavior data to light data frames,
# because the light data is already organizes by frame. To accomodate
# frame ranges without an behavior data, a column named "quiet" is
# added which is True in these cases and False otherwise. Additionally,
# for each behavior column, a behavior start and end column as well as
# an overlap column is added so that parallel and successive behaviors
# of the same type can be differentiated.

for sample_id, sample_df in sample_data.items():
    sample_behavior = behavior_data.get(sample_id)
    if sample_behavior is None:
        msg = 'Could not find behavior data for sample "{}"'.format(sample_id)
        if error_on_missing_behaviors:
            raise ValueError(msg)
        print(msg)
        continue

    # Add extra columns for behavior
    for behavior in available_behaviors:
        sample_df[behavior] = False
        sample_df['{}_start'.format(behavior)] = False
        sample_df['{}_end'.format(behavior)] = False
        sample_df['{}_overlap'.format(behavior)] = False
    
    # Add 'quiet' column. Set it initially to True and mark frames
    # with actual behavior as quiet = False.
    sample_df['quiet'] = True
    
    n_light_entries = len(sample_df)

    # Iterate over behavior data and add data to target data frame
    for i, row in sample_behavior.iterrows():
        # Start and end are 1-based, make them 0-based
        start = int(row['START'])
        end = int(row['END'])
        
        if type(row['START']) == str:
            print(sample_id)
            print(start, end)
        
        if start >= end:
            msg = "{}: start ({}) needs to be strictly smaller than end ({})".format(sample_id, start, end)
            if error_on_invalid_behavior_range:
                raise ValueError(msg)
            print(msg)
            continue
        
        # Make sure we capture start/end times that are a fractional number.
        if row['START'] - start > 0 or row['END'] - end > 0:
            raise ValueError('{}: start and end frame number can\'t contain fractions'.format(sample_id))
            
        # Ignore behavior entries with an end frame higher than available light data.
        # The behavior data is one-based, which is why a strict larger than test should
        # be correct.
        if end > n_light_entries:
            msg = 'Sample: {} - Behavior row with range {}-{} exceeds light time points ({}): {}'.format(
                sample_id, start, end, n_light_entries, row)
            if error_on_time_behavior_mismatch:
                raise ValueError(msg)
            print(msg)
            continue
            
        # Find behavior observed in row
        observed_behaviors = []
        for behavior in available_behaviors:
            if row[behavior]:
                observed_behaviors.append(behavior)
        
        # We assume that not more than two behaviors are observed at the same time
        if len(observed_behaviors) > 2:
            raise ValueError('Found multiple behaviors in row {} of sample {}'.format(i, sample_id))
        
        # Add observed behavior information to target data frames in all
        # rows in behavior range.
        for b in observed_behaviors:
            # Iterate over frames valid for current behavior. Every valid
            # frame is mapped into the canonical (light/cell) data frame,
            # which is 0-indexed.
            for j in range(start, end + 1):
                # Behavior ranges are 1-indexed
                current_frame = j - 1
                # If the current behavior has already been observed at this frame,
                # set overlap to True, because we are about to mark this behavior
                # again as observed for this frame.
                if sample_df.at[current_frame, b]:
                    sample_df.at[current_frame, '{}_overlap'.format(b)] = True
                else:
                    sample_df.at[current_frame, b] = True
                
                # Mark this row as not quiet, because we observed
                # a behavior in the current frame.
                sample_df.at[current_frame, 'quiet'] = False

            sample_df.at[start - 1, '{}_start'.format(b)] = True
            sample_df.at[end - 1, '{}_end'.format(b)] = True
            
    # Mark quiet ranges with _start, _end and _overlap. By definion,
    # quiet_overlap is always False.
    sample_df['quiet_start'] = False
    sample_df['quiet_end'] = False
    sample_df['quiet_overlap'] = False
    last_sample_idx = n_light_entries - 1
    for i, row in sample_df.iterrows():
        sample_df.at[i, 'quiet_start'] = row['quiet'] and (i == 0 or not sample_df.at[i - 1, 'quiet'])
        sample_df.at[i, 'quiet_end'] = row['quiet'] and (i == last_sample_idx or not sample_df.at[i + 1, 'quiet'])


Could not find behavior data for sample "17-08-24L3-cl"
Could not find behavior data for sample "17-08-26L3-cl"
Could not find behavior data for sample "17-08-31L2-cl"
Could not find behavior data for sample "17-11-03L5-cl"
Could not find behavior data for sample "17-11-03L7-cl"
Could not find behavior data for sample "17-11-06L1-cl"
Could not find behavior data for sample "17-11-06L3-cl"


Part 2: Data-analysis

In [9]:
# Define a class with sample_id, cell_type, event_name and filter_pattern

class CellTraceConfig:
    
    def __init__(self, sample_id, cell_type, event_name, filter_pattern=None):
        self.sample_id = sample_id
        self.cell_type = cell_type
        self.event_name = event_name
        self.filter_pattern = filter_pattern
        
    def get_filter_regex(self):
        filter_regex = '^{}_'.format(self.cell_type)
        if self.filter_pattern:
            filter_regex += '.*{}.*'.format(self.filter_pattern)
        return filter_regex
    
    def get_event_start_col(self):
        return '{}_start'.format(self.event_name)

    def add_event_time_points_to_plot(self, source_df, plot):
        for idx, row in source_df.iterrows():
            plot.annotate(self.event_name, xy=(row['time'], 1))
            plt.axvline(row['time'], color='k', linestyle='-')  
            
# Define a class with sample_id, cell_type, event_time and filter_pattern (for behavioral_transitions)
# Put '' [empty string] if you dont want any cell type

class CellTransConfig:
    
    def __init__(self, sample_id, cell_type, event_time, filter_pattern=None, first_event=None, second_event=None):
        self.sample_id = sample_id
        self.cell_type = cell_type
        self.event_time = event_time
        self.filter_pattern = filter_pattern
        self.first_event = first_event
        self.second_event = second_event
        
    def get_filter_regex(self):
        if self.cell_type is None:
            cell_str = r"[a-zA-Z0-9]+"
        else:
            cell_str = self.cell_type
            
        filter_regex = '^{}_'.format(cell_str)
        if self.filter_pattern:
            filter_regex += '.*{}.*'.format(self.filter_pattern)
        return filter_regex
    
    
    
# Define a class for filtering after behavioral_transitions for either only cell_type or filter_pattern or both.
# For example to average not only over all A00cs but all A00c_midL.

class DataFilter():
    def __init__(self, cell=None, pattern=None):
        self.cell = cell if cell is not None else '.*' # Makes argument optional
        self.pattern = pattern if pattern is not None else '.*' # Makes argument optional
        
    def get_cell_filter_regex(self):
        filter_regex = '.*_{}_.*_.*'.format(self.cell)
        return filter_regex
    
    def get_pattern_filter_regex(self):
        filter_regex = '.*_.*_{}_.*'.format(self.pattern)
        return filter_regex
    
    def get_cellpattern_filter_regex(self):
        filter_regex = '.*_{}_{}_.*'.format(self.cell, self.pattern)
        return filter_regex
        
    def __str__(self):
        return "{}_{}".format(self.cell, self.pattern)   
    
    
# Define class to group the columns after cell_type/ pattern or both using the class Datafilter

class TransitionGrouper:
    def __init__(self, transitions_df):
        self.transitions_df = transitions_df
        
        sample_ids, cells, patterns, *_ = zip(*[column.split("_") for column in self.transitions_df.columns])
        
        self.sample_ids = sorted(set(sample_ids))
        self.cells = sorted(set(cells))
        self.patterns = sorted(set(patterns))
        
    def get_regex(self, cell_name=None, pattern=None):
        data_filter = DataFilter(cell=cell_name, pattern=pattern)
        if cell_name is not None and pattern is None:
            return data_filter, data_filter.get_cell_filter_regex()
        if cell_name is not None and pattern is not None:
            return data_filter, data_filter.get_cellpattern_filter_regex()
        if cell_name is None and pattern is not None:
            return data_filter, data_filter.get_pattern_filter_regex()
        raise ValueError("Both cell_name and pattern are None! :(")
    
    def group_cells(self):
        output = dict()
        for cell_name in self.cells:
            data_filter, regex = self.get_regex(cell_name)
            cell_df = self.transitions_df.filter(regex=regex)
            output[cell_name] = (str(data_filter), cell_df)
        return output
    
    def group_patterns(self):
        output = dict()
        for pattern in self.patterns:
            data_filter, regex = self.get_regex(pattern=pattern)
            pattern_df = self.transitions_df.filter(regex=regex)
            output[pattern] = (str(data_filter), pattern_df)
        return output
    
    def group_cellpattern(self):
        output = dict()
        for cell_name, pattern in itertools.product(self.cells, self.patterns):
            data_filter, regex = self.get_regex(cell_name, pattern)
            cellpattern_df = self.transitions_df.filter(regex=regex)
            output[(cell_name, pattern)] = (str(data_filter), cellpattern_df)
        return output

# Specific after Post-transitions for multiple transition kinds, used for plotting. For multiple transition 
# events, group after transition (first, or second event) <most useful> with option to group
# after celltype, filterpattern, sample_id, observations.
class TransitionType:
    def __init__(self, sample_id=".*", cell=".*", filter_pattern=".*", n_obs=".*", first_event=".*", second_event=".*"):
        self.sample_id = sample_id
        self.cell = cell
        self.filter_pattern = filter_pattern
        self.n_obs = n_obs
        self.first_event = first_event
        self.second_event = second_event
        
        self.pattern = "{}_{}_{}_{}_{}_{}"
        
    def get_filter_regex(self, use_all=False, use_cell=False, use_sample=False, use_filter_pattern=False, use_n_obs=False, use_first_event=False, use_second_event=False):
        filter_regex = self.pattern.format(self.sample_id if use_sample or use_all else ".*",
                                          self.cell if use_cell or use_all else ".*",
                                          self.filter_pattern if use_filter_pattern or use_all else ".*",
                                          self.n_obs if use_n_obs or use_all else ".*",
                                          self.first_event if use_first_event or use_all else ".*",
                                          self.second_event if use_second_event or use_all else ".*")
        return filter_regex    
    
    


In [10]:
class PostBehaviorTransition:
        
    def __init__(self, sample_id, event, post_event, max_delay=0):
        self.sample_id = sample_id
        self.post_event = post_event
        self.event = event
        self.max_delay = max_delay

def find_behavior_before(sample_id, sample_df, first_event, second_event, 
                         max_delay=0,
                         first_event_duration=None, 
                         second_event_duration=None):
    """For the data frame of a single sample <df>, find all behaviors
    of type <first_event> that is followed by the event <second_event>,
    separated by <max_delay> time. The end of <second_event> is expected
    to happen strictly after the end of <first_event>. The start time
    of <second_event> however can overlap with the end time of <first_event>.
    In this case, the time difference is negative, and still smaller than
    <max_delay>. The start time of <second_event> can be before, at or after the
    end of <first_event>.
    """
    results = []
    first_event_start_col = '{}_start'.format(first_event)
    first_event_end_col = '{}_end'.format(first_event)
    second_event_start_col = '{}_start'.format(second_event)
    second_event_end_col = '{}_end'.format(second_event)
    second_event_overlap_col = '{}_overlap'.format(second_event)
    
    first_event_start_time = None
    first_event_end_time = None
    second_event_start_time = None
    second_event_end_time = None
    
   
    for i, row in sample_df.iterrows():
        # Look for start of second behavior and remember its time.
        if row[second_event_start_col] and not row[second_event_overlap_col]:
            #print("{} starts at {}".format(second_event, row["time"]))
            second_event_start_time = row['time']
        if row[first_event_end_col]:
            #print("{} ends at {}".format(first_event, row["time"]))
            first_event_end_time = row['time']
        if row[first_event_start_col]:
            #print("{} starts at {}".format(first_event, row["time"]))
            first_event_start_time = row['time']
        for column in sample_df.columns:
            if (first_event_start_time is not None and
                column.endswith("_start") and
                "quiet" not in column and
                column != first_event_start_col and
                column != second_event_start_col and
                first_event not in column and
                second_event not in column):
                if row[column]:
                    #print("{} ended at {}, but then found {} at {}".format(first_event, first_event_end_time, column, row["time"]))
                    first_event_start_time = None
                    first_event_end_time = None
                    second_event_start_time = None
                    second_event_end_time = None
                    
        
        # As long as we haven't collected all needed time points,
        # keep on searching.
        if None in (first_event_start_time, first_event_end_time,
                    second_event_start_time):
            continue
        
        # Define rules for event_start_time and event_end_time
        if first_event_start_time > second_event_start_time:
            continue
        if first_event_start_time > first_event_end_time:
            continue
        
        # Test if first_event_start_time = second_event_start_time
        if abs(first_event_start_time - second_event_start_time) < 0.00001:
            print('{}: start time (first) event {} and start time of (second) event {} are the same: {}'.format(
                sample_id, first_event, second_event, first_event_start_time))
            
        if second_event_end_time is None:
            for j, row in sample_df.loc[i:, :].iterrows():
                if row[second_event_end_col]:
                    second_event_end_time = row["time"]
                    break
        if second_event_end_time is None:
            print("warning: end time not found for second event")

        # Test time between first event end and second event start. If it
        # is smaller than <max_delay>, store start of second event as result.
        # The first event end time being greater than the second event start
        # time, is explicitly allowed.
        # implement event duration (for quiet)
        if (second_event_start_time - first_event_end_time) <= max_delay:
            if first_event_duration is not None and first_event_end_time - first_event_start_time < first_event_duration:
                continue
            if second_event_duration is not None and second_event_end_time - second_event_start_time < second_event_duration:
                continue
            
            results.append({
                'sample_id': sample_id,
                'first_event_start': first_event_start_time,
                'first_event_end': first_event_end_time,
                'second_event_start': second_event_start_time,
                'second_event_end': second_event_end_time,
                'first_event': first_event,
                'second_event': second_event
            })
        
        # Reset behavior tracking variables to find new pattern match.
        first_event_start_time = None
        first_event_end_time = None
        second_event_start_time = None
        second_event_end_time = None
        
    return results

# Open single samples 

#behavior_transitions = [
#    PostBehaviorTransition('17-08-26L6-cl', 'bw', 'turn', 4.9)
    #PostBehaviorTransition('17-08-26L6-cl', 'stim', 'bw', 4.9)
#]


# Open all samples multiple Transitions
behavior_transitions = [
    PostBehaviorTransition(name, 'fw', 'bw', 10) for name in lm_data] + [
    PostBehaviorTransition(name, 'bw', 'fw', 10) for name in lm_data] + [
    PostBehaviorTransition(name, 'fw', 'turn', 3) for name in lm_data] + [
    PostBehaviorTransition(name, 'turn', 'fw', 2) for name in lm_data] + [
    PostBehaviorTransition(name, 'bw', 'turn', 2) for name in lm_data] + [
    PostBehaviorTransition(name, 'turn', 'bw', 2) for name in lm_data]


found_transitions = []
for bt in tqdm(behavior_transitions):
    sample_df = sample_data.get(bt.sample_id)
    if not any(["bw" in column for column in sample_df.columns]):
        continue
    if sample_df is None:
        raise ValueError('No data found for sample {}'.format(bt.sample_id))
    transitions = find_behavior_before(bt.sample_id, sample_df, bt.event, 
                                       bt.post_event, bt.max_delay, 
                                       first_event_duration = None, 
                                       second_event_duration = None) #For 'quiet' change *_event_duration. Defaul = None.
    
    if transitions:
        found_transitions.append(transitions)


print(len(found_transitions)) # Number of data sets 
print(sum([len(sample_transitions) for sample_transitions in found_transitions])) # Number of transitions/ not working!!
#print(found_transitions) # Transitions


 40%|███▉      | 184/462 [02:27<03:16,  1.42it/s]

17-11-03L1-cl: start time (first) event fw and start time of (second) event turn are the same: 1905.242


 57%|█████▋    | 262/462 [03:30<02:15,  1.48it/s]

17-11-03L1-cl: start time (first) event turn and start time of (second) event fw are the same: 1905.242


100%|██████████| 462/462 [06:13<00:00,  1.28it/s]

210
1270





In [None]:
'''
# Duration between diff behavior transition (only works for one type of transitions)

gap_Ptrans = []
for sample in found_transitions:
    for found_transition in sample:
        gap_Ptrans.append((found_transition["second_event_start"])-(found_transition["first_event_end"]))
        
        # Test minus valus
        if ((found_transition["second_event_start"])-(found_transition["first_event_end"])) < -4:
            print(bt.sample_id, found_transition["first_event_end"], found_transition["second_event_start"]) 
            
#print(gap_Ptrans)

avg_duration = np.mean(gap_Ptrans)
max_duration = np.max(gap_Ptrans)
min_duration = np.min(gap_Ptrans)

print(avg_duration)
print(max_duration)
print(min_duration)

# Histogram
#fig = plt.figure()
#plt.hist(gap_Ptrans, bins=20, alpha=0.5, color='k')
#plt.show()
'''

In [11]:
# Define celltype, filter-pattern for transitions. 

cell_Ptrans_configs = []
all_Ptrans_events = []

for sample in tqdm(found_transitions):
    for found_transition in sample:
        
        # For all behavior except stimulus as first event 
        cell_Ptrans_configs.append(CellTransConfig(found_transition["sample_id"], "A00c",
                                                    found_transition["second_event_start"], 
                                                    filter_pattern = None, 
                                                    first_event=found_transition["first_event"], 
                                                    second_event=found_transition["second_event"]))
        
        # For stimulus as first_event
        #cell_Ptrans_configs.append(CellTransConfig(found_transition["sample_id"], "A00c",
        #                                           found_transition["first_event_start"],
        #                                           filter_pattern = None,
        #                                           first_event=found_transition["first_event"], 
        #                                           second_event=found_transition["second_event"]))

        
# Extract for specific time window and align several events. 
# Define timepoints pre and post an event (event_df). 
# Set the window range left and right from the event (in seconds)
left_half_window_size = 20 # If negative it goes further to right half (Good for skipping stimulus)
right_half_window_size = 20

windows = []
n_behavior_per_sample = {}

for ctc in tqdm(cell_Ptrans_configs):
    sample_df = sample_data.get(ctc.sample_id)
    n_behavior = n_behavior_per_sample.get(ctc.sample_id, 1)
    n_behavior_per_sample.setdefault(ctc.sample_id, 1)
    if sample_df is None:
        raise ValueError('{}: could not find sample data'.format(ctc.sample_id))
        continue    
   
    # Extract columns matching our cell type and the optional filter pattern.
    cell_subset_df = sample_df.filter(regex=ctc.get_filter_regex()) #Get subset of cells 
    cell_subset_df.set_index(sample_df.time, inplace=True) #Set time to index (essential for min/max...)
    cell_subset_df.reset_index(inplace = True) # Add index and time = column
    
    n_behavior_per_sample[ctc.sample_id] += 1
    window_start = ctc.event_time - left_half_window_size
    window_end = ctc.event_time + right_half_window_size
        
    # Get subset of rows between window_start and window_end
    # Including event_start
    trans = cell_subset_df[(cell_subset_df.time >= window_start) & (cell_subset_df.time <= window_end)]
    
    # Normalizing the data to align on beginning of selected
    # behavior (event_df = Zero) by substracting events in window
    trans.loc[:, 'time'] = trans['time'] - ctc.event_time
    
    # Add sample_id to each column as prefix and n_behavior as suffix to distinguish events within a sample
    trans.rename(lambda x: '{}_{}_{}_{}_{}'.format(ctc.sample_id, x, n_behavior, ctc.first_event, ctc.second_event), axis = 'columns', inplace = True) 

    # Rename time collum to time
    trans.rename(columns={ trans.columns[0]: 'time' }, inplace = True) 
    all_Ptrans_events.append(trans) 


# Removes first event and takes it as left_window in pd.merge_ordered and iterates than through all_events
all_Ptrans_df = all_Ptrans_events.pop(0)
for right_df in all_Ptrans_events:
    all_Ptrans_df = pd.merge_ordered(all_Ptrans_df, right_df, on="time", how="outer")

# Resets the index as time and drops time column
all_Ptrans_df.index = all_Ptrans_df["time"]
del all_Ptrans_df["time"]        

# Index intepolation (linear interpolatione not on all_df, because index [=time] is not eaqually distributed)
int_all_Ptrans_df = all_Ptrans_df.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')
#print(int_all_Ptrans_df.columns)
#print(int_all_Ptrans_df)

100%|██████████| 210/210 [00:00<00:00, 6462.96it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
100%|██████████| 1270/1270 [02:16<00:00,  8.62it/s]


In [13]:
import pdb
#print(n_behavior)

def mean_across_n_behav(df):
    """
    Given a dataframe where each column represents one of n replicates,
    and the column titles end with '_{n}', return a new dataframe
    whose columns are the means across replicates.
    """
    cell_patterns_to_n_behav = dict()
    
    # populate dict whose keys are the cell pattern prefix (i.e. sample ID, cell type, pattern)
    # and values are list of column names which are that prefix with '_{n}'
    for col in sorted(df.columns):
        split = col.split('_')
        key = "_".join(split[:-1]) #-1 (for identified celltypes), -3 (for C_C)
        if key not in cell_patterns_to_n_behav:
            cell_patterns_to_n_behav[key] = []
        cell_patterns_to_n_behav[key].append(col)

    #print(cell_patterns_to_n_behav)

    out_df = pd.DataFrame(index=df.index)
    # go through the above dict, creating a new column for each key,
    # and populating that column with the mean of the columns specified
    # by the value (list of column names)
    for key, vals in cell_patterns_to_n_behav.items():
        tmp_df = df[vals]
        means = tmp_df.mean(axis=1)
        out_df[key] = means
    
    return out_df

#int_pre_mean = mean_across_n_behav(int_pre_data)
#int_post_mean = mean_across_n_behav(int_post_data)
int_all_Ptrans_df_mean = mean_across_n_behav(int_all_Ptrans_df)

def filter_cols_over_threshold(df, threshold=0.2):
    """
    Given a data frame, produce a new data frame
    only with columns which have at least one value over threshold.
    """
    
    is_over_threshold = df > 0.2

    useful_columns = []
    for col in is_over_threshold.columns:
        #pdb.set_trace() #python debugger
        if sum(is_over_threshold[col]) > 0:
            useful_columns.append(col)

    return df[useful_columns]

#int_pre_useful = filter_cols_over_threshold(int_pre_mean)
#int_post_useful = filter_cols_over_threshold(int_post_mean)
#int_all_Ptrans_df_useful = filter_cols_over_threshold(int_all_Ptrans_df_mean)

In [None]:
# heatmap

In [None]:
#Statistic

In [None]:
#For t-test
 
#fwcast = int_all_Ptrans_df
#mfwcast=fwcast.mean(axis=0)
#mfwcast_df=mfwcast.to_frame()
#mfwcast_df.columns=['fwcast']

#bwcast = int_all_Ptrans_df
#mbwcast=bwcast.mean(axis=0)
#mbwcast_df=mbwcast.to_frame()
#mbwcast_df.columns=['bwcast']

#castfw = int_all_Ptrans_df
#mcastfw=castfw.mean(axis=0)
#mcastfw_df=mcastfw.to_frame()
#mcastfw_df.columns=['castfw']

#castbw = int_all_Ptrans_df
#mcastbw=castbw.mean(axis=0)
#mcastbw_df=mcastbw.to_frame()
#mcastbw_df.columns=['castbw']



#Test t-test
#from scipy.stats import ttest_ind
#stats.ttest_ind(mcastfw_df,mcastfw_df)



# Mann-Whitney U-test
#output doesn't make much sense
#from scipy.stats import mannwhitneyu
#mw1 = mannwhitneyu(mcastfw_df, mbwcast_df, use_continuity=False, alternative='two-sided')
#mw2 = mannwhitneyu(mcastfw_df, mbwcast_df, use_continuity=False)
#print(mw1)
#print(mw2)
'''
# Test for correlation matrix (prelim)
#fw1 = int_pre_data 
#turn1 = int_pre_data 
bw2 = int_pre_data
turn2 = int_post_data

fw1.corr(turn1, method='pearson')
bw2.corr(turn2, method='pearson')
turn2.corr()
bw2.corr()


plt.matshow(bw2.corr())
#plt.matshow(int_post_data.corr())
#plt.matshow(int_post_data.corr()-int_pre_data.corr())
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
plt.title('Correlation Matrix', fontsize=16);
plt.show()
#correlation = np.corrcoef(cellpattern_averages) #data(cell,time)
#print(correlation)
'''

In [None]:
# For multiple transition events, group after transition (first, or second event) <most useful> with option to group
# after celltype, filterpattern, sample_id, observations, using class TransitionType

transition_types = [
    TransitionType(first_event = 'fw', second_event= 'turn'),
    #TransitionType(first_event = 'turn', second_event= 'fw'),
    #TransitionType(first_event = 'turn', second_event= 'fw'),
    #TransitionType(first_event = 'turn', second_event= 'bw')
    TransitionType(first_event = 'bw', second_event= 'turn')
]

print(transition_types)


#transition_types = [
    #TransitionType(filter_pattern= 'T3'),
    #TransitionType(filter_pattern = 'A1'),
    #TransitionType(filter_pattern = 'A2'),
    #TransitionType(filter_pattern = 'A3'),
    #TransitionType(filter_pattern = 'antR'),
    #TransitionType(filter_pattern= 'midR'),
    #TransitionType(filter_pattern= 'postR')
    #TransitionType(filter_pattern= 'A7'),
    #TransitionType(filter_pattern= 'A8')
#]
#print(transition_types)


In [None]:
# all all_Ptrans_df with left and right window in same data frame and aligned to second_event_start 
#(for stim first event on first_event_start_)
# Average over the frames within the same sample (eg average over 1 sec)
# extract all negative values for pre_event
# extract positiv values for post event, but in case of the stimulus, the first two seconds must not be included


pre_data = all_Ptrans_df[all_Ptrans_df.index < 0.0]
post_data = all_Ptrans_df[all_Ptrans_df.index >= 0.0] # for stim-behavior >2, otherwise >=0

int_pre_data = pre_data.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')
int_post_data = post_data.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')

# Average over time for each cell type
pre_data_avg = pre_data.mean(axis = 0)
post_data_avg = post_data.mean(axis = 0)
#print(b_pre_data_avg)
#print(t2_post_data_avg)

In [None]:
# Fold change (bar plot) in cell activity post - pre/pre (merged orderd,No interpolation, NO avg)
# Fold change for all cells individually

# fold changeI: post/pre
#fold_change = (post_data_avg)/(pre_data_avg) 

# fold changeII: post-pre/pre
fold_change = ((post_data_avg)-(pre_data_avg))/(pre_data_avg)
#print(fold_change.index)

# Transform Series to dataframe and name columns
fold_change_df = fold_change.to_frame('transitions')
# print(fold_change_df.index)
# transitions = set(["_".join(sample_transition.split("_")[-2:]) for sample_transition in fold_change_df.index])

transitions = []
past = None
for sample_transition in fold_change_df.index:
    current = "_".join(sample_transition.split("_")[-2:])
    if current != past:
        transitions.append(current)
        past = current
print(transitions)

print(transitions)
transition_df_map = {transition: fold_change_df[fold_change_df.index.str.contains(transition)] for transition in transitions}
print(list(transition_df_map.keys()))
#print(transition_df_map["fw_stim"].head(10))



In [None]:
# Merge
# df_key = name, df-list = dataframe
transition_df_list = list(transition_df_map.items())
df_key, all_fold_change_df = transition_df_list.pop(0)
for df_key, right_df in transition_df_list:
    all_fold_change_df = pd.merge(
        all_fold_change_df, right_df, left_index=True, right_index=True, how="outer"
    )

#print(all_fold_change_df.to_string()) #print everything

# Plot fold change
ax = all_fold_change_df.plot.box() # its a series (diff for data frame)
ax.set_title('')
ax.set_xlabel('Transitions')
ax.set_ylabel('Fold change')
ax.set_xticklabels(list(transition_df_map.keys()))

#ax = fold_change_df.plot.box() #single transition type
plt.show()

In [None]:
# Correlation Matrix: cell/cell before and cell/cell after transition

# Extract for data int_all_Ptrans/ int_all_Ttrans where second_event_start - second event ends

# muss average pro cell_id-transition haben sonst plottet er fur jedes sample/transition extra


pre_data = all_Ttrans_df[all_Ttrans_df.index < 0.0]
post_data = all_Ttrans_df[all_Ttrans_df.index > 2] # for stim-behavior >2, otherwise >=0

print(pre_data)
int_pre_data = pre_data.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')
int_post_data = post_data.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')


# To make the plot in the notebook and not in an extra window
%matplotlib notebook 

plt.matshow(post_data.corr())
#plt.matshow(int_post_data.corr()-int_pre_data.corr())
#cb = plt.colorbar()
#cb.ax.tick_params(labelsize=14)
#plt.title('Correlation Matrix', fontsize=16);
#plt.show()
#correlation = np.corrcoef(cellpattern_averages) #data(cell,time)
#print(correlation)

##average from pre and average from post for time
#avg_int_pre_data = int_pre_data.mean(axis=0)
#avg_int_post_data = int_post_data.mean(axis=1)
#print(avg_int_pre_data)
#plt.matshow(avg_int_post_data.corr())

#plt.matshow(int_post_data.corr()-int_pre_data.corr())
#cb = plt.colorbar()
#cb.ax.tick_params(labelsize=14)
#plt.title('Correlation Matrix', fontsize=16);
#plt.show()
#correlation = np.corrcoef(cellpattern_averages) #data(cell,time)
#print(correlation)

In [None]:
#plotting

In [None]:
%matplotlib notebook 

# For multiple transition types!! 
# If a dataframe with NANs is plotted (raw-data = non interpolated), use 
# marker = '+', or 'o', since the line in the lineplot only connects 
# consecutive data points
def aligned_layout_plot(plot, tick_spacing=1, fov=(-2, 6, 0.06, 0.125), legend=False): 
    # Set fine x-axis scale
    plot.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))

    # Set x and y limits and legend (default = False) 
    plot.axis(fov)
    #plot.legend().set_visible(legend)

colors = ["mediumblue", "c", "r", "orange", 'm', "lightcoral", 'darkviolet', 'deeppink', "m", "lightcoral","darkviolet", "firebrick", "r","lightcoral", "orange", "darkblue", "cornflowerblue","c", "orangered", 'c', "seagreen", "limegreen",'k','y', 'k', 'y', 'm', 'g', 'orange', 'r', 'k', 'y', 'b', 'brown', "mediumblue", "cornflowerblue", "seagreen", "limegreen", "lightblue", "orange", "k", "c", "k", "y", "b", "brown"]
#colors = {tt.get_filter_regex(use_cell=True, use_filter_pattern=True, use_first_event=True, use_second_event=True) : color for tt, color in zip(transition_types, colors)}
    
print(colors)
    
fig = plt.figure()
sub2 = fig.add_subplot(111) 
for tt, color in zip(transition_types, colors):
    int_some_Ptrans_df = int_all_Ptrans_df.filter(regex=tt.get_filter_regex(use_cell=True, use_filter_pattern=True, use_first_event=True, use_second_event=True))
    # Average and stddev, min, max, sem for post_behavior_transition events
    all_Ptrans_avg_df = int_some_Ptrans_df.mean(axis=1) # Interpolated data used
    #all_Ptrans_min_df = int_some_Ptrans_df.min(axis=1)
    #all_Ptrans_max_df = int_some_Ptrans_df.max(axis=1)
    # Standard deviation (distribution)
    #all_Ptrans_std_df = int_some_Ptrans_df.std(axis = 1)
    #standard error of mean
    all_Ptrans_sem_df = int_some_Ptrans_df.sem(axis = 1)

    all_Ptrans_avg_df.plot(ax=sub2, label = tt.get_filter_regex(use_all=True), color = color, linewidth = 2) #use interpolated df to calculate average...
    #all_Ptrans_avg_df.plot(yerr=all_Ptrans_std_df, ax=sub2, label = tt.get_filter_regex(use_all=True), alpha = 0.005, color = color)
    all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_sem_df, ax=sub2, color = 'lightgrey', alpha = 0.5)
aligned_layout_plot(sub2, legend=True)

#plt.savefig("fig-behavior11.png")

In [None]:
# To make the plot in the notebook and not in an extra window
%matplotlib notebook 

# Only for one type of transitions (I still need this!!)

# Average and stddev, min, max, sem for post_behavior_transition events
all_Ptrans_avg_df = int_all_Ptrans_df.mean(axis=1) # Interpolated data used
all_Ptrans_min_df = int_all_Ptrans_df.min(axis=1)
all_Ptrans_max_df = int_all_Ptrans_df.max(axis=1)
# Standard deviation (distribution)
all_Ptrans_std_df = int_all_Ptrans_df.std(axis = 1)
#standard error of mean
all_Ptrans_sem_df = int_all_Ptrans_df.sem(axis = 1)
#wrong zur haelfte: Want to have avg per celltyp over time point, 
#and not avg over all cells per timepoint (refer to Data_filter or Grouper) 

# Plotting for multi-events (same_behavioral_transition)
# If a dataframe with NANs is plotted (raw-data = non interpolated), use 
# marker = '+', or 'o', since the line in the lineplot only connects 
# consecutive data points
def aligned_layout_plot(plot, tick_spacing=1, fov=(-5, 15, 0.0, 0.7), legend=False): 
    # Set fine x-axis scale
    plot.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))

    # Set x and y limits and legend (default = False) 
    plot.axis(fov)
    plot.legend().set_visible(legend)

fig = plt.figure()

# Plot all cells from all_df, aligned at zero for event_start, specified in Cell_Trace_Config.
sub1 = fig.add_subplot(111) #211
all_Ptrans_df.plot(ax=sub1, marker = '*', label = ctc.cell_type)
aligned_layout_plot(sub1)

#sub2 = fig.add_subplot(212) #212
#all_Ptrans_avg_df.plot(ax=sub2, color = 'c', label = ctc.cell_type) #use interpolated df to calculate average...
#all_Ptrans_min_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#all_Ptrans_max_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_std_df, ax=sub2, color = 'lightgrey', alpha = 0.1)
#all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_sem_df, ax=sub2, color = 'grey', alpha = 0.1)
#aligned_layout_plot(sub2)


In [None]:
#wichtig adapt annotation heatmap

In [None]:
# Plotting - single sample for whole time
# To make the plot in the notebook and not in an extra window
%matplotlib notebook 
def layout_plot(plot, tick_spacing=2, fov=(1000, 1400, -0.05, 0.3), legend=False): 
    # Set fine x-axis scale
    plot.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))

    # Set x and y limits and legend (default = False) 
    plot.axis(fov)
    plot.legend().set_visible(legend)
#####################
# Get rows where current event is active and draw a vertical 
# line to indicate the event in the plot
event_df = sample_df[sample_df.loc[:,ctc.get_event_start_col()] == 1]
fig = plt.figure()
fig.set_facecolor("white")

######################

# Plot all cells from cell_subset_df over entire time (specified in Cell_Trace_Config). <For a single sample>
#sub1 = fig.add_subplot(111) #211
#cell_subset_df.plot(ax=sub1)
#ctc.add_event_time_points_to_plot(event_df, sub1)
#layout_plot(sub1)

# Avg, min, max, std-dev for multiple cells in single sample over whole time.
# All cells are averaged. For cell and filterpattern see below.
sub2 = fig.add_subplot(111)#212
ctc.add_event_time_points_to_plot(event_df, sub2)
cell_avg_df.plot(ax=sub2, color = 'k', label = ctc.cell_type, linewidth=1)
#cell_min_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#cell_max_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#cell_avg_df.plot.line(yerr=cell_std_df, ax=sub2, color = 'r', alpha = 0.1)
#cell_avg_df.plot.line(yerr=cell_sem_df, ax=sub2, color = 'c', alpha = 0.1)
layout_plot(sub2)
