In [None]:
#Transition for different transitions (double and triple had to be combined)

In [None]:
class PostBehaviorTransition:
    """Describe one behavior transition to search for in one sample.

    The instance is a plain value holder; all arguments are stored unchanged.

    sample_id  -- identifier of the sample to search in
    event      -- name of the first behavior of the transition
    post_event -- name of the behavior that follows ``event``
    max_delay  -- maximum delay handed to the transition search (default 0)
    """

    def __init__(self, sample_id, event, post_event, max_delay=0):
        self.sample_id, self.max_delay = sample_id, max_delay
        self.event, self.post_event = event, post_event

def find_behavior_before(sample_id, sample_df, first_event, second_event,
                         max_delay=0,
                         first_event_duration=None,
                         second_event_duration=None):
    """Find transitions from <first_event> to <second_event> in one sample.

    The rows of <sample_df> are visited in order. While walking, the most
    recent start and end time of <first_event> and the most recent
    non-overlapping start time of <second_event> are remembered. As soon as
    all three are known, the candidate is evaluated: it is accepted if
    <second_event> starts at most <max_delay> after the end of <first_event>.
    The start of <second_event> may be before, at or after the end of
    <first_event>; a negative difference is still smaller than <max_delay>
    and therefore explicitly allowed.

    The start of any other behavior (a "*_start" column whose name contains
    neither "quiet" nor one of the two event names) discards the candidate
    collected so far.

    Parameters
    ----------
    sample_id
        Identifier that is copied into every result.
    sample_df
        Data frame of a single sample. It must contain a "time" column, the
        columns "<first_event>_start" and "<first_event>_end", and the columns
        "<second_event>_start", "<second_event>_end" and
        "<second_event>_overlap".
    first_event, second_event
        Names of the two behaviors.
    max_delay
        Largest allowed difference between the start of <second_event> and
        the end of <first_event>.
    first_event_duration, second_event_duration
        Optional minimum durations. A candidate whose event is shorter is not
        reported. If a minimum duration for <second_event> is requested but
        no end of <second_event> can be found, the duration cannot be
        verified and the candidate is dropped.

    Returns
    -------
    list of dict
        One dict per transition with the keys "sample_id",
        "first_event_start", "first_event_end", "second_event_start",
        "second_event_end", "first_event" and "second_event".
        "second_event_end" is None if no end was found and no minimum
        duration for <second_event> was requested.
    """
    results = []
    first_event_start_col = '{}_start'.format(first_event)
    first_event_end_col = '{}_end'.format(first_event)
    second_event_start_col = '{}_start'.format(second_event)
    second_event_end_col = '{}_end'.format(second_event)
    second_event_overlap_col = '{}_overlap'.format(second_event)

    # Start columns of all other behaviors. The selection only depends on the
    # column names, so it is computed once instead of once per row. Excluding
    # every column that contains one of the event names also excludes the two
    # start columns of the events themselves.
    interrupting_start_cols = [
        column for column in sample_df.columns
        if isinstance(column, str)
        and column.endswith('_start')
        and 'quiet' not in column
        and first_event not in column
        and second_event not in column
    ]

    first_event_start_time = None
    first_event_end_time = None
    second_event_start_time = None
    second_event_end_time = None

    for position, (_, row) in enumerate(sample_df.iterrows()):
        # Look for start of second behavior and remember its time.
        if row[second_event_start_col] and not row[second_event_overlap_col]:
            second_event_start_time = row['time']
            # An end time cached from an earlier evaluation belongs to the
            # previous second event and must not be reused for this one.
            second_event_end_time = None
        if row[first_event_end_col]:
            first_event_end_time = row['time']
        if row[first_event_start_col]:
            first_event_start_time = row['time']

        # Another behavior started while a first event is being tracked:
        # forget everything collected so far.
        if (first_event_start_time is not None and
                any(row[column] for column in interrupting_start_cols)):
            first_event_start_time = None
            first_event_end_time = None
            second_event_start_time = None
            second_event_end_time = None

        # As long as we haven't collected all needed time points,
        # keep on searching.
        if (first_event_start_time is None or
                first_event_end_time is None or
                second_event_start_time is None):
            continue

        # The first event has to start before the second event starts and
        # before its own remembered end; otherwise the remembered time points
        # do not belong together (yet).
        if first_event_start_time > second_event_start_time:
            continue
        if first_event_start_time > first_event_end_time:
            continue

        # Report simultaneous starts of both events.
        if abs(first_event_start_time - second_event_start_time) < 0.00001:
            print('{}: start time (first) event {} and start time of (second) event {} are the same: {}'.format(
                sample_id, first_event, second_event, first_event_start_time))

        # Search the end of the second event from the current row onwards.
        # Positional slicing keeps this independent of the index labels and a
        # separate loop variable keeps <row> intact.
        if second_event_end_time is None:
            for _, later_row in sample_df.iloc[position:].iterrows():
                if later_row[second_event_end_col]:
                    second_event_end_time = later_row['time']
                    break
        if second_event_end_time is None:
            print("warning: end time not found for second event")

        # Test time between first event end and second event start. If it
        # is not larger than <max_delay>, store the transition as result.
        # The first event end time being greater than the second event start
        # time is explicitly allowed.
        if (second_event_start_time - first_event_end_time) <= max_delay:
            # A candidate that is too short is kept (no reset), so it is
            # evaluated again on the following rows.
            if (first_event_duration is not None and
                    first_event_end_time - first_event_start_time < first_event_duration):
                continue

            # Without an end time a requested minimum duration of the second
            # event cannot be checked; such a candidate is dropped.
            duration_unverifiable = (second_event_duration is not None and
                                     second_event_end_time is None)
            if not duration_unverifiable:
                if (second_event_duration is not None and
                        second_event_end_time - second_event_start_time < second_event_duration):
                    continue

                results.append({
                    'sample_id': sample_id,
                    'first_event_start': first_event_start_time,
                    'first_event_end': first_event_end_time,
                    'second_event_start': second_event_start_time,
                    'second_event_end': second_event_end_time,
                    'first_event': first_event,
                    'second_event': second_event
                })

        # Reset behavior tracking variables to find new pattern match.
        first_event_start_time = None
        first_event_end_time = None
        second_event_start_time = None
        second_event_end_time = None

    return results

# Choose which transitions to search for. Exactly one definition of
# behavior_transitions should be active.

# Single sample:
#behavior_transitions = [
#    PostBehaviorTransition('17-08-26L6-cl', 'quiet', 'stim', 10)
#    #PostBehaviorTransition('17-08-24L4-cl', 'stim', 'bw', 4.9)
#]

# All samples, single transition type:
#behavior_transitions = [
#    PostBehaviorTransition(name, 'turn', 'bw', 2) for name in lm_data]

# All samples, multiple transition types. Further combinations that have been
# used here: ('fw', 'bw', 10), ('bw', 'fw', 10), ('turn', 'turn', 10),
# ('turn', 'fw', 2) and ('turn', 'bw', 2).
behavior_transitions = (
    [PostBehaviorTransition(name, 'fw', 'turn', 3) for name in lm_data]
    + [PostBehaviorTransition(name, 'bw', 'turn', 2) for name in lm_data]
)


found_transitions = []
for bt in tqdm(behavior_transitions):
    sample_df = sample_data.get(bt.sample_id)
    # Check for missing data first: accessing .columns on None would raise an
    # AttributeError and hide the informative error below.
    if sample_df is None:
        raise ValueError('No data found for sample {}'.format(bt.sample_id))
    # Skip samples without any backward ("bw") columns.
    if not any("bw" in column for column in sample_df.columns):
        continue
    # For 'quiet' set the *_event_duration arguments. Default = None.
    transitions = find_behavior_before(bt.sample_id, sample_df, bt.event,
                                       bt.post_event, bt.max_delay,
                                       first_event_duration=None,
                                       second_event_duration=None)

    if transitions:
        found_transitions.append(transitions)


# Number of result lists. Every entry of behavior_transitions with at least
# one hit contributes one list, so a sample searched for several transition
# types is counted once per type.
print(len(found_transitions))
# Total number of transitions over all result lists.
print(sum(len(sample_transitions) for sample_transitions in found_transitions))
#print(found_transitions) # Transitions

In [None]:
# Triple transition
# Find a first and a second transition between two different behaviors that
# share their middle event: the second event of the first transition has to
# start at the same time as the first event of the second transition. Each
# transition has its own max_delay.

# Open single samples

#first_transitions = [
#    PostBehaviorTransition('17-08-26L6-cl', 'bw', 'stim', 9)
#]
#second_transitions = [
#    PostBehaviorTransition('17-08-26L6-cl', 'stim', 'fw', 9)
#]

# Open all samples

first_transitions = [
    PostBehaviorTransition(name, 'stim', 'fw', 5) for name in lm_data]

second_transitions = [
    PostBehaviorTransition(name, 'fw', 'turn', 3) for name in lm_data]


found_transitions = []
for first_bt, second_bt in tqdm(zip(first_transitions, second_transitions)):
    transitions = []
    assert first_bt.sample_id == second_bt.sample_id, "{} does not match {}".format(first_bt.sample_id, second_bt.sample_id)
    sample_df = sample_data.get(first_bt.sample_id)
    if sample_df is None:
        # Report the sample of this iteration (not a variable left over from
        # another cell).
        raise ValueError('No data found for sample {}'.format(first_bt.sample_id))
    # Skip samples without any backward ("bw") columns.
    if not any("bw" in column for column in sample_df.columns):
        continue
    first_transition_duration = None # None for all behavior, except quiet (test: 15)
    second_transition_duration = None
    third_transition_duration = None # None for all behavior, except quiet (test 15)
    # The results get their own names so the configuration lists
    # first_transitions / second_transitions are not overwritten.
    first_found = find_behavior_before(first_bt.sample_id, sample_df, first_bt.event,
                                       first_bt.post_event, first_bt.max_delay,
                                       first_event_duration=first_transition_duration,
                                       second_event_duration=second_transition_duration)
    second_found = find_behavior_before(second_bt.sample_id, sample_df, second_bt.event,
                                        second_bt.post_event, second_bt.max_delay,
                                        first_event_duration=second_transition_duration,
                                        second_event_duration=third_transition_duration)
    #print("{} transitions from {} to {}".format(len(first_found), first_bt.event, first_bt.post_event))
    #print("{} transitions from {} to {}".format(len(second_found), second_bt.event, second_bt.post_event))

    for ft in first_found:
        for st in second_found:
            # Both transitions have to share the same middle event.
            if abs(ft["second_event_start"] - st["first_event_start"]) < 0.00001:
                transitions.append({
                    "sample_id": ft["sample_id"],
                    "first_event_start": ft["first_event_start"],
                    "first_event_end": ft["first_event_end"],
                    "second_event_start": st["first_event_start"],
                    "second_event_end": st["first_event_end"],
                    "third_event_start": st["second_event_start"],
                    # Event names, matching the keys of the double transitions.
                    "first_event": ft["first_event"],
                    "second_event": ft["second_event"],
                    "third_event": st["second_event"],
                })
    if transitions:
        print("{} transition triples found".format(len(transitions)))
        found_transitions.append(transitions)



print(len(found_transitions)) # Number of samples with at least one triple, not the number of triples
print(sum(len(sample_transitions) for sample_transitions in found_transitions)) # Total number of triples
print(found_transitions)

In [None]:
# Define cell type and filter pattern for the found transitions (the regex
# filter is optional). The alignment time depends on the stimulus: align on
# the start of the second event by default, or on the start of the first
# event if the first event is a stimulus.
# Output (further below): a) aligned raw values, b) aligned interpolated values.
# tqdm = progress bar

# Key of a found transition whose time is used as alignment point.
# Use "first_event_start" if the first event is a stimulus.
alignment_key = "second_event_start"

cell_Ptrans_configs = []
all_Ptrans_events = []


for sample in tqdm(found_transitions):
    for found_transition in sample:
        cell_Ptrans_configs.append(CellTransConfig(
            found_transition["sample_id"], "A00c",
            found_transition[alignment_key],
            filter_pattern=None,
            first_event=found_transition["first_event"],
            second_event=found_transition["second_event"]))

# Extract a time window around every configured event and align all windows
# on the event time (event time = 0).
# This works for a single sample or for multiple samples.
# Note: time is first set as index of cell_subset_df and then moved back into
# a regular column, so that "time" is the first column of every window.

# Set the window range left and right from the event (in seconds)
left_half_window_size = 20 # If negative it goes further to right half (Good for skipping stimulus)
right_half_window_size = 20

windows = []
n_behavior_per_sample = {}

for ctc in tqdm(cell_Ptrans_configs):
    sample_df = sample_data.get(ctc.sample_id)
    if sample_df is None:
        raise ValueError('{}: could not find sample data'.format(ctc.sample_id))

    # Running number of this event within its sample (starts at 1).
    n_behavior = n_behavior_per_sample.setdefault(ctc.sample_id, 1)
    n_behavior_per_sample[ctc.sample_id] += 1

    # Extract columns matching our cell type and the optional filter pattern.
    cell_subset_df = sample_df.filter(regex=ctc.get_filter_regex()) # Get subset of cells
    # Set time as index and move it back into a column: "time" becomes the
    # first column of the subset.
    cell_subset_df = cell_subset_df.set_index(sample_df.time).reset_index()

    # Don't apply filter regex, but take all cells from lm_data
    # Try and except for cases when time was added to lm_data before (by partially running the notebook)
    #cell_subset_df = lm_data.get(ctc.sample_id)#Get subset of cells
    #try:
    #    cell_subset_df.set_index(sample_df.time, inplace=True) #Set time to index (essential for min/max...)
    #    cell_subset_df.reset_index(inplace = True, drop=True) # Add index and time = column
    #except:
    #    pass

    window_start = ctc.event_time - left_half_window_size
    window_end = ctc.event_time + right_half_window_size

    # Get subset of rows between window_start and window_end.
    # Including the window borders:
    #in_window = (cell_subset_df.time >= window_start) & (cell_subset_df.time <= window_end)
    # Excluding the window borders:
    in_window = (cell_subset_df.time > window_start) & (cell_subset_df.time < window_end)
    # Work on an explicit copy, so the modifications below neither touch
    # cell_subset_df nor trigger chained-assignment warnings.
    trans = cell_subset_df[in_window].copy()

    # Normalize the time axis: the event of interest is at time zero.
    trans['time'] = trans['time'] - ctc.event_time

    # Add sample_id as prefix and n_behavior plus the event names as suffix to
    # every data column to distinguish events within a sample. The time
    # column keeps its name, it is the merge key below.
    trans = trans.rename(columns=lambda x: x if x == 'time' else '{}_{}_{}_{}_{}'.format(
        ctc.sample_id, x, n_behavior, ctc.first_event, ctc.second_event))

    all_Ptrans_events.append(trans) # Collect the windows of all events
#print(all_Ptrans_events)

if not all_Ptrans_events:
    raise ValueError('No transition windows were extracted, nothing to align')

# Take the first window as left side of pd.merge_ordered and merge all
# remaining windows into it on the shared time axis.
all_Ptrans_df = all_Ptrans_events[0]
for right_df in all_Ptrans_events[1:]:
    all_Ptrans_df = pd.merge_ordered(all_Ptrans_df, right_df, on="time", how="outer")
#print(all_Ptrans_df)

# Use time as index and drop the time column
all_Ptrans_df = all_Ptrans_df.set_index("time")
#print(all_Ptrans_df)

# Index interpolation (plain linear interpolation is not used on
# all_Ptrans_df, because the index [= time] is not equally spaced)
int_all_Ptrans_df = all_Ptrans_df.interpolate(method='index', axis=0, limit=None, inplace=False, limit_direction='both')
#print(int_all_Ptrans_df.columns)

In [None]:
%matplotlib notebook 

# For multiple transition types!! 
# If a dataframe with NANs is plotted (raw-data = non interpolated), use 
# marker = '+', or 'o', since the line in the lineplot only connects 
# consecutive data points
def aligned_layout_plot(plot, tick_spacing=1, fov=(-2, 6, 0.06, 0.125), legend=False):
    """Apply the common layout to an aligned-events plot.

    plot         -- axes object to modify
    tick_spacing -- distance between major ticks on the x-axis
    fov          -- axis limits, handed unchanged to plot.axis()
    legend       -- accepted but currently ignored; the legend handling is
                    disabled (see the commented line below)
    """
    # Fine x-axis scale: one major tick every <tick_spacing> units.
    major_locator = ticker.MultipleLocator(tick_spacing)
    plot.xaxis.set_major_locator(major_locator)

    # x and y limits
    plot.axis(fov)
    #plot.legend().set_visible(legend)

# One color per transition type; colors and transition types are paired in order.
colors = [
    "mediumblue", "c", "r", "orange", "m", "lightcoral", "darkviolet",
    "deeppink", "m", "lightcoral", "darkviolet", "firebrick", "r",
    "lightcoral", "orange", "darkblue", "cornflowerblue", "c", "orangered",
    "c", "seagreen", "limegreen", "k", "y", "k", "y", "m", "g", "orange",
    "r", "k", "y", "b", "brown", "mediumblue", "cornflowerblue", "seagreen",
    "limegreen", "lightblue", "orange", "k", "c", "k", "y", "b", "brown",
]
#colors = {tt.get_filter_regex(use_cell=True, use_filter_pattern=True, use_first_event=True, use_second_event=True) : color for tt, color in zip(transition_types, colors)}

print(colors)

fig = plt.figure()
sub2 = fig.add_subplot(111)
for tt, color in zip(transition_types, colors):
    # Columns of the interpolated data that belong to this transition type.
    type_regex = tt.get_filter_regex(use_cell=True, use_filter_pattern=True,
                                     use_first_event=True, use_second_event=True)
    int_some_Ptrans_df = int_all_Ptrans_df.filter(regex=type_regex)

    # Average and standard error of the mean per time point (interpolated
    # data). min, max and the standard deviation are available the same way
    # via .min(axis=1), .max(axis=1) and .std(axis=1).
    all_Ptrans_avg_df = int_some_Ptrans_df.mean(axis=1)
    all_Ptrans_sem_df = int_some_Ptrans_df.sem(axis=1)

    # Mean trace in the color of the transition type ...
    type_label = tt.get_filter_regex(use_all=True)
    all_Ptrans_avg_df.plot(ax=sub2, label=type_label, color=color, linewidth=2)
    # ... and the standard error of the mean as light grey error bars.
    #all_Ptrans_avg_df.plot(yerr=all_Ptrans_std_df, ax=sub2, label = tt.get_filter_regex(use_all=True), alpha = 0.005, color = color)
    all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_sem_df, ax=sub2, color='lightgrey', alpha=0.5)

aligned_layout_plot(sub2, legend=True)

plt.savefig("fig-behavior11.png")

In [None]:
# To make the plot in the notebook and not in an extra window
%matplotlib notebook 

# Only for one type of transitions (I still need this!!)

# Summary statistics per time point over all columns of the interpolated
# post_behavior_transition data.
all_Ptrans_avg_df = int_all_Ptrans_df.mean(axis="columns")
# Standard error of the mean
all_Ptrans_sem_df = int_all_Ptrans_df.sem(axis="columns")
# Standard deviation (distribution)
all_Ptrans_std_df = int_all_Ptrans_df.std(axis="columns")
# Range of the values
all_Ptrans_min_df = int_all_Ptrans_df.min(axis="columns")
all_Ptrans_max_df = int_all_Ptrans_df.max(axis="columns")
# Half wrong: wanted is the average per cell type per time point, and not
# the average over all cells per time point (refer to Data_filter or Grouper)

# Plotting for multi-events (same_behavioral_transition)
# If a dataframe with NANs is plotted (raw-data = non interpolated), use 
# marker = '+', or 'o', since the line in the lineplot only connects 
# consecutive data points
def aligned_layout_plot(plot, tick_spacing=5, fov=(-18.5, 42.4, 0.0, 1.0), legend=False):
    """Apply the common layout to an aligned-events plot.

    plot         -- axes object to modify
    tick_spacing -- distance between major ticks on the x-axis
    fov          -- axis limits, handed unchanged to plot.axis()
    legend       -- whether the legend of the plot is visible (default: hidden)
    """
    # Fine x-axis scale: one major tick every <tick_spacing> units.
    major_locator = ticker.MultipleLocator(tick_spacing)
    plot.xaxis.set_major_locator(major_locator)

    # x and y limits
    plot.axis(fov)

    # Create the legend and show or hide it as requested.
    plot_legend = plot.legend()
    plot_legend.set_visible(legend)

# Two stacked panels: raw aligned traces on top, their average below.
fig = plt.figure()

# Upper panel: every column of all_Ptrans_df (non-interpolated, aligned at
# zero on the event time). Markers are used because the raw data contains
# NaNs and a line only connects consecutive data points.
# NOTE(review): ctc is not defined in this cell; it appears to be the loop
# variable left over from the window extraction, i.e. the last processed
# config - confirm that its cell_type is the intended label.
sub1 = fig.add_subplot(211) #211
all_Ptrans_df.plot(ax=sub1, marker = '*', label = ctc.cell_type)
aligned_layout_plot(sub1)

# Lower panel: average over all columns with the standard error of the mean
# as error bars (both computed from the interpolated data).
sub2 = fig.add_subplot(212) #212
all_Ptrans_avg_df.plot(ax=sub2, color = 'c', label = ctc.cell_type) # average of the interpolated data
# Alternatives: min/max envelope or standard deviation instead of the sem.
#all_Ptrans_min_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#all_Ptrans_max_df.plot(ax=sub2, color = 'r', linewidth=1, alpha = 0.5)
#all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_std_df, ax=sub2, color = 'lightgrey', alpha = 0.1)
all_Ptrans_avg_df.plot.line(yerr=all_Ptrans_sem_df, ax=sub2, color = 'grey', alpha = 0.1)
aligned_layout_plot(sub2)
