In [1]:
"""
This notebook is the main part of the bachelors project 'A concept and prototypical implementation for network based 
analysis of fish behavior' by Nicolai Kraus at the University of Constance, supported by Michael Aichem and 
supervised by Dr. Karsten Klein. 

The project consists of a pipeline which loads a behavior dataset and produces a interactive dashboard via jupyter 
notebook and voila.

"""

#standard libraries
import os
import io
import warnings
import math
import numpy as np

#network/plot generation
import networkx as nx
from networkx.drawing.nx_pydot import to_pydot
import matplotlib.pyplot as plt
import pandas as pd
import graphviz
import matplotlib as mpl

#UI/display
import ipywidgets as widgets
from ipywidgets import interactive,interact, fixed
from IPython.display import display, Image, Markdown, SVG, HTML

#remove warnings
pd.options.mode.chained_assignment = None  # default='warn'
warnings.simplefilter(action='ignore', category=FutureWarning)

#this line is needed for windows so the library 'pygraphviz', a wrapper of 'graphviz' for 'python'
#can load its modules 'dot' and 'neato' properly.
if  not 'C:\\Program Files (x86)\\Graphviz2.38\\bin' in os.environ["PATH"]: 
    os.environ["PATH"] += os.pathsep + 'C:\\Program Files (x86)\\Graphviz2.38\\bin'  
    

In [2]:
def create_interaction_network(boris_df, min_interactions=1):
    """This function takes as parameters the behavior file and the minimum number of interactions between two fish 
    for an edge to be displayed. An edge count is increased for each row in the behavior file where 'subject' 
    and 'modifier_1' are the same."""
    #remove behavior with no interaction partner and irrelevant data
    interactions_df = boris_df[boris_df.modifier_1.notna()]
    interactions_df = interactions_df[['subject', 'modifier_1']]
    #create a dataframe for the edges 
    edges_df = interactions_df.groupby(interactions_df.columns.tolist(), as_index=False).size().to_frame(name='records').reset_index()
    #remove edges below the threshold
    edges_df = edges_df[edges_df.records >= min_interactions]
    #add tuples and records as attributes for the network generation
    edges_df['tuples'] = list(zip(edges_df.subject, edges_df.modifier_1))
    edge_attributes_label = dict(zip(edges_df.tuples, edges_df.records))
    #change for edge weight
    edges_df.records = edges_df.records * 3 / edges_df.records.max()
    edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.records))
    #create directed graph with networkx
    G = nx.DiGraph()
    G.add_edges_from(edges_df.tuples)
    #edge labels
    nx.set_edge_attributes(G, edge_attributes_label, name='label')
    #edge weight
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render('images/interactions.gv', view=False)  
    display(HTML('images/interactions.gv.svg'))
    return

def create_distance_network(coordinates_df, max_dist=100, min_seconds=3):
    """This function takes as parameters the optional coordinates file, the maximal distance for two fish 
    in a frame for a edge to be drawn between them, and assuming 25 fps, the minimum amount of seconds which
    the two fish have to be in the specified distance. 
    A loop is checking for each frame the distance from each fish to each fish, that is the reason why the output
    may take a few seconds until loaded."""
    #get the range of the frames (i.e. 25/second) to loop through
    first_frame = coordinates_df.frame.min()
    last_frame = coordinates_df.frame.max()
    frames_list = list(range(first_frame, last_frame,5))
    close_fish_list = []
    #take a slice of coordinates_df for each frame and calculate the 
    #distances from each fish to each other fish
    for frame in frames_list:
        frame_df = coordinates_df[coordinates_df.frame == frame]
        i=0
        while i < len(frame_df)-1:
            k=i+1
            while k < len(frame_df):
                #calculate the distance for fish k and fish i
                x = abs(frame_df.x.iloc[i] - frame_df.x.iloc[k])
                y = abs(frame_df.y.iloc[i] - frame_df.y.iloc[k])
                dist = math.sqrt(x**2 + y**2)
                #add an entry to the close_fish_list if dist < threshold
                if dist <= max_dist:
                    close_fish_list.append((frame_df.id.iloc[i], frame_df.id.iloc[k]))
                k+=1
            i+=1
    #create edges and attributes for network generation
    edges_df = pd.DataFrame(close_fish_list, columns=['fish_1', 'fish_2'])
    edges_df = edges_df.groupby(edges_df.columns.tolist(), as_index=False).size().to_frame(name='frames').reset_index()
    edges_df['tuples'] = list(zip(edges_df.fish_1, edges_df.fish_2))
    edges_df['close_seconds'] = edges_df.apply(lambda row: row.frames / 5, axis=1)
    edges_df = edges_df[edges_df.close_seconds >= min_seconds]
    edge_attributes_label = dict(zip(edges_df.tuples, edges_df.close_seconds))
    #change for edge weight
    edges_df.close_seconds = edges_df.close_seconds * 3 / edges_df.close_seconds.max()
    edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.close_seconds))
    #create undirected graph with attributes
    G = nx.Graph()
    G.add_edges_from(edges_df.tuples)
    nx.set_edge_attributes(G, edge_attributes_label, name='label')
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render('images/distances.gv', view=False)  
    display(HTML('images/distances.gv.svg'))
    return 

def create_trajectory_map(coordinates_df, boris_df):
    """Input parameters are the behavior and the coordinates file, the behavior file is used for the IDs, 
    so the colour scheme of the trajectories taken from the coordinates file is consistent with the colors
    from the plots. The trajectories are done by scattering the x- and y- coordinates for each fish for each
    frame together in one plot."""
    #id works only for jakobs positions
    fish_ids = get_fish_ids(boris_df)
    #fish_ids = coordinates_df.id.unique().tolist()
    trajectory_list = []
    fig = plt.figure(figsize=(9,7))
    ax = fig.subplots()
    for fish in fish_ids:
        #extract positions for the fish and scatter it
        coordinates = coordinates_df[coordinates_df.id==fish]
        trajectory = ax.scatter(coordinates.x, coordinates.y, 0.1)
        trajectory_list.append(trajectory)
    plt.legend(trajectory_list, fish_ids, markerscale=20)   
    plt.xlabel("x-coordinate", fontsize=18, labelpad=10)
    plt.ylabel("y-coordinate", fontsize=18, labelpad=10)
    fig.savefig('images/trajectory_map.png', bbox_inches='tight')
    return plt


def create_accumulate_actions_plot(boris_df, behavior, show_avg, show_grid):
    """Input parameter is the behavior file, the output is a static view of a accumulation of all actions 
    of all IDs accumulated over time, so the total amount is viewable as well as when the number of actions 
    increased most."""
    
    #get fish ids and initial empty figure for the plot
    fish_ids = get_fish_ids(boris_df)
    fig = plt.figure(figsize=(9,7))
    average = pd.DataFrame()
    highest_plot = 0
    
    #loop over all fish_ids and plot their amount of selected interactions 
    for fish in fish_ids:
        fish_df = boris_df[boris_df.subject == fish] 
        if 'behavioral_category' in boris_df:
            categories = fish_df[fish_df.behavioral_category == behavior]
            behaviors = fish_df[fish_df.behavior == behavior]
            fish_df = categories.append(behaviors)
        else:
            fish_df = fish_df[fish_df.behavior == behavior]
        if(len(fish_df)+1>highest_plot):
            highest_plot = len(fish_df)+1
        sum_of_rows = range(1,len(fish_df)+1)
        plt.plot(fish_df.time, sum_of_rows, label=fish)   
    #reset colour cycle 
    plt.gca().set_prop_cycle(None)
    
    #loop over all fish ids and make a dotted line to the end if the fish is not doing any new 
    #behaviors but some other fish are or some time is left
    for fish in fish_ids:
        fish_df = boris_df[boris_df.subject == fish]
        if 'behavioral_category' in boris_df:
            categories = fish_df[fish_df.behavioral_category == behavior]
            behaviors = fish_df[fish_df.behavior == behavior]
            fish_df = categories.append(behaviors)
        else:
            fish_df = fish_df[fish_df.behavior == behavior]
        plt.plot([fish_df.time.max(),boris_df.time.max()], [len(fish_df),len(fish_df)],':')
    plt.gca().set_prop_cycle(None)
    
    #loop over all fish ids and make the beginning  before the first behavior of the fish
    for fish in fish_ids:
        fish_df = boris_df[boris_df.subject == fish]
        if 'behavioral_category' in boris_df:
            categories = fish_df[fish_df.behavioral_category == behavior]
            behaviors = fish_df[fish_df.behavior == behavior]
            fish_df = categories.append(behaviors)
        else:
            fish_df = fish_df[fish_df.behavior == behavior]
        plt.plot([0,fish_df.time.min()], [0,1],':')
       
    #plot average
    if show_avg:
        avg_df = boris_df.copy()
        if 'behavioral_category' in avg_df:
            categories = avg_df[avg_df.behavioral_category == behavior]
            behaviors = avg_df[avg_df.behavior == behavior]
            avg_df = categories.append(behaviors)
        else: 
            avg_df = avg_df[avg_df.behavior == behavior]
        avg_range = []
        value=1/len(fish_ids)
        step=1
        while (step <= len(avg_df)):
            avg_range.append(value)
            value+=1/len(fish_ids)
            step+=1
        #plot from 0 to 1 dotted, main part, and end dotted
        plt.plot([0,avg_df.time.min()], [0,avg_range[0]], ':', color="black")
        plt.plot(avg_df.time, avg_range, label="average", color="black")
        plt.plot([avg_df.time.max(),boris_df.time.max()], [avg_range[-1], avg_range[-1]], ':', color="black")
    
    #finish the plot with some details
    plt.legend()
    plt.xlabel("Time", fontsize=18, labelpad=10)
    plt.ylabel("# " + str(behavior), fontsize=18, labelpad=10)
    #make frequency of yticks dependent on size of the highest plot
    if highest_plot < 11:
        yticks = range(0,highest_plot)
    elif highest_plot < 26:
        yticks = range(0,highest_plot, 2)
    elif highest_plot < 51:
        yticks = range(0,highest_plot, 5)
    elif highest_plot < 101:
        yticks = range(0,highest_plot, 10)
    elif highest_plot < 201:
        yticks = range(0,highest_plot, 20)
    else:
        yticks = range(0,highest_plot, 50)
    plt.yticks(yticks)
    
    if show_grid:
        plt.grid(linestyle='-', linewidth=0.2)
    
    plt.show()
    fig.savefig('images/accumulate_actions_plot.png', bbox_inches='tight')
    return plt

def create_activity_plot(boris_df, intervals=10):
    """Input parameters are the behavior file and the number of intervals the user wants to have displayed.
    Basically it is the same like the accumulate_actions_plot, but here the user can specify the intervals so 
    some correlations may be better to see."""
    fish_ids = get_fish_ids(boris_df)
    #retrieve interval size from amount of time intervals and the 
    #maximum value of boris_df.time
    max_time = boris_df.time.max()
    max_time = max_time.astype(int)
    interval_size = (max_time / intervals).astype(int)
    interval_list = range(interval_size, max_time+interval_size, interval_size)
    fig = plt.figure(figsize=(9,7))
    for fish in fish_ids:
        sum_actions=[]
        fish_df = boris_df[boris_df.subject == fish]
        for interval in interval_list:
            actions = len(fish_df[fish_df.time.astype(int) <= interval])
            sum_actions.append(actions)
        #take the differences and insert the first value again
        res = [sum_actions[i+1] - sum_actions[i] for i in range(len(sum_actions)-1)]
        res.insert(0,sum_actions[0])
        plt.plot(interval_list, res, label=fish)
    plt.legend()
    plt.xlabel("time in s", fontsize=16, labelpad=10)
    plt.ylabel("#behaviors in interval", fontsize=16, labelpad=10)
    

remove_list_cat = []
remove_list = []
remove_id_list = []
def create_behavior_cycle(boris_df, data, min_count, rmv_id, add_id, rmv_bhvr, add_bhvr, with_status, normalized, hue, node_colour, node_size, node_label, sort_by):
    """Input parameters are the behavior file, the specification if the user wants to see the behaviors itself 
    or the behavior cycle of the behavioral categories and the minimal count for a edge to be displayed. 
    This cycle is calculated by splitting the boris-file for each fish and then increasing the edge count for each 
    successing behavior. In the end, the edge count is normalized in [0,1] for each node where edges come from 
    so we have kind of a probability of which behavior follows which behavior"""
    fish_ids = get_fish_ids(boris_df)
    successor_list = []
    print(rmv_bhvr)
    #prepare dataframe with user input
    #first check if the user wants so see the behaviors or the behavioral categories
    if data == 'behavioral_category':
        #reset list of removed behaviors
        remove_list.clear()
        boris_df['chosen_data'] = boris_df['behavioral_category']
        #print unique behavioral categories
        display(Markdown("""#### All behavioral categories: \n"""))
        print(boris_df.chosen_data.unique())
        #remove and add behavioral categories
        if rmv_bhvr:
            remove_us = rmv_bhvr.split('\'')
            for x in remove_us:
                if x in boris_df.chosen_data.unique() and (len(remove_list_cat)+1 < len(boris_df.chosen_data.unique())):
                    remove_list_cat.append(x)
        if add_bhvr:
            add_us = add_bhvr.split('\'')
            for x in add_us:
                if x in boris_df.chosen_data.unique() and x in remove_list_cat:
                    remove_list_cat.remove(x)
        if remove_list_cat:
            display(Markdown("""#### Removed behavioral categories: \n"""))
            print(set(remove_list_cat))
            for x in remove_list_cat:
                boris_df = boris_df.drop(boris_df[boris_df.chosen_data == x].index)
    else:
        #reset list of removed behavioral categories
        remove_list_cat.clear()
        boris_df['chosen_data'] = boris_df['behavior']
        #print all behaviors
        display(Markdown("""#### All behaviors:"""))
        print(boris_df.chosen_data.unique())
       
        #add and remove behaviors
        if rmv_bhvr:
            remove_us = rmv_bhvr.split('\'')
            for x in remove_us:
                if x in boris_df.chosen_data.unique() and (len(remove_list)+1 < len(boris_df.chosen_data.unique())):
                    remove_list.append(x)
        if add_bhvr:
            add_us = add_bhvr.split('\'')
            for x in add_us:
                if x in boris_df.chosen_data.unique() and x in remove_list:
                    remove_list.remove(x)     
        if remove_list:
            display(Markdown("""#### Removed behavior: \n"""))
            print(set(remove_list))
            for x in remove_list:
                boris_df = boris_df.drop(boris_df[boris_df.chosen_data == x].index)
   
    #remove IDs
    if rmv_id:
        remove_ids = rmv_id.split('\'')
        for x in remove_ids:
            if x in fish_ids and len(remove_id_list)+1 < len(fish_ids):
                remove_id_list.append(x)
    if add_id:
        add_ids = add_id.split('\'')
        for x in add_ids:
            if (x in fish_ids or x in boris_df.modifier_1.unique()) and x in remove_id_list:
                remove_id_list.remove(x)
    if remove_id_list:
        display(Markdown("""#### Removed IDs: \n"""))
        print(set(remove_id_list))
    fish_ids_after_removal = [x for x in fish_ids if x not in remove_id_list]
    
    display(Markdown(""" --------------------------------------------------------"""))
    
    #loop through dataframe for each fish and add behavior and successor
    for fish in fish_ids_after_removal:
        id_frame = boris_df[boris_df.subject == fish]  
        if not (with_status):
            id_frame = id_frame.drop(id_frame[id_frame.status == 'STOP'].index)
        i=0
        k=i+1
        while i < len(id_frame)-1:
            successor_list.append((id_frame.chosen_data.iloc[i], id_frame.status.iloc[i], id_frame.chosen_data.iloc[k], id_frame.status.iloc[k]))
            k+=1
            i+=1
    #lets make an edgelist with behavior and successor
    successor_df = pd.DataFrame(successor_list, columns=['action_1', 'status_1', 'action_2', 'status_2'])
    if (with_status):
        successor_df['action_1'] = successor_df['action_1'] + ' ' + successor_df['status_1']
        successor_df['action_2'] = successor_df['action_2'] + ' ' + successor_df['status_2']
    else:
        successor_df = successor_df.replace(to_replace="POINT", value="")
    
    successor_df['tuples'] = list(zip(successor_df.action_1, successor_df.action_2))
    successor_df = successor_df.groupby(successor_df.columns.tolist(), as_index=False).size().to_frame(name='records').reset_index()
    
    #normalize the records in [0,1] so that all together are 1 for each action
    behavior_ids = successor_df.action_1.unique().tolist()
    edges_df = pd.DataFrame()
    for action in behavior_ids:
        action_frame = successor_df[successor_df.action_1 == action]
        if(normalized):    
            sum_of_successors = action_frame.records.sum()
            action_frame['normalized'] = action_frame.records.div(sum_of_successors).round(2)
        edges_df = edges_df.append(action_frame)   
    

    #erase edges below min_count
    try:
        if(normalized and min_count):
            edges_df = edges_df[edges_df.normalized > float(min_count)]
        elif not normalized and min_count:    
            edges_df = edges_df[edges_df.records > float(min_count)]
    except: display(Markdown("""#### min_count has to be a positive real number. No edges were removed.\n"""))
    
    # add average and total time
    times_list = get_total_and_avg_time(boris_df, fish_ids_after_removal)
    times_df = pd.DataFrame(times_list, columns=['action_1', 'total_time', 'avg_time'])
    
    #work on the nodes(behaviors) of the graph so we can later set node-attributes for graphviz
    nodes_df = edges_df[['action_1', 'records']]
    nodes_df = edges_df.groupby('action_1')['records'].sum().to_frame(name='records').reset_index()
    nodes_df = pd.merge(times_df, nodes_df, on='action_1', how='outer')
    nodes_df.columns = ['node', 'total_time', 'avg_time', 'record']
    #round results
    nodes_df.total_time = nodes_df.total_time.round(2)
    nodes_df.avg_time = nodes_df.avg_time.round(2)
    
    #merge nodes with amount and times in the dataframe for the tuples so 
    #they can be displayed inside the node as label
    labels_1 = nodes_df.copy()
    labels_1.columns = ['action_1', 'total_time_1', 'avg_time_1', 'record_1']
    edges_df = pd.merge(edges_df, labels_1, on='action_1', how='left')
    labels_2 = nodes_df.copy()
    labels_2.columns = ['action_2', 'total_time_2', 'avg_time_2', 'record_2']
    edges_df = pd.merge(edges_df, labels_2, on='action_2', how='left') 
    
    if(node_label == 'amount'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['record_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['record_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['record'].astype(str)
    elif(node_label == 'total_time'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['total_time_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['total_time_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['total_time'].astype(str)
    elif(node_label == 'avg_time'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['avg_time_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['avg_time_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['avg_time'].astype(str)
    
    if(sort_by == 'amount'):
        nodes_df = nodes_df.sort_values(by='record', ascending=False)
    elif(sort_by == 'total_time'):
        nodes_df = nodes_df.sort_values(by='total_time', ascending=False)
    else:
        nodes_df = nodes_df.sort_values(by='avg_time', ascending=False)
    
    # print behavior nodes and amount
    print(nodes_df.to_string(index=False))
    display(Markdown(""" --- - - - """))
    
    
    #logarithmic normalization of record, avg_time and total_time 
    nodes_df.record = (np.log(nodes_df.record)-np.log(nodes_df.record.min()))/(np.log(nodes_df.record.max())-np.log(nodes_df.record.min()))
    nodes_df.total_time = nodes_df.total_time+1
    nodes_df.total_time = (np.log(nodes_df.total_time)-np.log(nodes_df.total_time.min()))/(np.log(nodes_df.total_time.max())-np.log(nodes_df.total_time.min()))
    nodes_df.avg_time = nodes_df.avg_time+1
    nodes_df.avg_time = (np.log(nodes_df.avg_time)-np.log(nodes_df.avg_time.min()))/(np.log(nodes_df.avg_time.max())-np.log(nodes_df.avg_time.min()))
        
    #node sizes dependent on user input and then a dictionary 
    #for node height and width is created to give it to graphviz
    if(node_size == 'amount'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.record*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.record*1.4))
    elif (node_size == 'total_time'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.total_time*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.total_time*1.4))
    elif (node_size == 'avg_time'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.avg_time*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.avg_time*1.4))
        
    #node colour dependent on user input, values are normalized with np.log and then a dictionary
    #for node colour is created to give it to graphviz later
    hue = hue/360
    if(node_colour == 'amount'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['record'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    elif (node_colour == 'total_time'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['total_time'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    elif (node_colour == 'avg_time'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['avg_time'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    
    #create directed graph
    G = nx.DiGraph()
    G.add_edges_from(edges_df.tuples)
    
    #create label and weight for edges
    if(normalized):
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.normalized))
        edges_df.normalized = edges_df.normalized * 3
        edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.normalized))
    else:
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.records))
        #normalize logarithmic
        edges_df.records = (np.log(edges_df.records)-np.log(edges_df.records.min()))/(np.log(edges_df.records.max())-np.log(edges_df.records.min()))
        edges_df.records = edges_df.records + 0.1
        edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.records/edges_df.records.max()))
    
    #set edge attributes
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    nx.set_edge_attributes(G, edge_attributes_label, name='label')
    
    #set node attributes
    nx.set_node_attributes(G, nodes_width, name='width')
    nx.set_node_attributes(G, nodes_height, name='height')
    nx.set_node_attributes(G, nodes_colour, name='fillcolor')
    nx.set_node_attributes(G, 'filled', name='style')
    nx.set_node_attributes(G, "1234 &#013; 234 &#013; 234", name='tooltip')
    
    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render('images/transitions.gv', view=False)  
    display(HTML('images/transitions.gv.svg'))
    
    return 


In [3]:
def get_fish_ids(boris_df):
    """This function collects the unique fish ids. Functions needing these call this function so the order
    is always the same which results in a consequent colour scheme over all plots."""
    fish_ids = boris_df.subject.unique().tolist()
    if 'Subject' in fish_ids: fish_ids.remove('Subject')
    fish_ids = [x for x in fish_ids if str(x) != 'nan']
    return fish_ids


def get_total_and_avg_time(df, fish_ids):
    df = df[['time', 'subject', 'chosen_data', 'status']]
    behavior_ids = df.chosen_data.unique().tolist()
    time_list = []
    for behavior in behavior_ids:
        behavior_df = df[df.chosen_data == behavior]
        total = 0
        avg = 0
        for fish in fish_ids:
            id_frame = behavior_df[behavior_df.subject == fish]
            stop_total = id_frame[id_frame.status == 'STOP'].time.sum()
            start_total = id_frame[id_frame.status == 'START'].time.sum()
            total = total + stop_total - start_total
        occurences = len(behavior_df[behavior_df.status == 'START'].index)
        if (math.isnan(occurences) or (occurences < 1)):
            occurences = 1
        if (total == 0.0):
            avg = 0.0
        else:
            avg = total / occurences
        time_list.append((behavior, total, avg))
    return time_list
        
            
def get_row_index(df, values):
    """ 
    Get index positions of values in dataframe
    
    `Required` 
    :param df: Panda dataframe
    :param values: data structure with values to search
    """
    
    for value in values:
        listOfPos = list()
        # Get bool dataframe with True at positions where the given value exists
        result = df.isin([value])
        # Get list of columns that contains the value
        seriesObj = result.any()
        columnNames = list(seriesObj[seriesObj == True].index)
        # Iterate over list of columns and fetch the rows indexes where value exists
        for col in columnNames:
            rows = list(result[col][result[col] == True].index)
            for row in rows:
                listOfPos.append(row)
                return listOfPos
        # Return a list of tuples indicating the positions of value in the dataframe
    return listOfPos
     
def _clean(df):
    """
    Delete unneeded header information and standardize column names. 
    Add necessary column names if not present.
    
    `Required` 
    :param df: Panda dataframe
    """
    
    #If header is not first row, delete rows until one of ['Time', 'time', 'Subject', 'Fps', 'fps', 'subject'] appears
    try:
        header_row_index = get_row_index(df, ['Time', 'time', 'Subject', 'Fps', 'fps', 'subject'])[0]
        df = df.iloc[header_row_index:]
        df.columns = df.iloc[0]
        df = df.iloc[1:]
    except:
        pass
    
    #all header in lowercase, no spaces
    df.columns = [x.lower() for x in df.columns]
    df.columns = df.columns.str.replace(' ','_')
    
    #convert time to float if excel gives string objects   
    df.time = df.time.astype(float)
   
    #if dataset contains only two individuals and modifier_1 not included, add corresponding modifier_1
    if 'modifier_1' not in df.columns and len(df.subject.unique()) == 2:
        df['modifier_1'] = df.subject.unique()[0]
        df['modifier_1'] = np.where(df['subject'] == df.subject.unique()[0], df.subject.unique()[1], df['modifier_1'])

    #add missing columns
    if 'modifier_1' not in df.columns:
        df['modifier_1'] = 'unknown'
    if 'behavioral_category' not in df.columns:
        df['behavioral category '] = 'unknown'
    if 'status' not in df.columns:
        df['status'] = 'unknown'
    if 'total_length' not in df.columns:
        df['total_length'] = df['time'].iloc[-1]
    
    #map behaviors to corresponding behavioral category
    df['behavior'] = [x.lower() for x in df['behavior']]
    _map = [
        #overt aggressive
        (df['behavior'] == 'bite', 'overt aggressive'),
        (df['behavior'] == 'mouth', 'overt aggressive'),
        (df['behavior'] == 'ram', 'overt aggressive'),
        (df['behavior'] == 'mouthfight overt', 'aggressive'),
        #aggressive
        (df['behavior'] == 'bite/ram', 'overt aggressive'),
        (df['behavior'] == 'chase', 'aggressive'),
        (df['behavior'] == 'frontal', 'aggressive'),
        (df['behavior'] == 'lateral display', 'aggressive'),
        (df['behavior'] == 'head-down', 'aggressive'),
        (df['behavior'] == 'tailbeat', 'aggressive'), 
        (df['behavior'] == 'lunging', 'aggressive'),
        (df['behavior'] == 'head shake', 'aggressive'),
        (df['behavior'] == 'aggressive posture', 'aggressive'), 
        (df['behavior'] == 'puffed throat', 'aggressive'),
        (df['behavior'] == 'sand spitting', 'aggressive'),
        (df['behavior'] == 'lunging/shooting out', 'aggressive'),
        #non-aggressive/social
        (df['behavior'] == 'quivering', 'non-aggressive/social'),
        (df['behavior'] == 'soft touch', 'non-aggressive/social'),
        (df['behavior'] == 'following', 'non-aggressive/social'),
        (df['behavior'] == 'group meeting', 'non-aggressive/social'),
        (df['behavior'] == 'parralel swim', 'non-aggressive/social'),
        #submissive
        (df['behavior'] == 'flee or chased', 'submissive'),
        (df['behavior'] == 'bitten', 'submissive'),
        (df['behavior'] == 'submissive display', 'submissive'),
        #maintenance
        (df['behavior'] == 'feed', 'maintenance'),
        (df['behavior'] == 'swim', 'maintenance'),
        (df['behavior'] == 'still', 'maintenance'),
        (df['behavior'] == 'darting', 'maintenance'),
        (df['behavior'] == 'yawn', 'maintenance'),
        (df['behavior'] == 'scraping', 'maintenance'),
        #workload
        (df['behavior'] == 'digging', 'workload'),
        (df['behavior'] == 'hover', 'workload'),
        (df['behavior'] == 'carrying', 'workload'),

    ]
    condlist = [item[0] for item in _map]
    choicelist = [item[1] for item in _map]
    
    #add behavioral category if not present
    df['behavioral_category'] = np.where(df['behavioral_category'].isnull(), np.select(condlist, choicelist, default='unclassified'), df['behavioral_category'])
    
    return df

In [4]:
def create_dashboard():
    #handle user inputed files correctly by checking if it is an .xlsx or .csv file
    try:
        [behavior] = uploader_bhvr.value
    except: 
        display(Markdown("""#### You need to upload an behavior file first."""))
        return
    try:
        boris_df = _clean(pd.read_csv(io.BytesIO(uploader_bhvr.value[behavior]["content"])))
    except:
        boris_df = _clean(pd.read_excel(io.BytesIO(uploader_bhvr.value[behavior]["content"])))
    
    #try using the optional coordinates file, fails if file is not uploaded
    coordinates_present = True
    try:
        [coordinates] = uploader_pos.value
        try:
            coordinates_df = pd.read_csv(io.BytesIO(uploader_pos.value[coordinates]["content"]))
        except: 
            coordinates_df = pd.read_excel(io.BytesIO(uploader_pos.value[coordinates]["content"]))
        coordinates_df.columns = [x.lower() for x in coordinates_df.columns]
    except: 
        coordinates_present = False
        #display(Markdown("""#### Trajectory Map and Distance network cannot be computed as trajectorie-file is not uploaded."""))
    
    #display metainformation for the user
    display(Markdown("""### IDs"""))
    print(get_fish_ids(boris_df))
    display(Markdown("""### Behavioral categories"""))
    print(boris_df.behavioral_category.unique())
    display(Markdown("""### Behaviors"""))
    print(boris_df.behavior.unique())
    
    display(Markdown(""" --------------------------------------------------------"""))
    
    #display data plot
    display(Markdown("""## Data plot 
    \n - <strong>Usage</strong>: Double-click in the behavior-field, then use Up/Down-keys"""))
    accumulate_actions = interactive(create_accumulate_actions_plot, 
                                         boris_df=fixed(boris_df), 
                                         behavior = boris_df.behavior.unique(),
                                         show_avg = True,
                                         show_grid = True
                                        )
    display(accumulate_actions)
    
    display(Markdown(""" --------------------------------------------------------"""))
    
    #display transition probability network
    display(Markdown("""## Transition probability network 
    \n <strong>Usage</strong>: Optionally choo
    \n ##### min_count: Choose the minimum edge weight for an edge to be displayed. Only positive real numbers accepted.
    \n ##### rmv_bhvr/add_bhvr: Remove or add behaviors/behavioral categories. Delimiter is \'.
    \n ##### rmv_id/add_id: Remove or add animal IDs, Delimiter is \'.
    \n ##### with_status: Include status of behaviors in network 
    \n ##### normalized: Normalize sum of outgoing edges to 1 per node 
    \n ##### hue: 0-red, yellow-60, 120-green, 180-cyan, 240-blue, 300-violet \n
    \n ##### The total amount, average or total time of the behaviors can be mapped to node colour or size or optionally the node label.
    \n
    """))
    behavior_cycle = interactive(create_behavior_cycle, {'manual': True}, boris_df = fixed(boris_df), 
                                 data=['behavior', 'behavioral_category'], 
                                 min_count='', 
                                 rmv_id='', 
                                 add_id='', 
                                 rmv_bhvr='',
                                 add_bhvr='', 
                                 with_status=False, 
                                 normalized=False, 
                                 hue=widgets.IntSlider(value=270,min=0,max=360), 
                                 node_colour=['total_time', 'avg_time', 'amount'],
                                 node_size=['amount', 'total_time', 'avg_time'],
                                 node_label=[' - ', 'amount', 'total_time', 'avg_time'],
                                 sort_by=['amount', 'total_time', 'avg_time'])
    display(behavior_cycle) 
    
    display(Markdown(""" --------------------------------------------------------"""))
    
    if coordinates_present:
        display(Markdown("""## Trajectory map"""))
        trajectory_map = interactive(create_trajectory_map, coordinates_df = fixed(coordinates_df), boris_df = fixed(boris_df))
        display(trajectory_map)
        display(Markdown("""## Distance network \n Assuming we have 25 frames per second, the edge count is increased by 1/25 for each frame in which the distance between two fish is smaller than 'max_dist'. 
        \n The edge is displayed if the count is bigger than 'min_seconds'. The computation may take a few seconds if the dataset is large. \n
        PLEASE BE PATIENT, COMPUTATION TAKES 5 TO 10 SECONDS."""))
        distance_network = interactive(create_distance_network, coordinates_df = fixed(coordinates_df), max_dist = (10,500,5), min_seconds = (1,600,5))
        display(distance_network)
        display(Markdown(""" --------------------------------------------------------"""))
    
    #display interactive interaction network
    display(Markdown("""## Interaction network \n Nodes are subjects/objects of behaviors. 
    \n Choose the minimum count of interactions for an edge to be displayed"""))
    graph = interactive(create_interaction_network, boris_df = fixed(boris_df), min_interactions=(1,100,1))
    display(graph)
    
    """
    #show interactions network
    interactions_button = widgets.Button(description="Interaction network")
    display(interactions_button)
    output = widgets.Output()
    
    @output.capture(clear_output=True,wait=True)
    def show(_):
        # "linking function with output"
        with output:
            # what happens when we press the button
            output.clear_output()
            #display interactive interaction network
            graph = interactive(create_interaction_network, boris_df = fixed(boris_df), min_interactions=(1,100,1))
            display(graph)
    display(interactions_button)
    interactions_button.on_click(show)
    # displaying button and its output together
    widgets.VBox([interactions_button,output])
    """
    
    #display(Markdown(""" --------------------------------------------------------"""))
    
    #display activity plot
    #display(Markdown("""## Activity plot \n All behaviors are accumulated over time. You choose the size of the intervals."""))
    #activity_plot = interactive(create_activity_plot, boris_df=fixed(boris_df), intervals = (1,100,1))
    #display(activity_plot)
    return


In [5]:
display(Markdown(""" 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> \n
- <strong>Usage</strong>: Upload file containing the data \n by clicking <em>Behavior</em>. 
Analyse by clicking <em>Analyse data!</em> \n
- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>\n
- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... \n


---

"""))



 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> 

- <strong>Usage</strong>: Upload file containing the data 
 by clicking <em>Behavior</em>. 
Analyse by clicking <em>Analyse data!</em> 

- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>

- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... 



---



In [6]:
#buttons

#reset user input
#reset_button = widgets.Button(description="Clear data")
#def reset(_):
#    with out:
#        out.clear_output()
    #uploader_bhvr.close()
    #uploader_pos.close()
#display(reset_button)
#reset_button.on_click(reset)


#upload behavior and trajectories
uploader_bhvr = widgets.FileUpload(description='Behavior', multiple=True)
display(uploader_bhvr)
uploader_pos = widgets.FileUpload(description='Trajectories', multiple=True)
display(uploader_pos)

#analyze uploaded data
analyse_button = widgets.Button(description="Analyse data!")
out = widgets.Output()
def analyse(_):
    # "linking function with output"
    with out:
        # what happens when we press the button
        out.clear_output()
        create_dashboard()
analyse_button.on_click(analyse)
# displaying button and its output together
widgets.VBox([analyse_button,out])

FileUpload(value={}, description='Behavior', multiple=True)

FileUpload(value={}, description='Trajectories', multiple=True)

VBox(children=(Button(description='Analyse data!', style=ButtonStyle()), Output()))

In [7]:
display(Markdown(""" ---"""))
display(Markdown("""  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>"""))

 ---

  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>

In [8]:

df = pd.read_excel("multi2-big-dataset-etienne.xlsx")

df = pd.read_csv("multi3-huy.csv")

def dummy():
    return

dummy = interactive(dummy)
display(dummy)


interactive(children=(Output(),), _dom_classes=('widget-interact',))