In [16]:
"""
This notebook is the main part of the bachelor's project 'A concept and prototypical implementation for network based 
analysis of fish behavior' by Nicolai Kraus at the University of Constance, supported by Michael Aichem and 
supervised by Dr. Karsten Klein. 

The project consists of a pipeline which loads a behavior dataset and produces an interactive dashboard via Jupyter 
Notebook and Voila.

"""

#standard libraries
import os
import io
import warnings
import math
import numpy as np

#network/plot generation
import networkx as nx
from networkx.drawing.nx_pydot import to_pydot
import matplotlib.pyplot as plt
import pandas as pd
import graphviz
import matplotlib as mpl

#UI/display
import ipywidgets as widgets
from ipywidgets import interactive,interact, fixed, VBox, HBox
from IPython.display import display, Image, Markdown, SVG, HTML, clear_output

# Silence pandas' chained-assignment warning and all FutureWarnings so they do
# not clutter the dashboard output.
pd.options.mode.chained_assignment = None  # default='warn'
warnings.simplefilter(action='ignore', category=FutureWarning)

# On Windows the Graphviz executables ('dot', 'neato') are frequently not on
# PATH. Append the default install directory there so the library can find
# them. Other platforms are left untouched: the directory cannot exist there.
GRAPHVIZ_BIN_DIR = 'C:\\Program Files (x86)\\Graphviz2.38\\bin'
if os.name == 'nt':
    # .get() instead of indexing: PATH may be unset in restricted environments
    _search_path = os.environ.get("PATH", "")
    if GRAPHVIZ_BIN_DIR not in _search_path:
        if _search_path:
            os.environ["PATH"] = _search_path + os.pathsep + GRAPHVIZ_BIN_DIR
        else:
            os.environ["PATH"] = GRAPHVIZ_BIN_DIR

# Global dataframe holding the cleaned dataset; None until a file is uploaded.
df = None

display(Markdown(
""" 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> \n
- <strong>Usage</strong>: Upload file containing the data \n by clicking <em>Behavior</em>\n
- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>\n
- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... \n
---
"""))

 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> 

- <strong>Usage</strong>: Upload file containing the data 
 by clicking <em>Behavior</em>

- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>

- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... 

---


In [19]:
# Upload button for the behavior data, together with the output area that the
# upload handler fills with messages, dataset information and the data plot.
out = widgets.Output()
uploader_bhvr = widgets.FileUpload(
    description='Behavior',
    multiple=True,
)
display(uploader_bhvr, out)

def upload_handler(_):
    """
    Handle a file upload. On success, store the cleaned data in the global
    dataframe variable df and display metainformation about the dataset
    together with the data plot. On failure, reset df to None and give
    instructions on what is missing or wrong.

    `Required`
    :param _: Change notification sent by the upload widget (not used)
    """
    global df

    uploads = uploader_bhvr.value
    if not uploads:
        # nothing selected, e.g. the widget was reset
        return
    # The widget accepts several files, but only one dataset can be analysed
    # at a time: use the first one and show its name further below.
    file_name = next(iter(uploads))
    content = uploads[file_name]["content"]

    #read uploaded file into dataframe, display message if wrong file format
    try:
        raw_df = pd.read_csv(io.BytesIO(content))
    except Exception:
        try:
            raw_df = pd.read_excel(io.BytesIO(content))
        except Exception:
            df = None
            with out:
                clear_output(wait=True)
                display(Markdown("""File must be of type <em>.csv</em> or <em>.xlsx</em>"""))
            return

    #clean file and display message if required header(s) are missing
    try:
        cleaned_df = _clean(raw_df)
    except Exception as err:
        df = None
        # compare normalized names so 'Time' and 'time' are treated alike
        present = {str(name).strip().lower().replace(' ', '_') for name in raw_df.columns}
        missing = [name for name in ('time', 'subject', 'behavior', 'status') if name not in present]
        with out:
            clear_output(wait=True)
            for name in missing:
                display(Markdown(""" Missing column header: <em>%s/%s</em> """ % (name.capitalize(), name)))
            if not missing:
                # all headers are there, so report the actual problem instead of staying silent
                display(Markdown("""File could not be processed"""))
                print(type(err).__name__ + ": " + str(err))
        return

    #data upload and sanitation successful, display information about dataset
    df = cleaned_df
    with out:
        clear_output(wait=True)
        display(Markdown("""#### File name"""))
        print(file_name)
        display(Markdown("""#### IDs"""))
        print(get_fish_ids(df))
        display(Markdown("""#### Behavioral categories"""))
        print(df.behavioral_category.unique())
        display(Markdown("""#### Behaviors"""))
        print(df.behavior.unique())
        display(Markdown("""---"""))

        #display data plot
        display(Markdown("""## Data plot 
            \n - <strong>Usage</strong>: Double-click in the behavior-field, then use Up/Down-keys"""))
        data = interactive(plot_data,
                           df=fixed(df),
                           behavior=df.behavior.unique(),
                           show_avg=True,
                           show_grid=True
                           )
        display(data)

    return

#connect upload_handler to the file upload widget by observing its internal counter
uploader_bhvr.observe(upload_handler, names='_counter')

FileUpload(value={}, description='Behavior', multiple=True)

Output()

In [20]:
def _select_behavior_rows(frame, behavior):
    """Return the rows of `frame` whose behavioral category or behavior equals `behavior`."""
    if 'behavioral_category' in frame:
        # category matches first, then behavior matches, stacked on top of each other
        return pd.concat([frame[frame.behavioral_category == behavior],
                          frame[frame.behavior == behavior]])
    return frame[frame.behavior == behavior]


def plot_data(df, behavior, show_avg, show_grid):
    """
    Plot, for every ID, the accumulated number of occurrences of `behavior`
    over time, so the total amount is visible as well as the periods in which
    the number of actions increased most. The figure is shown and also saved
    to 'images/accumulate_actions_plot.png'.

    `Required`
    :param df: Cleaned behavior dataframe (columns time, subject, behavior and
               optionally behavioral_category are read)
    :param behavior: Name of a behavior or behavioral category to count
    :param show_avg: If True, add the average over all IDs as a black line
    :param show_grid: If True, draw a light grid
    :return: The matplotlib.pyplot module
    """

    #get fish ids and initial empty figure for the plot
    fish_ids = get_fish_ids(df)
    fig = plt.figure(figsize=(9, 7))
    end_time = df.time.max()

    #filter the rows once per fish and reuse them for all three passes
    per_fish = [(fish, _select_behavior_rows(df[df.subject == fish], behavior)) for fish in fish_ids]

    #plot the accumulated amount of the selected behavior for each fish
    highest_plot = 0
    for fish, fish_df in per_fish:
        highest_plot = max(highest_plot, len(fish_df) + 1)
        plt.plot(fish_df.time, range(1, len(fish_df) + 1), label=fish)
    #reset colour cycle so the dotted segments reuse the colour of their fish
    plt.gca().set_prop_cycle(None)

    #dotted line from the last behavior of a fish to the end of the recording
    for _, fish_df in per_fish:
        plt.plot([fish_df.time.max(), end_time], [len(fish_df), len(fish_df)], ':')
    plt.gca().set_prop_cycle(None)

    #dotted line from the origin to the first behavior of a fish
    for _, fish_df in per_fish:
        plt.plot([0, fish_df.time.min()], [0, 1], ':')

    #plot average; skipped when there is nothing to average
    if show_avg:
        avg_df = _select_behavior_rows(df, behavior)
        if len(avg_df) and fish_ids:
            avg_range = [step / len(fish_ids) for step in range(1, len(avg_df) + 1)]
            #plot from 0 to first value dotted, main part, and end dotted
            plt.plot([0, avg_df.time.min()], [0, avg_range[0]], ':', color="black")
            plt.plot(avg_df.time, avg_range, label="average", color="black")
            plt.plot([avg_df.time.max(), end_time], [avg_range[-1], avg_range[-1]], ':', color="black")

    #finish the plot with some details
    plt.legend()
    plt.xlabel("Time", fontsize=18, labelpad=10)
    plt.ylabel("# " + str(behavior), fontsize=18, labelpad=10)
    #make frequency of yticks dependent on size of the highest plot
    tick_step = 50
    for upper_bound, step in ((11, 1), (26, 2), (51, 5), (101, 10), (201, 20)):
        if highest_plot < upper_bound:
            tick_step = step
            break
    plt.yticks(range(0, highest_plot, tick_step))

    if show_grid:
        plt.grid(linestyle='-', linewidth=0.2)

    #savefig does not create missing directories; save before show() releases the figure
    os.makedirs('images', exist_ok=True)
    fig.savefig('images/accumulate_actions_plot.png', bbox_inches='tight')
    plt.show()
    return plt


In [21]:
# Heading and input widgets of the transition probability network
display(Markdown("""---"""))
display(Markdown("""## Transition probability network 
\n - <strong>Usage</strong>: Choose parameters, click <em>Execute</em>"""))

# --- data source and display switches ---
data_button = widgets.ToggleButtons(options=['behavior', 'behavioral_category'], value='behavior')
status_button = widgets.ToggleButton(value=False, description='Show status', icon='check')
normalized_button = widgets.ToggleButton(value=True, description='Normalize', icon='check')

# --- thinning out: edge threshold, IDs and nodes to delete or add back ---
min_count_button = widgets.FloatText(value=0.0, description='Min_edge_val')
rmv_id_button = widgets.Text(value='', description='Delete ID')
add_id_button = widgets.Text(value='', description='Add ID')
rmv_bhvr_button = widgets.Text(value='', description='Delete node')
add_bhvr_button = widgets.Text(value='', description='Add node')

# --- node appearance: which measure drives colour, size and label ---
hue_slider = widgets.IntSlider(value=140, min=1, max=360, step=1, description='Color hue:')
colour_button = widgets.ToggleButtons(
    description='Colour',
    options=['amount', 'total_time', 'avg_time'],
    value='total_time',
)
size_button = widgets.ToggleButtons(
    description='Size',
    options=['amount', 'total_time', 'avg_time'],
    value='amount',
)
label_button = widgets.ToggleButtons(
    description='Label',
    options=['amount', 'total_time', 'avg_time', ' - '],
    value=' - ',
)

# --- ordering of the printed node table ---
sort_button = widgets.ToggleButtons(
    description='Sort by',
    options=['amount', 'total_time', 'avg_time'],
    value='total_time',
)

# --- button that triggers the network generation ---
execute_network_button = widgets.Button(description='Execute')

#tabs for organizing the buttons of the transition probability network
tab1 = VBox(children=[data_button,
                      HBox(children=[normalized_button, status_button]),
                     ])

tab2 = VBox(children=[size_button,
                      label_button,
                      colour_button,
                      hue_slider,
                     ])

tab3 = VBox(children=[rmv_bhvr_button,
                      add_bhvr_button,
                      rmv_id_button,
                      add_id_button,
                      min_count_button,
                     ])

tab4 = VBox(children=[sort_button,
                     ])

# tab4 must be a child of the Tab widget: otherwise the 'infotable' title
# belongs to a page that does not exist and the sort option is unreachable.
tab = widgets.Tab(children=[tab1, tab2, tab3, tab4])
tab.set_title(0, 'network')
tab.set_title(1, 'nodes')
tab.set_title(2, 'thin out')
tab.set_title(3, 'infotable')

# output area for the network, and the complete control panel shown to the user
network_output = widgets.Output()
transition_probability_network = VBox(children=[tab, execute_network_button, network_output])

    
def execute_network(_):
    """
    Click handler of the Execute button: rebuild the transition probability
    network inside the network output area.

    `Required`
    :param _: The button instance passed by ipywidgets (not used)
    """
    with network_output:
        clear_output(wait=True)
        if df is None:
            display(Markdown("""Not possible: No uploaded file detected"""))
            return
        try:
            create_behavior_cycle()
        except Exception as err:
            # Report the actual problem; blaming a missing upload for every
            # failure hides real errors from the user.
            display(Markdown("""Not possible: the network could not be created"""))
            print(type(err).__name__ + ": " + str(err))


# Registered by call instead of decorator: on_click() returns None, so the
# decorator form would rebind the name execute_network to None.
execute_network_button.on_click(execute_network)

display(transition_probability_network)





# Names removed by the user. They persist between clicks on Execute, so a
# removal stays active until the name is entered in the matching 'Add' field.
remove_list_cat = []
remove_list = []
remove_id_list = []


def _update_removed(removed, rmv_text, add_text, known):
    """Apply the 'Delete'/'Add' text inputs (delimiter ') to the persistent list `removed`."""
    known = list(known)
    if rmv_text:
        for name in rmv_text.split('\''):
            # no duplicates, and at least one known entry always has to remain
            if name in known and name not in removed and len(removed) + 1 < len(known):
                removed.append(name)
    if add_text:
        for name in add_text.split('\''):
            if name in removed:
                removed.remove(name)


def _log_scale(values, offset=0):
    """Scale `values + offset` logarithmically into [0, 1]; missing or invalid values become 0."""
    with np.errstate(divide='ignore', invalid='ignore'):
        logs = np.log(values.astype(float) + offset)
    logs = logs.replace([np.inf, -np.inf], np.nan)
    span = logs.max() - logs.min()
    if span > 0:
        scaled = (logs - logs.min()) / span
    else:
        # all values identical (or none valid): avoid 0/0, use full scale for valid entries
        scaled = logs.notna().astype(float)
    return scaled.fillna(0.0)


def create_behavior_cycle():
    """
    Build and display the transition probability network from the global
    dataframe df and the current values of the input widgets.

    For each remaining ID the consecutive behaviors (or behavioral categories)
    are paired, and the number of occurrences of each pair becomes the edge
    count. With 'Normalize' active, the counts of the outgoing edges of each
    node are divided by their sum, giving a kind of probability of which
    behavior follows which. Node size, colour and label are derived from the
    amount, total time or average time of the node. The graph is rendered with
    graphviz to 'images/transitions.gv.svg' and displayed.

    The global df is only read: removals are applied to a copy, so removed
    behaviors and IDs can be added back later.

    :raises ValueError: if no dataset has been uploaded yet
    """
    if df is None:
        raise ValueError("No uploaded file detected")

    #network tab
    data = data_button.value
    with_status = status_button.value
    normalized = normalized_button.value

    #thin out tab
    min_count = min_count_button.value
    rmv_id = rmv_id_button.value
    add_id = add_id_button.value
    rmv_bhvr = rmv_bhvr_button.value
    add_bhvr = add_bhvr_button.value

    #nodes tab
    hue = hue_slider.value
    node_colour = colour_button.value
    node_size = size_button.value
    node_label = label_button.value
    #infotable tab
    sort_by = sort_button.value

    #work on a copy so the uploaded dataset stays complete
    data_df = df.copy()
    fish_ids = get_fish_ids(data_df)

    #prepare dataframe with user input
    #first check if the user wants so see the behaviors or the behavioral categories
    if data == 'behavioral_category':
        #reset list of removed behaviors
        remove_list.clear()
        data_df['chosen_data'] = data_df['behavioral_category']
        display(Markdown("""#### All behavioral categories: \n"""))
        print(data_df.chosen_data.unique())
        removed_nodes = remove_list_cat
        removed_heading = """#### Removed behavioral categories: \n"""
    else:
        #reset list of removed behavioral categories
        remove_list_cat.clear()
        data_df['chosen_data'] = data_df['behavior']
        display(Markdown("""#### All behaviors:"""))
        print(data_df.chosen_data.unique())
        removed_nodes = remove_list
        removed_heading = """#### Removed behavior: \n"""

    #remove and add behaviors / behavioral categories
    _update_removed(removed_nodes, rmv_bhvr, add_bhvr, data_df.chosen_data.unique())
    if removed_nodes:
        display(Markdown(removed_heading))
        print(set(removed_nodes))
        data_df = data_df[~data_df.chosen_data.isin(removed_nodes)]

    #remove and add IDs
    _update_removed(remove_id_list, rmv_id, add_id, fish_ids)
    if remove_id_list:
        display(Markdown("""#### Removed IDs: \n"""))
        print(set(remove_id_list))
    fish_ids_after_removal = [x for x in fish_ids if x not in remove_id_list]

    display(Markdown(""" --------------------------------------------------------"""))

    #for each fish, pair every event with its successor
    successor_list = []
    for fish in fish_ids_after_removal:
        id_frame = data_df[data_df.subject == fish]
        if not with_status:
            id_frame = id_frame[id_frame.status != 'STOP']
        actions = id_frame.chosen_data.tolist()
        states = id_frame.status.tolist()
        successor_list.extend(zip(actions[:-1], states[:-1], actions[1:], states[1:]))

    #lets make an edgelist with behavior and successor
    successor_df = pd.DataFrame(successor_list, columns=['action_1', 'status_1', 'action_2', 'status_2'])
    if successor_df.empty:
        display(Markdown("""#### No transitions found for the current selection."""))
        return
    if with_status:
        successor_df['action_1'] = successor_df['action_1'] + ' ' + successor_df['status_1']
        successor_df['action_2'] = successor_df['action_2'] + ' ' + successor_df['status_2']

    #count each (behavior, successor) pair once; grouping by the status columns
    #as well would split the count of one edge over several rows
    edges_df = successor_df.groupby(['action_1', 'action_2']).size().reset_index(name='records')
    edges_df['tuples'] = list(zip(edges_df.action_1, edges_df.action_2))

    #normalize the records in [0,1] so that all together are 1 for each action
    if normalized:
        outgoing_total = edges_df.groupby('action_1')['records'].transform('sum')
        edges_df['normalized'] = edges_df.records.div(outgoing_total).round(2)

    #erase edges below min_count
    if min_count:
        try:
            threshold = float(min_count)
        except (TypeError, ValueError):
            display(Markdown("""#### min_count has to be a positive real number. No edges were removed.\n"""))
        else:
            weight_column = 'normalized' if normalized else 'records'
            edges_df = edges_df[edges_df[weight_column] > threshold]
    if edges_df.empty:
        display(Markdown("""#### No edges left for the current selection."""))
        return

    # add average and total time
    times_list = get_total_and_avg_time(data_df, fish_ids_after_removal)
    times_df = pd.DataFrame(times_list, columns=['action_1', 'total_time', 'avg_time'])

    #work on the nodes(behaviors) of the graph so we can later set node-attributes for graphviz
    nodes_df = edges_df.groupby('action_1')['records'].sum().to_frame(name='records').reset_index()
    nodes_df = pd.merge(times_df, nodes_df, on='action_1', how='outer')
    nodes_df.columns = ['node', 'total_time', 'avg_time', 'record']
    #round results
    nodes_df['total_time'] = nodes_df.total_time.round(2)
    nodes_df['avg_time'] = nodes_df.avg_time.round(2)

    #column of nodes_df that belongs to each selectable measure
    metric_columns = {'amount': 'record', 'total_time': 'total_time', 'avg_time': 'avg_time'}

    #optionally append the chosen measure to the node names; edges are renamed
    #through the same mapping so node names in edges and attributes always agree
    if node_label in metric_columns:
        labelled = nodes_df['node'] + " - " + nodes_df[metric_columns[node_label]].astype(str)
        relabel = dict(zip(nodes_df['node'], labelled))
        edges_df['action_1'] = edges_df['action_1'].map(lambda name: relabel.get(name, name))
        edges_df['action_2'] = edges_df['action_2'].map(lambda name: relabel.get(name, name))
        edges_df['tuples'] = list(zip(edges_df['action_1'], edges_df['action_2']))
        nodes_df['node'] = labelled

    nodes_df = nodes_df.sort_values(by=metric_columns.get(sort_by, 'avg_time'), ascending=False)

    # print behavior nodes and amount
    print(nodes_df.to_string(index=False))
    display(Markdown(""" --- - - - """))

    #logarithmic normalization of record, avg_time and total_time
    #(times are shifted by 1 so a duration of 0 has a defined logarithm)
    nodes_df['record'] = _log_scale(nodes_df.record)
    nodes_df['total_time'] = _log_scale(nodes_df.total_time, offset=1)
    nodes_df['avg_time'] = _log_scale(nodes_df.avg_time, offset=1)

    #node sizes dependent on user input as dictionaries for graphviz
    size_values = nodes_df[metric_columns[node_size]]
    nodes_width = dict(zip(nodes_df.node, size_values * 3))
    nodes_height = dict(zip(nodes_df.node, size_values * 1.4))

    #node colour dependent on user input: "hue saturation value" with the
    #normalized measure as saturation
    hue = hue / 360
    nodes_df['colour'] = str(hue) + " " + nodes_df[metric_columns[node_colour]].astype(str) + " 1"
    nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))

    #create directed graph
    G = nx.DiGraph()
    G.add_edges_from(edges_df.tuples)

    #create label and weight for edges
    if normalized:
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.normalized))
        edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.normalized * 3))
    else:
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.records))
        #normalize logarithmic, shifted so the thinnest edge stays visible
        edge_weights = _log_scale(edges_df.records) + 0.1
        edge_attributes_weight = dict(zip(edges_df.tuples, edge_weights / edge_weights.max()))

    #set edge attributes
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    nx.set_edge_attributes(G, edge_attributes_label, name='label')

    #set node attributes
    nx.set_node_attributes(G, nodes_width, name='width')
    nx.set_node_attributes(G, nodes_height, name='height')
    nx.set_node_attributes(G, nodes_colour, name='fillcolor')
    nx.set_node_attributes(G, 'filled', name='style')
    # NOTE(review): the tooltip text looks like a placeholder - confirm the intended content
    nx.set_node_attributes(G, "1234 &#013; 234 &#013; 234", name='tooltip')

    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format = 'svg'
    G_dot.render('images/transitions.gv', view=False)
    display(HTML('images/transitions.gv.svg'))

    return

---

## Transition probability network 

 - <strong>Usage</strong>: Choose parameters, click <em>Execute</em>

VBox(children=(Tab(children=(VBox(children=(ToggleButtons(options=('behavior', 'behavioral_category'), value='…

In [22]:
def get_fish_ids(boris_df):
    """Return the unique subject ids of `boris_df` in order of first appearance.

    A leftover header value 'Subject' and missing values are skipped. All
    functions needing the ids call this function, so the order is always the
    same, which results in a consistent colour scheme over all plots.
    """
    return [
        subject
        for subject in boris_df.subject.unique().tolist()
        if subject != 'Subject' and str(subject) != 'nan'
    ]


def get_total_and_avg_time(df, fish_ids):
    """
    Compute total and average duration of every behavior for the given subjects.

    The duration is derived from the event times: sum of all 'STOP' times minus
    sum of all 'START' times. The average divides this total by the number of
    'START' events (at least 1). Only events of subjects in `fish_ids` enter
    both the total and the number of occurrences.

    `Required`
    :param df: Dataframe with the columns time, subject, chosen_data and status
    :param fish_ids: Subjects to include
    :return: List of tuples (behavior, total_time, avg_time), one per unique
             value of chosen_data in `df`
    """
    events = df[['time', 'subject', 'chosen_data', 'status']]
    # behaviors are listed from the full frame, so a behavior shown only by
    # excluded subjects still gets an entry (with a total of 0)
    behavior_ids = events.chosen_data.unique().tolist()
    events = events[events.subject.isin(fish_ids)]

    time_list = []
    for behavior in behavior_ids:
        behavior_df = events[events.chosen_data == behavior]
        starts = behavior_df[behavior_df.status == 'START']
        stops = behavior_df[behavior_df.status == 'STOP']
        total = stops.time.sum() - starts.time.sum()
        # guard against division by zero for behaviors without START events
        occurences = max(len(starts), 1)
        avg = 0.0 if total == 0.0 else total / occurences
        time_list.append((behavior, total, avg))
    return time_list
        
            
def get_row_index(df, values):
    """
    Get the row index labels of all cells of a dataframe that equal one of the
    given values.

    `Required`
    :param df: Panda dataframe
    :param values: data structure with values to search
    :return: List of row index labels, ordered by value, then by column, then
             by row. Empty if nothing matches or `values` is empty.
    """
    positions = []
    for value in values:
        # bool dataframe with True at positions where the given value exists
        matches = df.isin([value])
        # columns that contain the value at least once
        column_hits = matches.any()
        for col in column_hits[column_hits].index:
            hits = matches[col]
            positions.extend(hits[hits].index.tolist())
    return positions
     
# Behavioral category of each known behavior (keys are lowercase behavior names)
_BEHAVIOR_CATEGORIES = {
    #overt aggressive
    'bite': 'overt aggressive',
    'mouth': 'overt aggressive',
    'ram': 'overt aggressive',
    'mouthfight': 'overt aggressive',
    'bite/ram': 'overt aggressive',
    #aggressive
    'chase': 'aggressive',
    'frontal': 'aggressive',
    'lateral display': 'aggressive',
    'head-down': 'aggressive',
    'tailbeat': 'aggressive',
    'lunging': 'aggressive',
    'head shake': 'aggressive',
    'aggressive posture': 'aggressive',
    'puffed throat': 'aggressive',
    'sand spitting': 'aggressive',
    'lunging/shooting out': 'aggressive',
    #non-aggressive/social
    'quivering': 'non-aggressive/social',
    'soft touch': 'non-aggressive/social',
    'following': 'non-aggressive/social',
    'group meeting': 'non-aggressive/social',
    'parralel swim': 'non-aggressive/social',
    # correctly spelled variant accepted in addition to the original key
    'parallel swim': 'non-aggressive/social',
    #submissive
    'flee or chased': 'submissive',
    'bitten': 'submissive',
    'submissive display': 'submissive',
    #maintenance
    'feed': 'maintenance',
    'swim': 'maintenance',
    'still': 'maintenance',
    'darting': 'maintenance',
    'yawn': 'maintenance',
    'scraping': 'maintenance',
    #workload
    'digging': 'workload',
    'hover': 'workload',
    'carrying': 'workload',
}


def _clean(df):
    """
    Delete unneeded header information and standardize column names.
    Add necessary columns if not present.

    When the header is already the first row, the passed dataframe itself is
    modified (renamed and added columns); otherwise a trimmed copy is used.

    `Required`
    :param df: Panda dataframe
    :return: The cleaned dataframe
    :raises KeyError: if one of the columns time, subject or behavior is missing
    """

    #If the header is not the first row, drop everything above the first row
    #that contains one of the known header names and use that row as header
    header_rows = get_row_index(df, ['Time', 'time', 'Subject', 'subject', 'Status', 'Behavior'])
    if header_rows:
        df = df.iloc[header_rows[0]:]
        df.columns = df.iloc[0]
        df = df.iloc[1:].copy()

    #all header in lowercase, no spaces; str() guards against non-string headers
    df.columns = [str(name).strip().lower().replace(' ', '_') for name in df.columns]

    #fail early with a clear message instead of an arbitrary error further down
    missing = [name for name in ('time', 'subject', 'behavior') if name not in df.columns]
    if missing:
        raise KeyError("Missing required column(s): " + ", ".join(missing))

    #convert time to float if excel gives string objects
    df['time'] = df['time'].astype(float)

    #add missing columns
    if 'modifier_1' not in df.columns:
        subjects = df['subject'].unique()
        if len(subjects) == 2:
            #with exactly two individuals the modifier is the respective other one
            df['modifier_1'] = np.where(df['subject'] == subjects[0], subjects[1], subjects[0])
        else:
            df['modifier_1'] = 'unknown'
    if 'behavioral_category' not in df.columns:
        #left empty on purpose so the categories are filled in from the mapping below
        df['behavioral_category'] = None
    if 'status' not in df.columns:
        df['status'] = 'unknown'
    if 'total_length' not in df.columns:
        df['total_length'] = df['time'].iloc[-1] if len(df) else np.nan

    #behaviors in lowercase; non-string entries (e.g. missing values) are kept as they are
    df['behavior'] = df['behavior'].map(lambda name: name.lower() if isinstance(name, str) else name)

    #add behavioral category where it is not present
    mapped = df['behavior'].map(_BEHAVIOR_CATEGORIES).fillna('unclassified')
    df['behavioral_category'] = np.where(df['behavioral_category'].isnull(), mapped, df['behavioral_category'])

    return df

In [23]:
def upload():
    """Render the interactive analysis sections of the dashboard.

    Displays, in order:

    1. the transition probability network (usage text plus its controls),
    2. the trajectory map and the distance network, only when
       ``coordinates_present`` is truthy,
    3. the interaction network.

    Each section is a Markdown heading followed by an ``ipywidgets.interactive``
    widget wrapping one of the ``create_*`` functions.

    The function takes no arguments: ``boris_df``, ``coordinates_df`` and
    ``coordinates_present`` are not locals and are resolved from the enclosing
    (module) scope, as are the ``create_*`` callbacks.

    Returns:
        None. The function is called for its display side effects only.
    """
    separator = """ --------------------------------------------------------"""

    # --- Transition probability network ------------------------------------
    # NOTE(review): the "Usage" sentence below ends in "Optionally choo" and
    # looks truncated; the intended wording is not recoverable from this
    # function, so it is left as-is -- please complete it.
    display(Markdown("""## Transition probability network 
    \n <strong>Usage</strong>: Optionally choo
    \n ##### min_count: Choose the minimum edge weight for an edge to be displayed. Only positive real numbers accepted.
    \n ##### rmv_bhvr/add_bhvr: Remove or add behaviors/behavioral categories. Delimiter is \'.
    \n ##### rmv_id/add_id: Remove or add animal IDs, Delimiter is \'.
    \n ##### with_status: Include status of behaviors in network 
    \n ##### normalized: Normalize sum of outgoing edges to 1 per node 
    \n ##### hue: 0-red, 60-yellow, 120-green, 180-cyan, 240-blue, 300-violet \n
    \n ##### The total amount, average or total time of the behaviors can be mapped to node colour or size or optionally the node label.
    \n
    """))
    # {'manual': True} puts the widget in manual mode: the callback is run on
    # an explicit button press rather than on every control change.
    behavior_cycle = interactive(create_behavior_cycle, {'manual': True}, boris_df = fixed(boris_df),
                                 data=['behavior', 'behavioral_category'],
                                 min_count='',
                                 rmv_id='',
                                 add_id='',
                                 rmv_bhvr='',
                                 add_bhvr='',
                                 with_status=False,
                                 normalized=False,
                                 hue=widgets.IntSlider(value=270,min=0,max=360),
                                 node_colour=['total_time', 'avg_time', 'amount'],
                                 node_size=['amount', 'total_time', 'avg_time'],
                                 node_label=[' - ', 'amount', 'total_time', 'avg_time'],
                                 sort_by=['amount', 'total_time', 'avg_time'])
    display(behavior_cycle)

    display(Markdown(separator))

    # --- Coordinate-based views (only if coordinate data is available) ------
    if coordinates_present:
        display(Markdown("""## Trajectory map"""))
        trajectory_map = interactive(create_trajectory_map, coordinates_df = fixed(coordinates_df), boris_df = fixed(boris_df))
        display(trajectory_map)
        display(Markdown("""## Distance network \n Assuming we have 25 frames per second, the edge count is increased by 1/25 for each frame in which the distance between two fish is smaller than 'max_dist'. 
        \n The edge is displayed if the count is bigger than 'min_seconds'. The computation may take a few seconds if the dataset is large. \n
        PLEASE BE PATIENT, COMPUTATION TAKES 5 TO 10 SECONDS."""))
        # (min, max, step) tuples are turned into sliders by ipywidgets.
        distance_network = interactive(create_distance_network, coordinates_df = fixed(coordinates_df), max_dist = (10,500,5), min_seconds = (1,600,5))
        display(distance_network)
        display(Markdown(separator))

    # --- Interaction network ------------------------------------------------
    display(Markdown("""## Interaction network \n Nodes are subjects/objects of behaviors. 
    \n Choose the minimum count of interactions for an edge to be displayed"""))
    graph = interactive(create_interaction_network, boris_df = fixed(boris_df), min_interactions=(1,100,1))
    display(graph)


In [24]:
# Page footer: a horizontal rule followed by the acknowledgements / contact note.
# display() renders each positional argument in turn, so one call emits both.
display(
    Markdown(""" ---"""),
    Markdown("""  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>"""),
)

 ---

  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>

In [25]:

# Load the development dataset into the notebook-level `df`.
# The earlier pd.read_excel("multi2-big-dataset-etienne.xlsx") load was dropped:
# its result was overwritten by the read_csv below before ever being used, so it
# only cost load time and raised if the xlsx file was absent. To work with that
# dataset instead, replace the line below with the read_excel call.
df = pd.read_csv("multi3-huy.csv")

