In [17]:
"""
This notebook is the programmatic part of the bachelors project 'A concept and prototypical implementation for network based 
analysis of fish behavior' by Nicolai Kraus at the University of Constance, supported by Michael Aichem and 
supervised by Dr. Karsten Klein. 

The project consists of a pipeline which loads a behavior dataset and produces an interactive dashboard via Jupyter
notebook and Voila.

"""

#standard libraries
import os
import io
import warnings
import math
import numpy as np
import datetime
import time
import base64
from termcolor import colored

#network/plot generation
import networkx as nx
from networkx.drawing.nx_pydot import to_pydot
import matplotlib.pyplot as plt
import pandas as pd
import graphviz
import matplotlib as mpl
import pickle

#network distances/similarity
import netrd
import itertools

#UI/display
import ipywidgets as widgets
from ipywidgets import interactive,interact, fixed, VBox, HBox
from IPython.display import display, Image, Markdown, SVG, HTML, clear_output, FileLink

#remove warnings
pd.options.mode.chained_assignment = None  # default='warn'
warnings.simplefilter(action='ignore', category=FutureWarning)

#Windows only: the Graphviz binaries 'dot' and 'neato' have to be on PATH so the
#graphviz wrappers can find them. The directory is only appended on Windows and
#only once; a missing PATH variable is treated as empty instead of raising KeyError.
GRAPHVIZ_WINDOWS_BIN = 'C:\\Program Files (x86)\\Graphviz2.38\\bin'
if os.name == 'nt':
    _current_path = os.environ.get("PATH", "")
    if GRAPHVIZ_WINDOWS_BIN not in _current_path:
        os.environ["PATH"] = os.pathsep.join(
            part for part in (_current_path, GRAPHVIZ_WINDOWS_BIN) if part
        )

#global state shared between the cells; df is filled by the upload handler
df = None
trajectory_df = None
G1 = None
G2 = None

#render the dashboard title and the usage instructions as Markdown at the top of the notebook
display(Markdown(
""" 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> \n
- <strong>Usage</strong>: Upload file containing the data \n by clicking <em>Behavior</em>\n
- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>\n
- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... \n
---
"""))

 
# BehaviorAnalyzer 
<em>An interactive tool to visually analyse behavior data derived from event-logging software like BORIS</em> 

- <strong>Usage</strong>: Upload file containing the data 
 by clicking <em>Behavior</em>

- <strong>Required columns</strong>: <em>Time</em>, <em>Subject</em>, <em>Behavior</em>, <em>Status</em>

- <strong>Optional columns</strong>: <em>Modifier 1</em>, <em>Behavioral category</em>, <em>Total length</em> ... 

---


In [18]:
def get_fish_ids(boris_df):
    """Return the distinct entries of the 'subject' column in order of first appearance.

    A leftover header cell ('Subject') and missing values are skipped. Every plot
    obtains its ids through this function, so the order - and with it the colour
    assigned to each individual - stays the same across all plots.
    """
    distinct_subjects = boris_df.subject.unique().tolist()
    return [
        subject
        for subject in distinct_subjects
        if subject != 'Subject' and str(subject) != 'nan'
    ]


def get_total_and_avg_time(df, fish_ids):
    """
    Compute total and average duration of every entry of 'chosen_data'.

    The total is the sum of all STOP times minus the sum of all START times of the
    given individuals. The average divides that total by the number of START rows
    of the same individuals, so removing an ID affects total and average
    consistently (previously the START rows of all subjects were counted).

    `Required`
    :param df: dataframe with the columns 'time', 'subject', 'chosen_data', 'status'
    :param fish_ids: subjects whose rows are taken into account
    :return: list of tuples (behavior, total_time, avg_time), one per unique
             'chosen_data' value of the whole dataframe, in order of appearance
    """
    df = df[['time', 'subject', 'chosen_data', 'status']]
    #only rows of the requested individuals contribute to the times
    selected = df[df.subject.isin(fish_ids)]
    time_list = []
    for behavior in df.chosen_data.unique().tolist():
        behavior_df = selected[selected.chosen_data == behavior]
        starts = behavior_df[behavior_df.status == 'START']
        stops = behavior_df[behavior_df.status == 'STOP']
        total = stops.time.sum() - starts.time.sum()
        #behaviors without START rows (e.g. point events) count as one occurrence
        occurrences = max(len(starts), 1)
        avg = total / occurrences if total != 0 else 0.0
        time_list.append((behavior, total, avg))
    return time_list
        
            
def get_row_index(df, values):
    """ 
    Get the row index labels of all cells of the dataframe that hold one of the values.

    The result is ordered by value (in the order given), then by column (left to
    right), then by row. A row is listed once per matching cell. The first entry is
    therefore the first hit of the first value that occurs at all.

    `Required` 
    :param df: Panda dataframe
    :param values: data structure with values to search
    :return: list of row index labels, empty if no value occurs (or values is empty)
    """
    positions = []
    for value in values:
        # Boolean dataframe with True at positions where the given value exists
        matches = df.isin([value])
        for col in matches.columns:
            col_hits = matches[col]
            if col_hits.any():
                # Keep the index labels of the rows where this column holds the value
                positions.extend(col_hits[col_hits].index.tolist())
    return positions
     
def _clean(df):
    """
    Delete unneeded header information and standardize column names.
    Add necessary columns if not present.

    Column labels are lower-cased and spaces are replaced by underscores. When the
    header is already the column index, the renaming and the added columns are
    applied to the passed dataframe itself; when leading rows have to be cut off,
    a trimmed copy is used instead.

    A missing 'behavioral_category' column is created empty and then filled from
    the behavior-to-category table below ('unclassified' for unknown behaviors).
    Existing, non-empty categories are never overwritten.

    `Required` 
    :param df: Panda dataframe
    :return: the cleaned dataframe
    """

    #If header is not first row, delete rows until one of the known header names appears.
    #No hit means the header is already in place, so nothing has to be cut.
    header_rows = get_row_index(df, ['Time', 'time', 'Subject', 'subject', 'Status', 'Behavior'])
    if header_rows:
        df = df.iloc[header_rows[0]:]
        df.columns = df.iloc[0]
        df = df.iloc[1:].copy()

    #all header in lowercase, no spaces (str() keeps empty/NaN header cells from crashing)
    df.columns = [str(x).lower() for x in df.columns]
    df.columns = df.columns.str.replace(' ','_')

    #convert time to float if excel gives string objects
    df['time'] = df['time'].astype(float)

    #if dataset contains only two individuals and modifier_1 not included,
    #each individual gets the other one as modifier_1
    if 'modifier_1' not in df.columns and len(df['subject'].unique()) == 2:
        first_id, second_id = df['subject'].unique()
        df['modifier_1'] = np.where(df['subject'] == first_id, second_id, first_id)

    #add missing columns
    if 'modifier_1' not in df.columns:
        df['modifier_1'] = 'unknown'
    if 'behavioral_category' not in df.columns:
        #left empty on purpose: the mapping below fills empty categories
        df['behavioral_category'] = np.nan
    if 'status' not in df.columns:
        df['status'] = 'unknown'
    if 'total_length' not in df.columns:
        df['total_length'] = df['time'].iloc[-1]

    #map behaviors to corresponding behavioral category
    #(keys are matched against the lower-cased data, so their spelling is kept as is)
    df['behavior'] = df['behavior'].str.lower()
    category_of = {
        #overt aggressive
        'bite': 'overt aggressive',
        'mouth': 'overt aggressive',
        'ram': 'overt aggressive',
        'mouthfight': 'overt aggressive',
        'bite/ram': 'overt aggressive',
        #aggressive
        'chase': 'aggressive',
        'frontal': 'aggressive',
        'lateral display': 'aggressive',
        'head-down': 'aggressive',
        'tailbeat': 'aggressive',
        'lunging': 'aggressive',
        'head shake': 'aggressive',
        'aggressive posture': 'aggressive',
        'puffed throat': 'aggressive',
        'sand spitting': 'aggressive',
        'lunging/shooting out': 'aggressive',
        #non-aggressive/social
        'quivering': 'non-aggressive/social',
        'soft touch': 'non-aggressive/social',
        'following': 'non-aggressive/social',
        'group meeting': 'non-aggressive/social',
        'parralel swim': 'non-aggressive/social',
        #submissive
        'flee or chased': 'submissive',
        'bitten': 'submissive',
        'submissive display': 'submissive',
        #maintenance
        'feed': 'maintenance',
        'swim': 'maintenance',
        'still': 'maintenance',
        'darting': 'maintenance',
        'yawn': 'maintenance',
        'scraping': 'maintenance',
        #workload
        'digging': 'workload',
        'hover': 'workload',
        'carrying': 'workload',
    }
    inferred_category = df['behavior'].map(category_of).fillna('unclassified')

    #add behavioral category if not present
    df['behavioral_category'] = np.where(df['behavioral_category'].isnull(),
                                         inferred_category,
                                         df['behavioral_category'])

    return df

def create_download_link(filename, title = "Click here to download: "):  
    """
    Build an HTML download link that embeds the file content as a base64 data URI.

    `Required`
    :param filename: path of the file to embed; also used as the download name

    `Optional`
    :param title: link text, the filename is appended to it
    :return: IPython HTML object holding the link
    """
    #read inside a with-block so the file handle is closed right away
    with open(filename, "rb") as source_file:
        payload = base64.b64encode(source_file.read()).decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title+f' {filename}',filename=filename)
    return HTML(html)

def edit_download_html(htmlWidget, filename, title = "Click here to download: "):
    """
    Turn an HTML widget into a download link for a file and delete the file afterwards.

    The file content is embedded into the link as a base64 data URI, so the file
    itself is no longer needed once the widget value is set.

    `Required`
    :param htmlWidget: object whose 'value' attribute receives the html
    :param filename: path of the file to embed; it is removed from disk afterwards

    `Optional`
    :param title: link text, the filename is appended to it
    """

    # Change widget html temporarily to a font-awesome spinner
    htmlWidget.value = "<i class=\"fa fa-spinner fa-spin fa-2x fa-fw\"></i><span class=\"sr-only\">Loading...</span>"

    # Process raw data; the with-block closes the handle before the file is deleted
    with open(filename, "rb") as source_file:
        payload = base64.b64encode(source_file.read()).decode()

    # Create and assign html to widget
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    htmlWidget.value = html.format(payload = payload, title = title+filename, filename = filename)
    os.remove(filename)
    # For names ending in 'g' the file named by the part before the first dot is removed too.
    # NOTE(review): presumably the graphviz source written next to a rendered '.svg' - confirm.
    if filename[-1] == 'g':
        os.remove(filename.split(".")[0])

In [19]:
def plot_data(df, behavior, show_avg, show_grid):
    """
    Plot single behaviors or behavioral categories.

    For every individual the cumulative number of matching rows is drawn over time.
    Dotted segments connect the origin with the first event and extend the last
    count to the end of the recording. The figure is shown and additionally saved
    as 'images/accumulate_actions_plot.png'.

    `Required`
    :param df: Dataframe containing the behavior data
    :param behavior: The single behavior or behavioral category to plot

    `Optional`
    :param show_avg: display average line
    :param show_grid: display grid

    :return: the matplotlib.pyplot module (kept for existing callers)
    """

    def _matching_rows(frame):
        #rows whose behavioral category or whose behavior equals the selection;
        #pd.concat replaces DataFrame.append, which no longer exists in pandas 2
        if 'behavioral_category' in frame:
            return pd.concat([frame[frame.behavioral_category == behavior],
                              frame[frame.behavior == behavior]])
        return frame[frame.behavior == behavior]

    #get fish ids and initial empty figure for the plot
    fish_ids = get_fish_ids(df)
    fig = plt.figure(figsize=(9,7))

    #select the rows of every fish once instead of once per drawing pass
    fish_frames = [(fish, _matching_rows(df[df.subject == fish])) for fish in fish_ids]
    highest_plot = max([len(fish_df) + 1 for _, fish_df in fish_frames], default=0)

    #plot the amount of selected interactions for every fish
    for fish, fish_df in fish_frames:
        plt.plot(fish_df.time, range(1, len(fish_df) + 1), label=fish)
    #reset colour cycle so the dotted segments reuse the colour of their fish
    plt.gca().set_prop_cycle(None)

    #dotted line to the end if the fish is not doing any new
    #behaviors but some other fish are or some time is left
    for _, fish_df in fish_frames:
        plt.plot([fish_df.time.max(), df.time.max()], [len(fish_df), len(fish_df)], ':')
    plt.gca().set_prop_cycle(None)

    #dotted line from the origin to the first behavior of the fish
    for _, fish_df in fish_frames:
        plt.plot([0, fish_df.time.min()], [0, 1], ':')

    #plot average (skipped when nothing matches or no fish are present)
    if show_avg:
        avg_df = _matching_rows(df)
        if len(avg_df) > 0 and len(fish_ids) > 0:
            #every matching row raises the average by 1/number of fish
            avg_range = np.arange(1, len(avg_df) + 1) / len(fish_ids)
            #plot from 0 to 1 dotted, main part, and end dotted
            plt.plot([0, avg_df.time.min()], [0, avg_range[0]], ':', color="black")
            plt.plot(avg_df.time, avg_range, label="average", color="black")
            plt.plot([avg_df.time.max(), df.time.max()], [avg_range[-1], avg_range[-1]], ':', color="black")

    #add legend and edge labels
    plt.legend()
    plt.xlabel("Time", fontsize=18, labelpad=10)
    plt.ylabel("|" + str(behavior) + "|", fontsize=18, labelpad=10)

    #make frequency of yticks dependent on size of the highest plot
    tick_step = 50
    for upper_limit, step in ((11, 1), (26, 2), (51, 5), (101, 10), (201, 20)):
        if highest_plot < upper_limit:
            tick_step = step
            break
    plt.yticks(range(0, highest_plot, tick_step))

    if show_grid:
        plt.grid(linestyle='-', linewidth=0.2)

    plt.show()
    #make sure the target folder exists before saving
    os.makedirs('images', exist_ok=True)
    fig.savefig('images/accumulate_actions_plot.png', bbox_inches='tight')
    return plt

def create_interaction_network(df, threshold=1):
    """
    Create a network showing the interactions between different fish in the dataset. 
    An edge is drawn or increased by 1 for each row in the dataframe where 'subject' 
    and 'modifier_1' refer to the same pair of individuals.

    The graph is rendered by graphviz to 'images/interactions.gv.svg' and displayed.

    `Required`
    :param df: The dataframe containing the behavior data

    `Optional`
    :param threshold: Minimal number of rows an edge needs to be displayed
    """

    #remove behavior with no interaction partner and irrelevant data
    interactions_df = df.loc[df.modifier_1.notna(), ['subject', 'modifier_1']]

    #count the rows per (subject, modifier_1) pair; Series.reset_index(name=...)
    #behaves the same on old and new pandas, unlike size().to_frame() with as_index=False
    edges_df = (interactions_df
                .groupby(['subject', 'modifier_1'])
                .size()
                .reset_index(name='records'))

    #remove edges below the threshold
    edges_df = edges_df[edges_df.records >= threshold].copy()

    #add tuples and records as attributes for the network generation
    edges_df['tuples'] = list(zip(edges_df.subject, edges_df.modifier_1))
    edge_attributes_label = dict(zip(edges_df.tuples, edges_df.records))

    #scale the counts so the strongest edge gets pen width 3
    edges_df['records'] = edges_df.records * 3 / edges_df.records.max()
    edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.records))

    #create directed graph with networkx
    G = nx.DiGraph()
    G.add_edges_from(edges_df.tuples)

    #edge labels
    nx.set_edge_attributes(G, edge_attributes_label, name='label')

    #edge weight
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')

    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render('images/interactions.gv', view=False)  
    display(HTML('images/interactions.gv.svg'))
    return


In [20]:
# a button to upload a file containing behavior data
# (multiple=True lets the file dialog select more than one file)
uploader_bhvr = widgets.FileUpload(description='Behavior', multiple=True)
display(uploader_bhvr)
# output area below the button; upload_handler renders its messages and widgets into it
out = widgets.Output()
display(out)

def upload_handler(_):
    """
    Handle File Upload. On success, display metainformation about the dataset and initialize 
    global dataframe variable df. On failure, give instructions on what is missing or wrong.

    If several files were selected, only the first one is processed.

    `Required`
    :param _: change notification of the upload widget (unused)
    """

    global df,df_name
    upload_sanitized = False

    #take the first uploaded file; unpacking with [behavior] = ... failed for several files
    uploads = uploader_bhvr.value
    if not uploads:
        return
    behavior = next(iter(uploads))
    content = uploads[behavior]["content"]

    #read uploaded file into dataframe, display message if wrong file format
    try:
        df = pd.read_csv(io.BytesIO(content))
    except Exception:
        try:
            df = pd.read_excel(io.BytesIO(content))
        except Exception:
            with out:
                clear_output(wait=True)
                display(Markdown("""File must be of type <em>.csv</em> or <em>.xlsx</em>"""))
            return

    #clean file and display message if required header(s) are missing
    try:
        df = _clean(df)
        upload_sanitized = True
    except Exception as err:
        with out:
            clear_output(wait=True)
            #compare normalized names so 'Time' and 'time' are both accepted
            present = {str(col).lower().replace(' ', '_') for col in df.columns}
            if 'time' not in present:
                display(Markdown(""" Missing column header: <em>Time/time</em> """))
            if 'subject' not in present:
                display(Markdown(""" Missing column header: <em>Subject/subject</em> """))
            if 'behavior' not in present:
                display(Markdown(""" Missing column header: <em>Behavior/behavior</em> """))
            if 'status' not in present:
                display(Markdown(""" Missing column header: <em>Status/status</em> """))
            if {'time', 'subject', 'behavior', 'status'} <= present:
                #all headers are there, so report the actual problem instead of staying silent
                display(Markdown(f"File could not be processed: {type(err).__name__}: {err}"))

    #if data upload and sanitation successful, display information about dataset
    if(upload_sanitized):
        with out:
            clear_output(wait=True)
            display(Markdown("""---"""))
            display(Markdown("""#### File name"""))
            df_name = behavior
            print(df_name)
            display(Markdown("""#### IDs"""))
            print(get_fish_ids(df))
            display(Markdown("""#### Behavioral categories"""))
            print(df.behavioral_category.unique())
            display(Markdown("""#### Behaviors"""))
            print(df.behavior.unique())
            display(Markdown("""---"""))

            #display data plot
            display(Markdown("""## Data plot 
            \n - <strong>Usage</strong>: Double-click in the behavior-field, then use Up/Down-keys"""))
            data_plot = interactive(plot_data, 
                                    df=fixed(df), 
                                    behavior = np.concatenate((df.behavioral_category.unique(),df.behavior.unique()), axis=0),
                                    show_avg = True,
                                    show_grid = True)
            display(data_plot)

            #display  interaction network
            display(Markdown("""## Interaction network 
            \n - <strong>Usage</strong>: Optionally choose threshold for displayed edges"""))
            interaction_network = interactive(create_interaction_network, 
                                              df = fixed(df), 
                                              threshold=(1,100,1))
            display(interaction_network)

    return

#connect upload_handler to the file upload widget by observing its internal counter
uploader_bhvr.observe(upload_handler, names='_counter')   

FileUpload(value={}, description='Behavior', multiple=True)

Output()

In [21]:
#buttons for the transition probability network
display(Markdown("""---"""))
display(Markdown("""## Transition probability network 
\n - <strong>Usage</strong>: Choose parameters, click <em>Execute</em>"""))

#which column supplies the nodes of the network
data_button = widgets.ToggleButtons(
    options=['behavior', 'behavioral_category'],
    value='behavior',
)

#edges with a value not above this number are dropped
min_count_button = widgets.FloatText(
    value=0.0,
    description='Min_edge_val',
)

rmv_id_button = widgets.Text(
    value='',
    description='Delete ID'
)

add_id_button = widgets.Text(
    value='',
    description='Add ID'
)

rmv_bhvr_button = widgets.Text(
    value='',
    description='Delete node'
)

add_bhvr_button = widgets.Text(
    value='',
    description='Add node'
)

status_button = widgets.ToggleButton(
    value=False,
    description='Show status',
    icon='check'
)

normalized_button = widgets.ToggleButton(
    value=False,
    description='Normalize',
    icon='check'
)

hue_slider = widgets.IntSlider(
    value=140,
    min=1,
    max=360,
    step=1,
    description='Color hue:'
)

colour_button = widgets.ToggleButtons(
    options=['amount', 'total_time', 'avg_time'],
    value='avg_time',
    description='Colour')
    
size_button = widgets.ToggleButtons(
    options=['amount', 'total_time', 'avg_time'],
    value='total_time',
    description='Size')
    
sort_button = widgets.ToggleButtons(
    options=['amount', 'total_time', 'avg_time'],
    value='total_time',
    description='Sort by')

label_button = widgets.ToggleButtons(
    options=['amount', 'total_time', 'avg_time', ' - '],
    value=' - ',
    description='Label')

execute_network_button = widgets.Button(
    description = 'Execute'
)

#placeholders that receive the download links
download_gpickle = widgets.HTML(value = '')
download_svg = widgets.HTML(value = '')


#tabs for organizing the buttons of the transition probability network

tab1 = VBox(children=[data_button,
                      HBox(children=[normalized_button, status_button,]),
                     ])

#sort_button is read when the network is built but was never shown in any tab,
#so the sort order could not be changed; it is listed with the other node options
tab2 = VBox(children=[size_button,
                      label_button,
                      colour_button,
                      sort_button,
                      hue_slider,
                     ])

tab3 = VBox(children=[rmv_bhvr_button,
                      add_bhvr_button,
                      rmv_id_button,
                      add_id_button,
                      min_count_button,
                     ])

tab4 = VBox(children=[download_gpickle,
                      download_svg,
                     ])

tab = widgets.Tab(children=[tab1, tab2, tab3, tab4])
tab.set_title(0, 'network')
tab.set_title(1, 'nodes')
tab.set_title(2, 'thin out')
tab.set_title(3, 'download')

#everything the network cell prints or draws goes into this output area
network_output = widgets.Output()

transition_probability_network = VBox(children=[tab, execute_network_button, network_output])

    
def execute_network(_):
    """
    Click handler of the Execute button: rebuild the transition probability network.

    Without an uploaded dataset a hint is shown. Any other failure is reported with
    its actual error instead of being mistaken for a missing upload. The four text
    input fields are emptied afterwards in every case.

    `Required`
    :param _: the clicked button (unused)
    """
    with network_output:
        clear_output(wait=True)
        if df is None:
            display(Markdown("""#### Error: No uploaded file detected - Please try again after successful file upload"""))
        else:
            try:
                create_behavior_cycle()
            except Exception as err:
                display(Markdown(f"#### Error: network could not be created ({type(err).__name__}: {err}) - Please check the uploaded file"))
    #clear input fields
    rmv_id_button.value = ''
    add_id_button.value = ''
    rmv_bhvr_button.value = ''
    add_bhvr_button.value = ''


#registered explicitly so the name execute_network keeps referring to the function
execute_network_button.on_click(execute_network)
    

    
#show the tabs, the Execute button and the output area of the transition probability network
display(transition_probability_network)

#module-level lists that create_behavior_cycle declares global and appends to / removes from,
#so removed categories, behaviors and IDs are remembered between clicks on Execute
remove_list_cat = []
remove_list = []
remove_id_list = []
def create_behavior_cycle():
    """Input parameters are the behavior file, the specification if the user wants to see the behaviors itself 
    or the behavior cycle of the behavioral categories and the minimal count for a edge to be displayed. 
    This cycle is calculated by splitting the boris-file for each fish and then increasing the edge count for each 
    successing behavior. In the end, the edge count is normalized in [0,1] for each node where edges come from 
    so we have kind of a probability of which behavior follows which behavior"""
    global df, df_name, remove_list_cat, remove_list, remove_id_list
    local_df = df
    
    #display tab
    data = data_button.value
    with_status = status_button.value
    normalized = normalized_button.value
    
    #reduce tab
    min_count = min_count_button.value
    rmv_id = rmv_id_button.value
    add_id = add_id_button.value
    rmv_bhvr = rmv_bhvr_button.value
    add_bhvr = add_bhvr_button.value
    
    #node tab
    hue = hue_slider.value
    node_colour= colour_button.value
    node_size= size_button.value
    node_label= label_button.value
    sort_by= sort_button.value
    
    fish_ids = get_fish_ids(df)
    successor_list = []
    #prepare dataframe with user input
    #first check if the user wants so see the behaviors or the behavioral categories
    if data == 'behavioral_category':
        #reset list of removed behaviors
        remove_list.clear()
        local_df['chosen_data'] = local_df['behavioral_category']
        
        #print unique behavioral categories
        display(Markdown("""#### All behavioral categories: \n"""))
        print(df.chosen_data.unique())
        
        #remove and add behavioral categories
        if rmv_bhvr:
            remove_us = rmv_bhvr.split('\'')
            for x in remove_us:
                if x in local_df.chosen_data.unique() and (len(remove_list_cat)+1 < len(local_df.chosen_data.unique())):
                    remove_list_cat.append(x)
        
        if add_bhvr:
            add_us = add_bhvr.split('\'')
            for x in add_us:
                if x in df.behavioral_category.unique() and x in remove_list_cat:
                    remove_list_cat.remove(x)
        
        #display removed behaviors and create new reduced dataframe
        if remove_list_cat:
            display(Markdown("""#### Removed behavioral categories: \n"""))
            print(set(remove_list_cat))
            for x in remove_list_cat:
                local_df = local_df.drop(local_df[local_df.behavioral_category == x].index)
    else:
        #reset list of removed behavioral categories
        remove_list_cat.clear()
        local_df['chosen_data'] = local_df['behavior']
        #print all behaviors
        display(Markdown("""#### All behaviors:"""))
        print(local_df.chosen_data.unique())
       
        #add and remove behaviors
        if rmv_bhvr:
            remove_us = rmv_bhvr.split('\'')
            for x in remove_us:
                if x in local_df.chosen_data.unique() and (len(remove_list)+1 < len(local_df.chosen_data.unique())):
                    remove_list.append(x)            
        if add_bhvr:
            add_us = add_bhvr.split('\'')
            for x in add_us:
                if x in df.chosen_data.unique() and x in remove_list:
                    remove_list.remove(x)  
        if remove_list:
            display(Markdown("""#### Removed behavior: \n"""))
            print(set(remove_list))
            for x in remove_list:
                local_df = local_df.drop(df[df.chosen_data == x].index)
   
    #remove IDs
    if rmv_id:
        remove_ids = rmv_id.split('\'')
        for x in remove_ids:
            if x in fish_ids and len(remove_id_list)+1 < len(fish_ids):
                remove_id_list.append(x)
    if add_id:
        add_ids = add_id.split('\'')
        for x in add_ids:
            if (x in fish_ids or x in df.modifier_1.unique()) and x in remove_id_list:
                remove_id_list.remove(x)
    if remove_id_list:
        display(Markdown("""#### Removed IDs: \n"""))
        print(set(remove_id_list))
    fish_ids_after_removal = [x for x in fish_ids if x not in remove_id_list]
    
    display(Markdown(""" --- """))
    
    #loop through dataframe for each fish and add behavior and successor
    for fish in fish_ids_after_removal:
        id_frame = local_df[local_df.subject == fish]  
        if not (with_status):
            id_frame = id_frame.drop(id_frame[id_frame.status == 'STOP'].index)
        i=0
        k=i+1
        while i < len(id_frame)-1:
            successor_list.append((id_frame.chosen_data.iloc[i], id_frame.status.iloc[i], id_frame.chosen_data.iloc[k], id_frame.status.iloc[k]))
            k+=1
            i+=1
    #lets make an edgelist with behavior and successor
    successor_df = pd.DataFrame(successor_list, columns=['action_1', 'status_1', 'action_2', 'status_2'])
    if (with_status):
        successor_df['action_1'] = successor_df['action_1'] + ' ' + successor_df['status_1']
        successor_df['action_2'] = successor_df['action_2'] + ' ' + successor_df['status_2']
    else:
        successor_df = successor_df.replace(to_replace="POINT", value="")
    
    successor_df['tuples'] = list(zip(successor_df.action_1, successor_df.action_2))
    successor_df = successor_df.groupby(successor_df.columns.tolist(), as_index=False).size().to_frame(name='records').reset_index()
    
    #normalize the records in [0,1] so that all together are 1 for each action
    behavior_ids = successor_df.action_1.unique().tolist()
    edges_df = pd.DataFrame()
    for action in behavior_ids:
        action_frame = successor_df[successor_df.action_1 == action]
        if(normalized):    
            sum_of_successors = action_frame.records.sum()
            action_frame['normalized'] = action_frame.records.div(sum_of_successors).round(2)
        edges_df = edges_df.append(action_frame)   
    

    #erase edges below min_count
    try:
        if(normalized and min_count):
            edges_df = edges_df[edges_df.normalized > float(min_count)]
        elif not normalized and min_count:    
            edges_df = edges_df[edges_df.records > float(min_count)]
    except: display(Markdown("""#### min_count has to be a positive real number. No edges were removed.\n"""))
    
    # add average and total time
    times_list = get_total_and_avg_time(df, fish_ids_after_removal)
    times_df = pd.DataFrame(times_list, columns=['action_1', 'total_time', 'avg_time'])
    
    #work on the nodes(behaviors) of the graph so we can later set node-attributes for graphviz
    nodes_df = edges_df[['action_1', 'records']]
    nodes_df = edges_df.groupby('action_1')['records'].sum().to_frame(name='records').reset_index()
    nodes_df = pd.merge(times_df, nodes_df, on='action_1', how='outer')
    nodes_df.columns = ['node', 'total_time', 'avg_time', 'record']
    
    #if a behavior occurs only once/ as last behavior maybe of an animal it is not counted
    if not (with_status):
        nodes_df.record = nodes_df.record.fillna(1)
    #round results
    nodes_df.total_time = nodes_df.total_time.round(2)
    nodes_df.avg_time = nodes_df.avg_time.round(2)
    
    #merge nodes with amount and times in the dataframe for the tuples so 
    #they can be displayed inside the node as label
    labels_1 = nodes_df.copy()
    labels_1.columns = ['action_1', 'total_time_1', 'avg_time_1', 'record_1']
    edges_df = pd.merge(edges_df, labels_1, on='action_1', how='left')
    labels_2 = nodes_df.copy()
    labels_2.columns = ['action_2', 'total_time_2', 'avg_time_2', 'record_2']
    edges_df = pd.merge(edges_df, labels_2, on='action_2', how='left') 
    
    if(node_label == 'amount'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['record_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['record_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['record'].astype(str)
    elif(node_label == 'total_time'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['total_time_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['total_time_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['total_time'].astype(str)
    elif(node_label == 'avg_time'):
        edges_df['action_1'] = edges_df['action_1'] + " - " + edges_df['avg_time_1'].astype(str)
        edges_df['action_2'] = edges_df['action_2'] + " - " + edges_df['avg_time_2'].astype(str)
        edges_df['tuples'] = list(zip(edges_df['action_1'],edges_df['action_2']))
        nodes_df['node'] = nodes_df['node'] + " - " + nodes_df['avg_time'].astype(str)
    
    if(sort_by == 'amount'):
        nodes_df = nodes_df.sort_values(by='record', ascending=False)
    elif(sort_by == 'total_time'):
        nodes_df = nodes_df.sort_values(by='total_time', ascending=False)
    else:
        nodes_df = nodes_df.sort_values(by='avg_time', ascending=False)
    
    # print behavior nodes and amount
    display(HTML(nodes_df.to_html(index=False)))
    display(Markdown(""" ---"""))
    
    
    #logarithmic normalization of record, avg_time and total_time 
    nodes_df.record = (np.log(nodes_df.record)-np.log(nodes_df.record.min()))/(np.log(nodes_df.record.max())-np.log(nodes_df.record.min()))
    nodes_df.total_time = nodes_df.total_time+1
    nodes_df.total_time = (np.log(nodes_df.total_time)-np.log(nodes_df.total_time.min()))/(np.log(nodes_df.total_time.max())-np.log(nodes_df.total_time.min()))
    nodes_df.avg_time = nodes_df.avg_time+1
    nodes_df.avg_time = (np.log(nodes_df.avg_time)-np.log(nodes_df.avg_time.min()))/(np.log(nodes_df.avg_time.max())-np.log(nodes_df.avg_time.min()))
        
    #node sizes dependent on user input and then a dictionary 
    #for node height and width is created to give it to graphviz
    if(node_size == 'amount'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.record*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.record*1.4))
    elif (node_size == 'total_time'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.total_time*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.total_time*1.4))
    elif (node_size == 'avg_time'):
        nodes_width = dict(zip(nodes_df.node, nodes_df.avg_time*3))
        nodes_height = dict(zip(nodes_df.node, nodes_df.avg_time*1.4))
        
    #node colour dependent on user input, values are normalized with np.log and then a dictionary
    #for node colour is created to give it to graphviz later
    hue = hue/360
    if(node_colour == 'amount'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['record'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    elif (node_colour == 'total_time'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['total_time'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    elif (node_colour == 'avg_time'):
        nodes_df['colour'] = str(hue)+" "+ nodes_df['avg_time'].astype(str) + " 1"
        nodes_colour = dict(zip(nodes_df.node, nodes_df.colour))
    
    #create directed graph
    G = nx.DiGraph()
    G.add_edges_from(edges_df.tuples)
    
    #create label and weight for edges
    if(normalized):
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.normalized))
        edges_df.normalized = edges_df.normalized * 3
        edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.normalized))
    else:
        edge_attributes_label = dict(zip(edges_df.tuples, edges_df.records))
        #normalize logarithmic
        edges_df.records = (np.log(edges_df.records)-np.log(edges_df.records.min()))/(np.log(edges_df.records.max())-np.log(edges_df.records.min()))
        edges_df.records = edges_df.records + 0.1
        edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.records/edges_df.records.max()))
    
    #set edge attributes
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    nx.set_edge_attributes(G, edge_attributes_label, name='label')
    
    #set node attributes
    nx.set_node_attributes(G, nodes_width, name='width')
    nx.set_node_attributes(G, nodes_height, name='height')
    if not (with_status):
        nx.set_node_attributes(G, nodes_colour, name='fillcolor')
    nx.set_node_attributes(G, 'filled', name='style')
    nx.set_node_attributes(G, "what should &#013; be here", name='tooltip')
    
    #use time to name the files
    now = datetime.datetime.now()
    
    #get a good filename for the newly generated network download, consisting of the users name + date
    filename = df_name
    if filename.endswith('.csv'):
        filename = filename[:-4]
    elif filename.endswith('.xlsx'):
        filename = filename[:-5]
    #filename = filename + "-"+ str(now.year) +"_"+ str(now.month) +"_"+ str(now.day) 
    filename = filename.replace(".","_")
    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render(filename, view=False)  
    display(HTML(filename + '.svg'))
    
    #make download available
    nx.write_gpickle(G, filename + ".gpickle")
    download_link = create_download_link(filename + ".gpickle", title = "Download for network comparison:  ")
    display(download_link)
    edit_download_html(download_gpickle, filename + ".gpickle", title = "Download for network comparison: ")
    edit_download_html(download_svg, filename + '.svg', title = "Download svg image: ")

    
    return 

---

## Transition probability network 

 - <strong>Usage</strong>: Choose parameters, click <em>Execute</em>

VBox(children=(Tab(children=(VBox(children=(ToggleButtons(options=('behavior', 'behavioral_category'), value='…

In [22]:
# Network comparison section: render the section header and create the widgets
# the upload handlers defined below read from and write to.
display(Markdown("""---"""))
display(Markdown(
""" 
## Network comparison 
- <strong>Usage</strong>: Upload previously downloaded transition probability networks \n by clicking the <em>Network</em>-buttons\n
- <strong>Info</strong>: This functionality is still in progress, do not expect much output \n
---
"""))

# One upload button per network to compare, plus a shared output area
# (out_comp) in which the handlers display status messages and results.
uploader_network1 = widgets.FileUpload(description='Network-1', multiple=True)
display(uploader_network1)
uploader_network2 = widgets.FileUpload(description='Network-2', multiple=True)
display(uploader_network2)
out_comp = widgets.Output()
display(out_comp)
  

def comp_upload_handler1(_):
    """
    Handle file upload of uploader_network1, gets triggered as file upload changes.
    The uploaded network is stored in the global G1; as soon as both G1 and G2
    are available the comparison is displayed in out_comp.
    
    `Required` 
    :param _: change notification passed by the widget observer, not used
    """
    global G1,G2
    uploads = uploader_network1.value
    try:
        #exactly one file is expected per upload button
        if len(uploads) != 1:
            raise ValueError("Please upload exactly one network file, got " + str(len(uploads)))
        [network1] = uploads
        #SECURITY: unpickling can execute arbitrary code, only upload networks 
        #which were downloaded from this tool. pickle.load reads the same format as
        #the former nx.read_gpickle, which is no longer available in networkx 3.
        graph = pickle.load(io.BytesIO(uploads[network1]["content"]))
        graph.graph['name'] = network1.split(".", 1)[0]
        #only publish the network once it has been loaded and named successfully
        G1 = graph
        
        with out_comp:
            clear_output(wait=True)
            display(Markdown("""First network successfully uploaded : """ + G1.graph['name'] ))
            
            #now the real network comparison, 'is not None' because an empty graph is falsy
            if G1 is not None and G2 is not None:
                compare_networks(G1,G2)
            
    except Exception as error:
        #show what went wrong and check for correct file ending
        with out_comp:
            clear_output(wait=True)
            display(Markdown("""An error occured: \n""" + str(error)))
            if any(".gpickle" not in name for name in uploads):
                display(Markdown("""File must be of type <em>.gpickle</em>"""))
    
    return

def comp_upload_handler2(_):
    """
    Handle file upload of uploader_network2, gets triggered as file upload changes.
    The uploaded network is stored in the global G2; as soon as both G1 and G2
    are available the comparison is displayed in out_comp.
    
    `Required` 
    :param _: change notification passed by the widget observer, not used
    """
    global G1,G2
    uploads = uploader_network2.value
    
    try:
        #exactly one file is expected per upload button
        if len(uploads) != 1:
            raise ValueError("Please upload exactly one network file, got " + str(len(uploads)))
        [network2] = uploads
        #SECURITY: unpickling can execute arbitrary code, only upload networks 
        #which were downloaded from this tool. pickle.load reads the same format as
        #the former nx.read_gpickle, which is no longer available in networkx 3.
        graph = pickle.load(io.BytesIO(uploads[network2]["content"]))
        graph.graph['name'] = network2.split(".", 1)[0]
        #only publish the network once it has been loaded and named successfully
        G2 = graph
        
        with out_comp:
            clear_output(wait=True)
            display(Markdown("""Second network successfully uploaded : """ + G2.graph['name'] ))
            
            #now the real network comparison, 'is not None' because an empty graph is falsy.
            #The success message stays visible above the comparison, like for the first network.
            if G1 is not None and G2 is not None:
                compare_networks(G1,G2)
            
    except Exception as error:
        #show what went wrong and check for correct file ending
        with out_comp:
            clear_output(wait=True)
            display(Markdown("""An error occured: \n""" + str(error)))
            if any(".gpickle" not in name for name in uploads):
                display(Markdown("""File must be of type <em>.gpickle</em>"""))
    
    return
    
#connect the two upload handlers to their file upload widgets by observing the
#widgets' internal '_counter' trait, so a handler runs whenever that trait changes
uploader_network1.observe(comp_upload_handler1, names='_counter') 
uploader_network2.observe(comp_upload_handler2, names='_counter') 

def avg_degree_centrality(G):
    """
    Calculate the average degree centrality over all nodes of a graph.
    
    `Required`
    :param G: the networkx graph to analyse
    :return: mean of the degree centrality values, 0.0 for a graph without nodes
    """
    values = nx.degree_centrality(G).values()
    #a graph without nodes has no centrality values, avoid dividing by zero
    if len(values) == 0:
        return 0.0
    return sum(values)/len(values)

def select_k(spectrum, minimum_energy = 0.9):
    """
    Find the length of the shortest prefix of the spectrum whose sum reaches 
    the requested share of the sum of the whole spectrum.
    
    `Required`
    :param spectrum: sequence of numbers, accumulated in the given order
    
    `Optional`
    :param minimum_energy: share of the total sum the prefix has to reach
    :return: number of leading entries needed; the full length if the total is 
             zero or the share is never reached
    """
    size = len(spectrum)
    energy_sum = sum(spectrum)
    #without any energy every prefix is equally good, so all entries are used
    if energy_sum == 0.0:
        return size
    
    cumulative = 0.0
    for position, value in enumerate(spectrum, start=1):
        cumulative += value
        if cumulative / energy_sum >= minimum_energy:
            return position
    return size


def eigenvector_similarity(G1, G2):
    """
    Calculate and display the eigenvector similarity for the undirected equivalents 
    of the input graphs: the sum of squared differences between the k largest 
    Laplacian eigenvalues of both graphs, where k is chosen by select_k.
    
    `Required`
    :param G1: the first graph to be compared
    :param G2: the second graph to be compared
    """
    display(Markdown("""---"""))
    display(Markdown("""Eigenvector Similarity: \n 
    Calculate the Laplacian eigenvalues for the adjacency matrices of each of the graphs. 
    For each graph, find the smallest k such that the sum of the k largest eigenvalues constitutes at least 90% of the sum of all of the eigenvalues. 
    If the values of k are different between the two graphs, then use the smaller one. 
    The similarity metric is then the sum of the squared differences between the largest k eigenvalues between the graphs. 
    This will produce a similarity metric in the range [0, ∞), where values closer to zero are more similar. Neither edge direction nor edge weigth are taken in account."""))
    display(Markdown("""<sub><em>Source: ESultanik on stackoverflow, this is his post: https://stackoverflow.com/a/27303476</em> </sub>"""))

    
    G1 = G1.to_undirected()
    G2 = G2.to_undirected()
    #https://stackoverflow.com/questions/12122021/python-implementation-of-a-graph-similarity-grading-algorithm
    #The metric is defined on the k LARGEST eigenvalues, so the spectra are sorted
    #in descending order before the leading k entries are selected and compared.
    #(the spectrum is otherwise delivered smallest-first, which would select the k smallest)
    laplacian1 = np.sort(nx.spectrum.laplacian_spectrum(G1))[::-1]
    laplacian2 = np.sort(nx.spectrum.laplacian_spectrum(G2))[::-1]

    k1 = select_k(laplacian1)
    k2 = select_k(laplacian2)
    #use the smaller k so both slices have the same length
    k = min(k1, k2)

    similarity = sum((laplacian1[:k] - laplacian2[:k])**2)
    
    display(Markdown("""<strong>Similarity score:</strong> """ + str(similarity)))

    return


def union_graph(G,H):
    """
    Draw the union of two graphs, colouring nodes and edges by the graph(s) they 
    belong to, and display the ratio of intersection over union of the edge sets.
    
    `Required`
    :param G: the first graph to be compared (drawn in magenta)
    :param H: the second graph to be compared (drawn in chartreuse)
    """
    
    GH =nx.compose(G,H)
    
    # set edge colors: grey for shared edges, otherwise the colour of the owning graph
    edge_colors = dict()
    edge_labels = dict()
    edge_width = dict()
    for edge in GH.edges():
        in_G = G.has_edge(*edge)
        in_H = H.has_edge(*edge)
        if in_G and in_H:
            #shared edges are drawn thinner and without label
            edge_colors[edge] = 'azure3'
            edge_labels[edge] = ''
            edge_width[edge] = 0.8
            continue
        edge_width[edge] = 0.9
        if in_G:
            edge_colors[edge] = 'magenta'
        elif in_H:
            edge_colors[edge] = 'chartreuse1'

    # set node colors with the same scheme, keyed by node so colours cannot be misaligned
    G_nodes = set(G.nodes())
    H_nodes = set(H.nodes())
    node_colors_dict = dict()
    for node in GH.nodes():
        if node in G_nodes and node in H_nodes:
            node_colors_dict[node] = 'azure3'
        elif node in G_nodes:
            node_colors_dict[node] = 'magenta'
        elif node in H_nodes:
            node_colors_dict[node] = 'chartreuse1'
            
    #graphviz draw
    nx.set_edge_attributes(GH, edge_colors, name='color')
    nx.set_edge_attributes(GH, edge_labels, name='label')
    nx.set_edge_attributes(GH, edge_width, name='penwidth')
    nx.set_node_attributes(GH, node_colors_dict, name='fillcolor')
    nx.set_node_attributes(GH, 1.2, name ='width')
    nx.set_node_attributes(GH, 0.5, name ='height')
    #graphviz
    GH_dot_string = to_pydot(GH).to_string()
    GH_dot = graphviz.Source(GH_dot_string)
    GH_dot.format= 'svg'
    GH_dot.render('images/uniongraph', view=False)  
    
    #display metainformation and graph
    display(Markdown("""---"""))
    print(colored(G.graph['name'], 'magenta'))
    print(colored(H.graph['name'], 'green'))
    print(colored("Intersecting nodes/edges \n", 'grey'))
    display(HTML('images/uniongraph.svg'))
    
    i = set(G.edges()).intersection(H.edges())
    union_size = len(G.edges()) + len(H.edges()) - len(i)
    if union_size == 0:
        #neither graph has an edge: the edge sets are identical, report full overlap 
        #instead of dividing by zero
        distance = 1.0
    else:
        distance = round(len(i) / union_size,3)    
    
    display(Markdown("""<strong>Ratio of Intersection over Union</strong>: """ + str(distance)))
    
    return

def jaccard(G1, G2):
    """
    Explain the Jaccard index and display it for the edge sets of the two graphs, 
    i.e. the number of edges present in both graphs divided by the number of edges 
    present in at least one of them.
    
    `Required`
    :param G1: the first graph to be compared
    :param G2: the second graph to be compared
    """
    display(Markdown("""---"""))
    
    display(Markdown("""
    The Jaccard index, also known as the Jaccard similarity coefficient, 
    is a statistic used for gauging the similarity and diversity of sample sets. 
    In this case, the Jaccard coefficient measures similarity between the graphs by dividing the intersection 
    by the size of the union of the edge sets. """))
    

    i = set(G1.edges()).intersection(G2.edges())
    union_size = len(G1.edges()) + len(G2.edges()) - len(i)
    if union_size == 0:
        #neither graph has an edge: the edge sets are identical, report full overlap 
        #instead of dividing by zero
        distance = 1.0
    else:
        distance = round(len(i) / union_size,3)    
    
    display(Markdown("""<strong>Ratio of intersection over union of edges:</strong> """ + str(distance)))

    return

def graph_edit_distance(G1, G2):
    """
    Calculate and display the graph edit distance. The successively improving 
    approximations of networkx are consumed until the exact value is found or the 
    time limit is exceeded, in which case the best value so far is displayed.
    
    `Required`
    :param G1: the first graph to be compared
    :param G2: the second graph to be compared
    """
    #the limit is only checked between two approximations, so a single slow 
    #approximation step can still exceed it
    time_limit_seconds = 3
    timed_out = False
    #stays None if the generator does not yield any approximation
    minv = None

        
    display(Markdown("""---"""))
    display(Markdown("""Returns GED (graph edit distance) between graphs G1 and G2. \n
    Graph edit distance is a graph similarity measure analogous to Levenshtein distance for strings. 
    It is defined as minimum cost of edit path (sequence of node and edge edit operations) 
    transforming graph G1 to graph isomorphic to G2. (Edge weights are not taken into account)"""))
    
    loading_indicator = widgets.HTML(value = "<i class=\"fa fa-spinner fa-spin fa-2x fa-fw\"></i><span class=\"sr-only\">Loading...</span>")
    display(loading_indicator)
    
    #networkx optimized function for graph edit distance, every yielded value 
    #is a better approximation than the previous one
    timeout = time.monotonic() + time_limit_seconds
    for v in nx.optimize_graph_edit_distance(G1, G2):
        minv = v
        if time.monotonic() > timeout:
            timed_out = True
            break
    
    #replace the loading indicator by the final output
    clear_output(wait=True)
    display(Markdown("""Returns GED (graph edit distance). \n
    Graph edit distance is a graph similarity measure analogous to Levenshtein distance for strings. 
    It is defined as minimum cost of edit path (sequence of node and edge edit operations) 
    transforming graph G1 to graph isomorphic to G2. (Edge weights are not taken into account)"""))
    if not timed_out:
        display(Markdown("""<strong>Graph edit distance:</strong> """ +str(minv)))
    else: 
        #report the time limit that is actually applied
        display(Markdown(""" 
        The algorithm is stopping after """ + str(time_limit_seconds) + """ seconds to not crash the application. 
        The intermediate result for the graph edit distance is: """ +str(minv)))
        
    return
        
    
def highest_probability_chain(G1,G2):
    """
    Reduce both networks so that every node keeps at most one of its edges, the 
    one with the highest label, and show the union network of the reduced networks.
    The input graphs are not modified.
    
    `Required`
    :param G1: the first graph to be compared
    :param G2: the second graph to be compared
    """
    def strongest_edges_only(graph):
        #reduce a copy so the uploaded network stays intact
        reduced = graph.copy()
        for node in reduced.nodes():
            #drop the edge with the lowest label until at most one edge is left
            while len(reduced.edges(node)) > 1:
                weakest = min(reduced.edges(node), 
                              key=lambda pair: reduced.get_edge_data(pair[0], pair[1])["label"])
                reduced.remove_edge(*weakest)
        return reduced
    
    first_chain = strongest_edges_only(G1)
    second_chain = strongest_edges_only(G2)
    union_graph(first_chain, second_chain)
    
    return


def show_similarity(algorithm, G1, G2):
    """
    Run the similarity algorithm selected by the user and display its output 
    together with a short explanation. Unknown algorithm names are ignored.
    
    `Required`
    :param algorithm: name of the similarity scoring algorithm to apply
    :param G1: the first graph to be compared
    :param G2: the second graph to be compared
    """
    if algorithm == 'visualization':
        display(Markdown(""" --- """))
        display(Markdown("""The graphs are combined to identify nodes and edges common to both"""))
        union_graph(G1,G2)
        return
    
    if algorithm == 'highest_probability_chain':
        display(Markdown("""---"""))
        display(Markdown("""For each node, all edges but the one with the highest weight have been removed."""))
        highest_probability_chain(G1,G2)
        return
    
    #the remaining algorithms print their own explanation
    if algorithm == 'eigenvector similarity':
        eigenvector_similarity(G1,G2)
        return
    if algorithm == 'jaccard distance':
        jaccard(G1,G2)
        return
    if algorithm == 'graph edit distance':
        graph_edit_distance(G1,G2)
        return
        
    return

def compare_networks(G1, G2):
    """
    Calculate network metrics for both graphs, display them side by side in a table 
    and offer an interactive selection of graph similarity algorithms.
    
    `Required` 
    :param G1: the first graph to be compared, its name is read from G1.graph['name']
    :param G2: the second graph to be compared, its name is read from G2.graph['name']
    """
    
    display(Markdown("""---"""))
    
    metric_names = ['Number of nodes',
                    'Number of edges',
                    'Number weakly connected components',
                    'Number strongly connected components',
                    'Density', 
                    'Transitivity',
                    #'Avg shortest path length',
                    #'Avg degree centrality'
                   ]
    count_metrics = metric_names[:4]
    
    def network_metrics(G):
        #values in the same order as metric_names
        return [nx.number_of_nodes(G),
                nx.number_of_edges(G),
                nx.number_weakly_connected_components(G),
                nx.number_strongly_connected_components(G),
                nx.density(G), 
                nx.transitivity(G),
                #nx.average_shortest_path_length(G),
                #avg_degree_centrality(G)
               ]
    
    #one row per network; built from a list instead of a dict keyed by network name, 
    #so two networks with the same name do not overwrite each other
    metrics_df = pd.DataFrame([network_metrics(G1), network_metrics(G2)],
                              index = [G1.graph['name'], G2.graph['name']],
                              columns = metric_names)
    
    #counts are shown as int, ratios as float; object dtype keeps both after transposing
    metrics_df[count_metrics] = metrics_df[count_metrics].astype('int64')
    metrics_df = metrics_df.astype(object).T
    
    display(HTML(metrics_df.to_html()))
    display(Markdown("""---"""))
    
    
    display(Markdown("""#### Graph similarity:"""))
    similarity = interactive(show_similarity, algorithm = ['visualization','highest_probability_chain', 'jaccard distance', 'graph edit distance', 'eigenvector similarity' ], G1 = fixed(G1), G2 = fixed(G2)) 
    display(similarity)
    
    return



---

 
## Network comparison 
- <strong>Usage</strong>: Upload previously downloaded transition probability networks 
 by clicking the <em>Network</em>-buttons

- <strong>Info</strong>: This functionality is still in progress, do not expect much output 

---


FileUpload(value={}, description='Network-1', multiple=True)

FileUpload(value={}, description='Network-2', multiple=True)

Output()

In [23]:
#compare_networks(G1,G2)

In [24]:
# uploader for the trajectories and corresponding functions

def create_distance_network(trajectory_df, max_dist=100, min_seconds=3):
    """
    Build and display an undirected network of individuals which stayed close to each other.
    Every 5th frame is inspected; for each inspected frame the distance between each 
    pair of individuals is calculated, which is why the output may take a few seconds.
    Assuming 25 fps, 5 inspected frames correspond to one second.
    
    `Required`
    :param trajectory_df: dataframe with the columns id, frame, x and y
    
    `Optional`
    :param max_dist: maximal distance of two individuals in a frame to count as close
    :param min_seconds: minimum amount of seconds two individuals have to be close 
                        for an edge to be drawn between them
    """
    #sampling: every 5th frame is inspected, which at the assumed 25 fps 
    #results in 5 inspected frames per second
    frame_step = 5
    samples_per_second = 5
    
    #get the range of the frames to loop through
    first_frame = trajectory_df.frame.min()
    last_frame = trajectory_df.frame.max()
    frames_list = list(range(first_frame, last_frame, frame_step))
    
    #select the inspected frames once and group them, instead of 
    #filtering the whole dataframe again for every single frame
    sampled_df = trajectory_df[trajectory_df.frame.isin(frames_list)]
    close_fish_list = []
    for _, frame_df in sampled_df.groupby('frame'):
        ids = frame_df.id.tolist()
        xs = frame_df.x.tolist()
        ys = frame_df.y.tolist()
        #calculate the distance for each pair of individuals in the frame
        for i, k in itertools.combinations(range(len(ids)), 2):
            dist = math.sqrt((xs[i] - xs[k])**2 + (ys[i] - ys[k])**2)
            #add an entry to the close_fish_list if dist <= threshold
            if dist <= max_dist:
                #the network is undirected: store each pair in one canonical order, so 
                #(a, b) and (b, a) are counted together regardless of the row order
                close_fish_list.append(tuple(sorted((ids[i], ids[k]), key=str)))
    
    #create edges and attributes for network generation
    edges_df = pd.DataFrame(close_fish_list, columns=['fish_1', 'fish_2'])
    #count the frames per pair; reset_index(name=...) works independent of whether 
    #size() returns a Series or a DataFrame in the installed pandas version
    edges_df = edges_df.groupby(['fish_1', 'fish_2']).size().reset_index(name='frames')
    edges_df['tuples'] = list(zip(edges_df.fish_1, edges_df.fish_2))
    edges_df['close_seconds'] = edges_df.frames / samples_per_second
    #copy, so the rescaling below does not write into a slice of the unfiltered frame
    edges_df = edges_df[edges_df.close_seconds >= min_seconds].copy()
    edge_attributes_label = dict(zip(edges_df.tuples, edges_df.close_seconds))
    #change for edge weight: scale so that the strongest edge has penwidth 3
    edges_df.close_seconds = edges_df.close_seconds * 3 / edges_df.close_seconds.max()
    edge_attributes_weight = dict(zip(edges_df.tuples, edges_df.close_seconds))
    #create undirected graph with attributes
    G = nx.Graph()
    G.add_edges_from(edges_df.tuples)
    nx.set_edge_attributes(G, edge_attributes_label, name='label')
    nx.set_edge_attributes(G, edge_attributes_weight, name='penwidth')
    #graphviz
    G_dot_string = to_pydot(G).to_string()
    G_dot = graphviz.Source(G_dot_string)
    G_dot.format= 'svg'
    G_dot.render('images/distances.gv', view=False)  
    display(HTML('images/distances.gv.svg'))
    return 

def create_trajectory_map(trajectory_df):
    """
    Plot the trajectories of all individuals in one figure by scattering the x- and 
    y-coordinates of each individual for each frame, and save the figure as 
    'images/trajectory_map.png'.
    
    `Required`
    :param trajectory_df: dataframe with the columns id, x and y
    :return: the matplotlib.pyplot module holding the created figure
    """
    #id works only for jakobs positions
    fish_ids = trajectory_df.id.unique()
    trajectory_list = []
    fig = plt.figure(figsize=(9,7))
    ax = fig.subplots()
    for fish in fish_ids:
        #extract positions for the fish and scatter it
        coordinates = trajectory_df[trajectory_df.id==fish]
        trajectory = ax.scatter(coordinates.x, coordinates.y, 0.1)
        trajectory_list.append(trajectory)
    plt.legend(trajectory_list, fish_ids, markerscale=20)   
    plt.xlabel("x-coordinate", fontsize=18, labelpad=10)
    plt.ylabel("y-coordinate", fontsize=18, labelpad=10)
    #savefig does not create missing folders, so make sure the target folder exists
    os.makedirs('images', exist_ok=True)
    fig.savefig('images/trajectory_map.png', bbox_inches='tight')
    return plt

def _clean_trajectories(df):
    """
    Delete unneeded header information and standardize column names. 
    
    `Required` 
    :param df: Panda dataframe
    """
    
    #If header is not first row, delete rows until one of ['Time', 'time', 'Subject', 'Fps', 'fps', 'subject'] appears
    try:
        header_row_index = get_row_index(df, ['Id', 'id', 'Frame', 'frame', 'X', 'x'])[0]
        df = df.iloc[header_row_index:]
        df.columns = df.iloc[0]
        df = df.iloc[1:]
    except:
        pass
    
    #all header in lowercase, no spaces
    df.columns = [x.lower() for x in df.columns]
    df.columns = df.columns.str.replace(' ','_')
    
    #test if needed column headers are present
    headers = ['id', 'frame', 'x', 'y']
    for header in headers:
        if header not in trajectory_df:
            raise Exception("Error: column header " + header + " is not present.")
    
    return df

# Trajectory section: render the section header, then create a button to upload
# a file containing trajectory data and an output area for the resulting plots.
display(Markdown("""---"""))
display(Markdown(
""" 
## Got a corresponding trajectory file? 
<em>Optionally inspect trajectories/coordinates</em> \n
- <strong>Usage</strong>: Upload file containing the data \n by clicking <em>Trajectories</em>\n
- <strong>Required columns</strong>: <em>Id</em>, <em>Frame</em>, <em>x</em>, <em>y</em>\n
---
"""))

# uploader_traj is read and out_traj is written by traj_upload_handler below
uploader_traj = widgets.FileUpload(description='Trajectories', multiple=True)
display(uploader_traj)
out_traj = widgets.Output()
display(out_traj)

def traj_upload_handler(_):
    """
    Handle File Upload. On success, display trajectory plot and distance network. 
    On failure, give instructions on what is missing or wrong.
    
    `Required`
    :param _: change notification passed by the widget observer, not used
    """
    
    global trajectory_df
    upload_sanitized = False
    
    #exactly one file is expected, anything else is reported instead of raising
    uploads = uploader_traj.value
    if len(uploads) != 1:
        with out_traj:
            clear_output(wait=True)
            display(Markdown("""Please upload exactly one trajectory file."""))
        return
    [trajectories] = uploads
    content = uploads[trajectories]["content"]
    
    #read uploaded file into dataframe, display message if wrong file format
    try:
        trajectory_df = pd.read_csv(io.BytesIO(content))
    except Exception:
        try:
            trajectory_df = pd.read_excel(io.BytesIO(content))
        except Exception: 
            with out_traj:
                clear_output(wait=True)
                display(Markdown("""File must be of type <em>.csv</em> or <em>.xlsx</em>"""))
            return
        
    #clean file and display message if required header(s) are missing
    try:
        trajectory_df = _clean_trajectories(trajectory_df)
        upload_sanitized = True
    except Exception as error: 
        with out_traj:
            clear_output(wait=True)
            required_columns = [('id', 'Id/id'), ('frame', 'Frame/frame'), ('x', 'X/x'), ('y', 'Y/y')]
            missing = [label for column, label in required_columns if column not in trajectory_df.columns]
            for label in missing:
                display(Markdown(""" Missing column header: <em>""" + label + """</em> """))
            #the cleaning failed for another reason, do not leave the user without feedback
            if not missing:
                display(Markdown("""An error occured: """ + str(error)))
            
    #if data upload and sanitation successful, display information about dataset
    if(upload_sanitized):
        with out_traj:
            clear_output(wait=True)
            display(Markdown("""---"""))
            display(Markdown("""## Trajectory map"""))
            trajectory_map = interactive(create_trajectory_map, trajectory_df = fixed(trajectory_df))
            display(trajectory_map)
            display(Markdown("""## Distance network 
            \n The edge label is corresponding to the time in which two individuals are closer to each other than <em>max_dist</em>. 
            \n The edge is displayed if the count is bigger than <em>min_seconds</em>. 
            \n Be patient: The computation may take a few seconds. """))
            distance_network = interactive(create_distance_network, trajectory_df = fixed(trajectory_df), max_dist = (10,500,5), min_seconds = (1,600,5))
            display(distance_network)
            display(Markdown(""" ---"""))    
            
    return

#connect traj_upload_handler to the trajectory upload widget by observing the
#widget's internal '_counter' trait, so the handler runs whenever that trait changes
uploader_traj.observe(traj_upload_handler, names='_counter')   

---

 
## Got a corresponding trajectory file? 
<em>Optionally inspect trajectories/coordinates</em> 

- <strong>Usage</strong>: Upload file containing the data 
 by clicking <em>Trajectories</em>

- <strong>Required columns</strong>: <em>Id</em>, <em>Frame</em>, <em>x</em>, <em>y</em>

---


FileUpload(value={}, description='Trajectories', multiple=True)

Output()

In [25]:
#footer: horizontal rule followed by the acknowledgements and contact note in small print
display(Markdown(""" ---"""))
display(Markdown("""  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>"""))

 ---

  <sub><sup>This tool was developed at the University of Constance under supervision of Michael Aichem and Dr. Karsten Klein from the laboratory for Computational Life Sciences. Valuable feedback and data was provided by Etienne Lein, Manh Huy Nguyen, Jakob Guebel and Dr. Alex Jordan from the laboratory for the Evolution of Collective and Social Behavior. The tool is written in Python, using 'networkx' for network generation, 'GraphViz' for drawing and 'voila' in combination with 'heroku' for deploying.  Please send bugs or recommendations to nicolai.kraus@uni-konstanz.de</sup></sub>