In [37]:
"""
This notebook is the main part of the bachelor's project 'A concept and prototypical implementation for network based 
analysis of fish behavior' by Nicolai Kraus at the University of Constance, supported by Michael Aichem and 
supervised by Dr. Karsten Klein. 

The project consists of a pipeline which loads a behavior dataset and produces an interactive dashboard via Jupyter 
notebook and voila.

Usage: Read "README.md", install needed libraries and load the dashboard via jupyter 
notebook with the button in the upper right-hand corner or via 'voila analyse_behavior.ipynb' in the command line.

"""
import networkx as nx
from networkx.drawing.nx_pydot import to_pydot
import matplotlib.pyplot as plt
import pandas as pd
import math
import os
import numpy as np
import io
import graphviz
from graphviz import Source
import matplotlib as mpl
from ipywidgets import interactive,interact, fixed
import ipywidgets as widgets
from IPython.display import display, Image, Markdown
pd.options.mode.chained_assignment = None  # default='warn'

# Needed on Windows: the Python Graphviz bindings look up the Graphviz executables
# ('dot', 'neato') through PATH, so the default Graphviz 2.38 install directory is
# appended once if PATH does not mention it yet.
if 'C:\\Program Files (x86)\\Graphviz2.38\\bin' not in os.environ["PATH"]:
    os.environ["PATH"] = os.pathsep.join(
        [os.environ["PATH"], 'C:\\Program Files (x86)\\Graphviz2.38\\bin']
    )

def get_fish_ids(boris_df):
    """Return the distinct values of the `subject` column as a list.

    All plotting functions fetch their IDs through this helper, so the ID order (and
    therefore the colour given to each ID) is the same in every plot. The literal
    value 'Subject' and missing values (anything whose string form is 'nan') are
    left out of the result.
    """
    return [
        candidate
        for candidate in boris_df.subject.unique().tolist()
        if candidate != 'Subject' and str(candidate) != 'nan'
    ]


def create_trajectory_map(coordinates_df, boris_df):
    """Scatter the recorded positions of every fish into one figure and save it as PNG.

    Parameters
    ----------
    coordinates_df : DataFrame with the columns `id`, `x` and `y` (one row per
        recorded position).
    boris_df : behavior table; only used to obtain the fish IDs via `get_fish_ids`
        so the colour order matches the other plots.

    Returns
    -------
    The `matplotlib.pyplot` module (kept for backward compatibility with callers).

    Side effects: writes 'images/trajectory_map.png'; the 'images' directory is
    created when it does not exist yet (previously saving failed in that case).

    NOTE(review): rows are matched with `coordinates_df.id == fish`, so the IDs of
    both files must be equal in value and type -- the original author remarked this
    only works for one of the datasets.
    """
    fish_ids = get_fish_ids(boris_df)
    fig = plt.figure(figsize=(9,7))
    ax = fig.subplots()
    trajectory_list = []
    for fish in fish_ids:
        #extract positions for the fish and scatter it (marker size 0.1)
        coordinates = coordinates_df[coordinates_df.id == fish]
        trajectory_list.append(ax.scatter(coordinates.x, coordinates.y, 0.1))
    # markerscale enlarges the tiny scatter markers inside the legend only
    ax.legend(trajectory_list, fish_ids, markerscale=20)
    ax.set_xlabel("x-coordinate", fontsize=18, labelpad=10)
    ax.set_ylabel("y-coordinate", fontsize=18, labelpad=10)
    # savefig does not create missing directories
    os.makedirs('images', exist_ok=True)
    fig.savefig('images/trajectory_map.png', bbox_inches='tight')
    return plt


def _select_behavior_rows(fish_df, behavior):
    """Return the rows of *fish_df* whose behavior or behavioral category equals *behavior*."""
    matches = fish_df.behavior == behavior
    if 'behavioral_category' in fish_df:
        # one boolean mask keeps the original row order and counts a row only once,
        # even if it matches through both columns
        matches = matches | (fish_df.behavioral_category == behavior)
    return fish_df[matches]


def create_accumulate_actions_plot(boris_df, behavior):
    """Plot, per fish, the running count of one behavior (or behavioral category) over time.

    Parameters
    ----------
    boris_df : behavior table with the columns `subject`, `behavior`, `time` and
        optionally `behavioral_category`.
    behavior : name that is compared against the `behavior` column and, when
        present, the `behavioral_category` column.

    For every fish three line segments are drawn in the same colour:
    the running count at each occurrence, a horizontal line from the last
    occurrence to the latest time in the whole table, and a line from the origin
    to the first occurrence. Only the first segment carries the legend label.

    Returns
    -------
    The `matplotlib.pyplot` module (kept for backward compatibility with callers).

    Side effects: shows the figure and writes 'images/accumulate_actions_plot.png'
    (the 'images' directory is created if necessary).
    """
    fish_ids = get_fish_ids(boris_df)
    # select the rows once per fish instead of once per line segment
    selections = [
        _select_behavior_rows(boris_df[boris_df.subject == fish], behavior)
        for fish in fish_ids
    ]
    last_time = boris_df.time.max()

    fig = plt.figure(figsize=(9,7))
    ax = fig.subplots()
    for fish, rows in zip(fish_ids, selections):
        ax.plot(rows.time, range(1, len(rows) + 1), label=fish)
    # restart the colour cycle so the next segments reuse each fish's colour
    ax.set_prop_cycle(None)
    for rows in selections:
        ax.plot([rows.time.max(), last_time], [len(rows), len(rows)])
    ax.set_prop_cycle(None)
    for rows in selections:
        ax.plot([0, rows.time.min()], [0, 1])
    ax.legend()
    ax.set_xlabel("time in s", fontsize=18, labelpad=10)
    ax.set_ylabel("#behaviors", fontsize=18, labelpad=10)
    # save before showing; savefig does not create missing directories
    os.makedirs('images', exist_ok=True)
    fig.savefig('images/accumulate_actions_plot.png', bbox_inches='tight')
    plt.show()
    return plt



def _single_upload_content(upload_value):
    """Return the raw content of the one file held by a FileUpload widget value.

    Raises ValueError when the value does not hold exactly one file.
    """
    # ipywidgets 7 stores {filename: {"content": ..., ...}}; ipywidgets 8 stores a
    # tuple of {"name": ..., "content": ..., ...} entries.
    entries = upload_value.values() if isinstance(upload_value, dict) else upload_value
    [entry] = entries
    return entry["content"]


def create_dashboard():
    """Build the dashboard from the files currently held by the upload widgets.

    Reads the behavior file from the module-level widget `uploader_bhvr` (required)
    and the coordinates file from `uploader_pos` (optional). Each file is first
    parsed as CSV and, if that fails, as an Excel file. Afterwards it displays the
    IDs, behavioral categories and behaviors found, the interactive transition
    network, the trajectory map (only with a coordinates file) and the accumulated
    behavior plot.

    Returns None. If no single behavior file is available a message is printed and
    nothing else is shown.
    """
    #handle user inputed files correctly by checking if it is an .xlsx or .csv file
    try:
        behavior_content = _single_upload_content(uploader_bhvr.value)
    except ValueError:
        # raised for zero files as well as for more than one file
        print('You need to upload exactly one behavior file first.')
        return
    try:
        boris_df = clean_boris_df(pd.read_csv(io.BytesIO(behavior_content)))
    except Exception:
        boris_df = clean_boris_df(pd.read_excel(io.BytesIO(behavior_content)))
    #try using the optional coordinates file, fails if file is not uploaded
    coordinates_present = True
    try:
        coordinates_content = _single_upload_content(uploader_pos.value)
        try:
            coordinates_df = pd.read_csv(io.BytesIO(coordinates_content))
        except Exception:
            coordinates_df = pd.read_excel(io.BytesIO(coordinates_content))
        coordinates_df.columns = [x.lower() for x in coordinates_df.columns]
    except Exception:
        # deliberate best effort: any problem with the optional file only disables
        # the coordinate based views
        coordinates_present = False
        display(Markdown("""#### Optional coordinates file is not uploaded. \n #### Trajectory Map and Distance network cannot be computed"""))
    has_categories = 'behavioral_category' in boris_df
    #display metainformation for the user
    display(Markdown("""### IDs"""))
    print(get_fish_ids(boris_df))
    if has_categories:
        display(Markdown("""### Behavioral categories"""))
        print(boris_df.behavioral_category.unique())
    display(Markdown("""### Behaviors"""))
    print(boris_df.behavior.unique())
    #display transition probability network
    display(Markdown("""## Interactive transition probability network \n ##### kind_of_cycle: Choose what you want to have displayed \n ##### min_count: Choose the minimum edge weight for an edge to be displayed \n ##### rmv_id: Type the IDs (comma separated) to be removed of display \n ##### rmv_bhvr: Type name of behavior to be removed of display. Restarts if you change 'kind_of_cycle' \n ##### with_status: Tick checkbox if you want to see "START" and "STOP" of behaviors \n ##### normalized: Tick checkbox to see relative frequencies instead of raw counts \n
    """))
    # only offer the category view when the data actually has that column,
    # otherwise selecting it would raise a KeyError in the callback
    cycle_kinds = ['behavior']
    if has_categories:
        cycle_kinds.append('behavioral_category')
    behavior_cycle = interactive(create_behavior_cycle, {'manual': True}, boris_df = fixed(boris_df), kind_of_cycle=cycle_kinds, min_count=(0,1,0.02), rmv_id='', rmv_bhvr='', with_status=False, normalized=True)
    display(behavior_cycle)
    if coordinates_present:
        display(Markdown("""## Trajectory map"""))
        trajectory_map = interactive(create_trajectory_map, coordinates_df = fixed(coordinates_df), boris_df = fixed(boris_df))
        display(trajectory_map)
    #display behavior or behavioral category
    display(Markdown("""## Behaviors accumulated in relation to time \n Choose the behavior or behavioral category you want to have displayed"""))
    if has_categories:
        behavior_choices = np.concatenate([boris_df.behavioral_category.unique(),boris_df.behavior.unique()])
    else:
        behavior_choices = boris_df.behavior.unique()
    accumulate_actions = interactive(create_accumulate_actions_plot, boris_df=fixed(boris_df), behavior = behavior_choices)
    display(accumulate_actions)
    return


def clean_boris_df(boris_df):
    """Pre-clean a freshly loaded behavior table and standardise its column names.

    Steps, in order:
    1. all column labels are lower-cased (on the passed frame itself);
    2. if a column is labelled 'observation id' the table is treated as an export
       with a leading information block: the ten columns are renamed to the
       standard names, rows without a subject are dropped and the first remaining
       row is removed as well (presumably the embedded header row -- confirm
       against the export format);
    3. 'modifier 1' / 'behavioral category' are renamed to their underscore form;
    4. for tables with exactly two distinct subjects and no 'modifier_1' column,
       'modifier_1' is filled with the opposite side ('Right' for subject 'Left',
       otherwise 'Left') and 'behavioral_category' is derived from the behavior
       ('bite' and 'mouth' are 'overt aggression', everything else
       'restrained aggression');
    5. the 'time' column is converted to float.

    Returns the cleaned DataFrame. A table without any rows is handled as well
    (the former row-based header check raised IndexError for it).
    """
    #set all headers to lowercase
    boris_df.columns = [x.lower() for x in boris_df.columns]

    #for etiennes data
    # The labels are checked directly: the former `in boris_df.iloc[0]` tested the
    # very same labels but needed at least one row to exist.
    if 'observation id' in boris_df.columns:
        # assumes the export has exactly these ten columns; a different column
        # count makes this assignment raise ValueError
        boris_df.columns = ['time', 'media_file_path', 'total_length', 'fps', 'subject', 'behavior', 'behavioral_category', 'modifier_1', 'comment', 'status']
        boris_df = boris_df.dropna(axis=0, subset=['subject'])
        boris_df = boris_df.iloc[1:]

    #for jakobs data
    # labels missing from the frame are ignored by rename
    boris_df.rename(
        columns={'modifier 1': 'modifier_1', 'behavioral category': 'behavioral_category'},
        inplace=True,
    )

    #for huys data
    if 'subject' in boris_df and len(boris_df.subject.unique()) == 2:
        if 'modifier_1' not in boris_df:
            boris_df['modifier_1'] = np.where(boris_df['subject'] == 'Left', 'Right', 'Left')
            boris_df['behavioral_category'] = np.where(
                boris_df['behavior'].isin(['bite', 'mouth']),
                'overt aggression',
                'restrained aggression',
            )

    #convert time to float if excel gives string objects
    boris_df.time = boris_df.time.astype(float)
    return boris_df


# Module-level memory used by create_behavior_cycle: the behavioral categories,
# behaviors and IDs the user has asked to hide so far. The lists are kept between
# calls, so removals accumulate until the function clears them.
remove_list_cat, remove_list, remove_id_list = [], [], []
def _remember_removals(store, user_text):
    """Add the comma separated names in *user_text* to *store*, skipping names already stored."""
    for name in user_text.split(','):
        if name not in store:
            store.append(name)


def create_behavior_cycle(boris_df, kind_of_cycle, min_count, rmv_id, rmv_bhvr, with_status, normalized):
    """Render the transition network of behaviors (or behavioral categories) as an image.

    For every fish the rows of the behavior table are walked in table order and each
    pair of consecutive entries is counted as one transition. The counts become the
    edges of a directed graph which is rendered with Graphviz to
    'images/transitions.gv.png' and displayed.

    Parameters
    ----------
    boris_df : behavior table with the columns `subject`, `status`, `behavior` and,
        for the category view, `behavioral_category`. The frame is not modified.
    kind_of_cycle : 'behavioral_category' selects the categories, any other value
        selects the behaviors. Switching the kind clears the removal list of the
        other kind.
    min_count : only edges whose weight is strictly greater than this are drawn.
    rmv_id : comma separated IDs to leave out; remembered in `remove_id_list`.
    rmv_bhvr : comma separated behaviors/categories to leave out; remembered in
        `remove_list` / `remove_list_cat`. Names are used exactly as typed.
    with_status : if true, the status ('START', 'STOP', 'POINT') is appended to the
        node names; if false, 'STOP' rows are ignored.
    normalized : if true, the weights of the edges leaving a node are divided by
        their sum and rounded to two decimals; otherwise raw counts are used.

    Returns None.
    """
    fish_ids = get_fish_ids(boris_df)

    #first check if the user wants so see the behaviors or the behavioral categories
    if kind_of_cycle == 'behavioral_category':
        #reset list of removed behaviors
        remove_list.clear()
        removals = remove_list_cat
        source_column = 'behavioral_category'
        all_title = "#### All behavioral categories: \n"
        removed_title = "#### Removed behavioral categories: \n"
        none_title = "#### No behavioral categories removed yet: \n"
    else:
        #reset list of removed behavioral categories
        remove_list_cat.clear()
        removals = remove_list
        source_column = 'behavior'
        all_title = "#### All behaviors: \n"
        removed_title = "#### Removed behavior: \n"
        none_title = "#### No behaviors removed yet \n"

    # assign() builds a new frame, so the caller's table (shared between widget
    # updates) does not gain a 'chosen_data' column
    boris_df = boris_df.assign(chosen_data=boris_df[source_column])
    display(Markdown(all_title))
    print(boris_df.chosen_data.unique())
    if rmv_bhvr:
        _remember_removals(removals, rmv_bhvr)
    if removals:
        display(Markdown(removed_title))
        print(set(removals))
        boris_df = boris_df[~boris_df.chosen_data.isin(removals)]
    else:
        display(Markdown(none_title))

    #remove IDs
    if rmv_id:
        _remember_removals(remove_id_list, rmv_id)
    if remove_id_list:
        display(Markdown("#### Removed IDs: \n"))
        print(set(remove_id_list))
    # typed IDs are strings, so compare the string form to also match numeric IDs
    fish_ids_after_removal = [x for x in fish_ids if str(x) not in remove_id_list]
    if not fish_ids_after_removal:
        print("It does not work if you remove all IDs Huy :)")
        # nothing left to draw
        return

    #loop through dataframe for each fish and add behavior and successor
    successor_list = []
    for fish in fish_ids_after_removal:
        id_frame = boris_df[boris_df.subject == fish]
        if not with_status:
            id_frame = id_frame[id_frame.status != 'STOP']
        actions = id_frame.chosen_data.tolist()
        statuses = id_frame.status.tolist()
        successor_list.extend(zip(actions[:-1], statuses[:-1], actions[1:], statuses[1:]))

    #lets make an edgelist with behavior and successor
    successor_df = pd.DataFrame(successor_list, columns=['action_1', 'status_1', 'action_2', 'status_2'])
    if with_status:
        successor_df['action_1'] = successor_df['action_1'] + ' ' + successor_df['status_1']
        successor_df['action_2'] = successor_df['action_2'] + ' ' + successor_df['status_2']

    # Count per node pair only. Grouping on the status columns as well would split
    # one edge into several rows when the status is not part of the node name.
    edges_df = (
        successor_df
        .groupby(['action_1', 'action_2'])
        .size()
        .reset_index(name='records')
    )

    #normalize the records in [0,1] so that all together are 1 for each action
    if normalized and not edges_df.empty:
        outgoing_total = edges_df.groupby('action_1').records.transform('sum')
        edges_df['records'] = edges_df.records.div(outgoing_total).round(2)
    edges_df = edges_df[edges_df.records > min_count]
    edge_tuples = list(zip(edges_df.action_1, edges_df.action_2))

    #create directed graph with edges and edge attributes
    G = nx.DiGraph()
    G.add_edges_from(edge_tuples)
    # the line width is three times the edge weight, the label is the weight itself
    nx.set_edge_attributes(G, dict(zip(edge_tuples, edges_df.records * 3)), name='penwidth')
    nx.set_edge_attributes(G, dict(zip(edge_tuples, edges_df.records)), name='label')
    #graphviz
    G_dot = Source(to_pydot(G).to_string())
    G_dot.format = 'png'
    G_dot.render('images/transitions.gv', view=False)
    display(Image('images/transitions.gv.png'))
    return
    

In [31]:
#title and introduction
# Both texts are rendered as Markdown, one after the other, by a single display call.
display(
    Markdown("""# Analyse fish behavior \n
This is a interactive tool for inspecting behavior files. They may be of type .csv or .xlsx. \n
Necessary columns are ['time', 'total_length', 'fps', 'subject', 'behavior', 'behavioral_category', 'modifier_1', 'status']. \n
If your data includes only two individuals you may omit 'modifier_1'. \n
Optionally you can include a corresponding coordinates file of type .csv or .excel with the columns ['row_number', 'id', 'frame', 'x', 'y']."""),
    Markdown(""" ### Please upload your behavior file and optionally an corresponding coordinates file. \n 
### Then click on 'Analyse data!'. You can upload new data and repeat that."""),
)


# Analyse fish behavior 

This is a interactive tool for inspecting behavior files. They may be of type .csv or .xlsx. 

Necessary columns are ['time', 'total_length', 'fps', 'subject', 'behavior', 'behavioral_category', 'modifier_1', 'status']. 

If your data includes only two individuals you may omit 'modifier_1'. 

Optionally you can include a corresponding coordinates file of type .csv or .excel with the columns ['row_number', 'id', 'frame', 'x', 'y'].

 ### Please upload your behavior file and optionally an corresponding coordinates file. 
 
### Then click on 'Analyse data!'. You can upload new data and repeat that.

In [32]:
#buttons

#reset user input
def reset(_):
    """Click handler of the 'Clear data' button: empty the dashboard output area.

    `out` is the module-level Output widget; it is looked up when the button is
    clicked, so it only has to exist by then. The click event argument is unused.
    """
    with out:
        out.clear_output()

reset_button = widgets.Button(description="Clear data")
reset_button.on_click(reset)
display(reset_button)

#upload behavior and trajectories
# One upload widget for the behavior file and one for the coordinates file,
# shown in this order.
uploader_bhvr = widgets.FileUpload(multiple=True, description='Behavior')
uploader_pos = widgets.FileUpload(multiple=True, description='Coordinates')
display(uploader_bhvr, uploader_pos)

#analyze uploaded data
# Output area that receives everything the dashboard prints or displays.
out = widgets.Output()

def analyse(_):
    """Click handler of the 'Analyse data!' button.

    Clears the output area and rebuilds the dashboard inside it, so repeated
    clicks replace the previous dashboard. The click event argument is unused.
    """
    with out:
        out.clear_output()
        create_dashboard()

analyse_button = widgets.Button(description="Analyse data!")
analyse_button.on_click(analyse)
# last expression of the cell: shows the button with its output area underneath
widgets.VBox([analyse_button, out])


Button(description='Clear data', style=ButtonStyle())

FileUpload(value={}, description='Behavior', multiple=True)

FileUpload(value={}, description='Coordinates', multiple=True)

VBox(children=(Button(description='Analyse data!', style=ButtonStyle()), Output()))