In [4]:
# imports
import pandas as pd

In [5]:
import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode

# Smoke test: build a three-bar figure and write it out as a standalone HTML
# file. auto_open=True asks plotly to open the written file once it is saved.
fig = go.Figure(
    data=go.Bar(y=[2, 3, 1]),
)
fig.write_html('first_figure.html', auto_open=True)

# Initialise plotly's notebook rendering so that iplot() can display figures
# inline in later cells.
init_notebook_mode(connected=True)

In [6]:
# Read the observation table from CSV into `df`, report the object's type,
# and preview the first rows as the cell output.
path_to_file = "../input/pottokasdata/pottokasfullsankey.csv"
df = pd.read_csv(
    path_to_file,
    encoding='utf-8',
)
print(type(df))
df.head()

In [34]:
# Helper function to transform regular data to sankey format
# Returns data and layout as dictionary
def genSankey(df, cat_cols=(), value_cols='', title='Sankey Diagram'):
    """Build a plotly Sankey figure description from a data frame.

    Each consecutive pair of columns in ``cat_cols`` is treated as a
    (source, target) transition. The ``value_cols`` column is summed over
    every distinct (source, target) pair to give the link weight.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``cat_cols`` plus ``value_cols``.
    cat_cols : sequence of str
        Ordered stage columns; at least two are required.
    value_cols : str
        Name of the single column holding the weight of each row.
    title : str
        Title placed in the returned layout.

    Returns
    -------
    dict
        ``{'data': [trace], 'layout': layout}`` where ``trace`` is a dict of
        type ``'sankey'`` whose link source/target/value are pandas Series.

    Raises
    ------
    ValueError
        If fewer than two stage columns are given, or if a value found in
        the stage columns is not one of the hard-coded node labels.
    """
    # Node labels and their colors are fixed here (not derived from df), so
    # node i always has labelList[i] and colorList[i] regardless of which
    # rows are passed in.
    # NOTE(review): the "2"/"3" suffixes presumably mark the second and third
    # stage columns -- confirm against the CSV.
    # NOTE(review): three color strings carry a trailing space; they are kept
    # exactly as they were.
    labelList = [
        "Standing Quietly", "Sexual", "Playing", "Other", "Ingestive", "Resting",
        "Sexual2", "Ingestive2", "Standing Quietly2", "Walking2", "Playing2", "Resting2",
        "Playing3", "Walking3", "Ingestive3", "Resting3", "Standing Quietly3",
    ]
    colorList = [
        "#FC8D62", "#D91174", "#FFD92F", "#F19634", "#66c2a5", "#8DA0CB",
        "#D91174", "#66c2a5", "#FC8D62", "#7F6DA9", "#FFD92F ", "#8DA0CB ",
        "#FFD92F ", "#7F6DA9", "#66c2a5", "#8DA0CB", "#FC8D62",
    ]

    # Without at least two stage columns there is no transition to draw.
    if len(cat_cols) < 2:
        raise ValueError(
            "cat_cols must name at least two columns to form source-target pairs"
        )

    # transform df into source-target pairs: one frame per adjacent column pair
    stageFrames = []
    for sourceCol, targetCol in zip(cat_cols[:-1], cat_cols[1:]):
        pairDf = df[[sourceCol, targetCol, value_cols]].copy()
        pairDf.columns = ['source', 'target', 'count']
        stageFrames.append(pairDf)

    # Concatenate once and aggregate once; the result is sorted by
    # (source, target), as groupby does by default.
    sourceTargetDf = (
        pd.concat(stageFrames)
        .groupby(['source', 'target'])
        .agg({'count': 'sum'})
        .reset_index()
    )

    # Position of each label in labelList (first occurrence wins, which is
    # what list.index would return).
    labelIndex = {}
    for position, label in enumerate(labelList):
        labelIndex.setdefault(label, position)

    # Fail early, naming every offending label, instead of a per-row
    # "x is not in list" error.
    seenLabels = set(sourceTargetDf['source']).union(sourceTargetDf['target'])
    unknownLabels = sorted(seenLabels - set(labelIndex), key=str)
    if unknownLabels:
        raise ValueError(
            "labels not present in the node label list: {}".format(unknownLabels)
        )

    # add index for source-target pair
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].map(labelIndex).astype(int)
    sourceTargetDf['targetID'] = sourceTargetDf['target'].map(labelIndex).astype(int)

    # creating the sankey diagram
    data = dict(
        type='sankey',
        node=dict(
            pad=15,
            thickness=20,
            line=dict(
                color="black",
                width=0.5
            ),
            label=labelList,
            color=colorList
        ),
        link=dict(
            source=sourceTargetDf['sourceID'],
            target=sourceTargetDf['targetID'],
            value=sourceTargetDf['count']
        )
    )

    layout = dict(
        title=title,
        font=dict(
            size=10
        )
    )

    fig = dict(data=[data], layout=layout)

    return fig

In [37]:
# Build the Sankey description for the whole data set, wrap it in a plotly
# Figure and render it inline.
sank = genSankey(
    df,
    cat_cols=['Before', 'During', 'After'],
    value_cols='Value',
    title='Observations of Behaviors Expressed Before, During, and After Erection/Masturbation in Free-Roaming Pottoka Ponies',
)
fig = go.Figure(sank)
iplot(fig)

In [None]:
# Generating DFs for different filter options
# Every view uses the same stage columns and weight column; only the row
# subset and the title differ.
sankey_kwargs = dict(cat_cols=['Before', 'During', 'After'], value_cols='Value')

# NOTE(review): this compares against the string '0'. If 'Simultaneous' is
# read as a numeric column, the comparison is True for every row and nothing
# is filtered out -- confirm the column's dtype.
synchrony = genSankey(df[df['Simultaneous'] != '0'], title='Observations of Synchronous E/M', **sankey_kwargs)
# Keep only the rows whose Classification matches the view being built.
masturbation = genSankey(df[df['Classification'] == 'Masturbation'], title='Observations of Masturbation', **sankey_kwargs)
erection = genSankey(df[df['Classification'] == 'Erection'], title='Observations of Erection Only', **sankey_kwargs)
proximity = genSankey(df[df['Proximity'] == 1], title='Observations of E/M in Close Proximity to Another Pony', **sankey_kwargs)
haremstallion = genSankey(df[df['Status'] == 'Stallion'], title='Observations of Harem Stallions', **sankey_kwargs)
bachelor = genSankey(df[df['Status'] == 'Soltero'], title='Observations of Bachelor Band', **sankey_kwargs)
arousalnochange = genSankey(df[df['ArousalBDA'] == 'nc'], title='No Change in Arousal Across Entire Sequence', **sankey_kwargs)
arousalup = genSankey(df[df['ArousalBDA'] == 'up'], title='Increase in Arousal Across Entire Sequence', **sankey_kwargs)
arousaldown = genSankey(df[df['ArousalBDA'] == 'down'], title='Decrease in Arousal Across Entire Sequence', **sankey_kwargs)
arousalmix = genSankey(df[df['ArousalBDA'] == 'mix'], title='Varied Arousal During Entire Sequence', **sankey_kwargs)
# Named all_observations rather than `all` so the builtin all() stays usable.
all_observations = genSankey(df, title='All Observations', **sankey_kwargs)

# Constructing menus
# (button label, figure description) in the order the buttons are listed.
menu_entries = [
    ('All', all_observations),
    ('Synchronous', synchrony),
    ('Proximity', proximity),
    ('Harem Stallions', haremstallion),
    ('Bachelor Band', bachelor),
    ('No Change in Arousal Across Entire Sequence', arousalnochange),
    ('Increase in Arousal Across Entire Sequence', arousalup),
    ('Decrease in Arousal Across Entire Sequence', arousaldown),
    ('Varied Arousal Across Entire Sequence', arousalmix),
    ('Masturbation Only', masturbation),
    ('Erection Only', erection),
]

# Each button uses the 'animate' method with the view's figure description
# as its single argument.
updatemenus = [{
    'buttons': [
        {'method': 'animate', 'label': label, 'args': [view]}
        for label, view in menu_entries
    ]
}]

# update layout with buttons, and show the figure
sank = genSankey(
    df,
    title='Observations of Behaviors Expressed Before, During, and After Erection/Masturbation in Free-Roaming Pottoka Ponies',
    **sankey_kwargs
)
fig = go.Figure(sank)
fig.update_layout(updatemenus=updatemenus)
fig.update_traces(link_hoverlabel_font_family="Open Sans", selector=dict(type='sankey'))

iplot(fig)

# Use dropdown below to interact with the plot