# Boxplots
This script generates the boxplots used in the paper by reading the results from the respective Excel sheets.
Set the following parameters depending on which results you want to visualize:

experiment = 'RRMS' or 'NIND'

sampling = 'BagSample' or 'PerSample'

metric = 'Accuracy' or 'Precision' or 'Recall' or 'F-score'

In [None]:
import plotly.express as px
import pandas
import plotly.graph_objects as go
import os
from scipy.stats import wilcoxon
import numpy as np
# ---- Settings: choose which result set to visualise ----
experiment = 'RRMS'
sampling = 'BagSample'
metric = 'F-score'

# Prefix for the exported figures, and the spreadsheet holding the results.
fileToWrite = 'AccuracyFigures/' + experiment + '_' + sampling + '_' + metric
fileName = 'AllResults_' + experiment + '_10runs_' + sampling + '.xlsx'
df = pandas.read_excel(fileName)

# Two-decimal string formatter.
float_formatter = "{:.2f}".format

# One frame per classification task.
dfPhenotype = df[df['Classification'] == 'Phenotype Classification']
dfMultiCell = df[df['Classification'] == 'Multi-cell Classification']

# Semi-transparent colour palette.
colors = [
    'rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
    'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)',
]

# Phenotype classification: one frame per value of the 'Runs' column.
cellcnn, local2, local4, local6, peg2, peg4, peg6 = (
    dfPhenotype[dfPhenotype['Runs'] == run_label]
    for run_label in (
        'CellCnn Centralized',
        'Local_2', 'Local_4', 'Local_6',
        'PriCell (N=2)', 'PriCell (N=4)', 'PriCell (N=6)',
    )
)

# Box order: centralized baseline first, then Local / PriCell side by side
# for each N.
fig = go.Figure()
fig.add_traces([
    go.Box(y=frame[metric], name=label, marker_color=color)
    for frame, label, color in (
        (cellcnn, 'CellCnn <br>  Centralized', 'indianred'),
        (local2, 'Local <br>  (N=2)', 'lightseagreen'),
        (peg2, '<i>PriCell</i> <br> (N=2)', 'blue'),
        (local4, 'Local  <br> (N=4)', 'lightseagreen'),
        (peg4, '<i>PriCell</i> <br> (N=4)', 'blue'),
        (local6, 'Local  <br> (N=6)', 'lightseagreen'),
        (peg6, '<i>PriCell</i> <br> (N=6)', 'blue'),
    )
])
def add_p_value_annotation(fig, array_columns, subplot=None, _format=None):
    '''Annotate pairs of box plots with a Wilcoxon signed-rank p-value.

    For every pair in ``array_columns`` a bracket (two short vertical lines
    joined by a horizontal line) is drawn above the plot area and labelled
    ``p=<value>`` with two decimals. The p-value is computed with
    ``scipy.stats.wilcoxon`` on the ``y`` data of the two traces.

    Parameters:
    ----------
    fig: figure
        plotly boxplot figure
    array_columns: sequence of [int, int]
        which columns (boxes) to compare
        e.g.: [[0,1], [1,2]] compares column 0 with 1 and 1 with 2
    subplot: None or int
        specifies if the figure has subplots and what subplot to add the
        notation to; column numbers are then relative to that subplot
    _format: None or dict
        format characteristics for the lines; recognised keys are
        'interline', 'text_height' and 'color'. Keys that are not given
        fall back to interline=0.07, text_height=1.07, color='black'.

    Returns:
    -------
    fig: figure
        the same figure object, with the shapes and annotations added
    '''
    # Merge user overrides into the defaults so a partial dict is accepted.
    style = dict(interline=0.07, text_height=1.07, color='black')
    if _format:
        style.update(_format)

    # Read the traces straight from the figure instead of a serialised dict,
    # so the y data is whatever plotly stores on the trace itself.
    traces = fig.data

    if subplot:
        subplot_str = '' if subplot == 1 else str(subplot)
        axis_name = 'x' + subplot_str
        # Map "n-th box of this subplot" to its position in fig.data.
        # A trace without an explicit xaxis is treated as living on 'x'.
        indices = [
            position for position, trace in enumerate(traces)
            if (trace.xaxis or 'x') == axis_name
        ]
    else:
        subplot_str = ''
        indices = None

    xref = 'x' + subplot_str
    # "domain" makes y a fraction of the axis height, so values above 1
    # place the bracket above the plotting area.
    yref = 'y' + subplot_str + ' domain'
    line_style = dict(color=style['color'], width=2)

    for index, column_pair in enumerate(array_columns):
        first, second = column_pair[0], column_pair[1]
        if indices is not None:
            data_pair = [indices[first], indices[second]]
        else:
            data_pair = [first, second]

        # Each successive pair is stacked one "interline" higher.
        y_low = 1.01 + index * style['interline']
        y_high = 1.02 + index * style['interline']

        # Paired, two-sided Wilcoxon signed-rank test on the two boxes.
        _, pvalue = wilcoxon(
            np.asarray(traces[data_pair[0]].y, dtype=float),
            np.asarray(traces[data_pair[1]].y, dtype=float),
        )
        symbol = "p=" + "{:.2f}".format(pvalue)

        # Bracket: left tick, horizontal bar, right tick.
        segments = (
            (first, y_low, first, y_high),
            (first, y_high, second, y_high),
            (second, y_low, second, y_high),
        )
        for x0, y0, x1, y1 in segments:
            fig.add_shape(
                type="line",
                xref=xref, yref=yref,
                x0=x0, y0=y0,
                x1=x1, y1=y1,
                line=line_style,
            )

        # Label centred over the bracket; box number n sits at x == n.
        fig.add_annotation(
            font=dict(color=style['color'], size=14),
            x=(first + second) / 2,
            y=y_high * style['text_height'],
            showarrow=False,
            text=symbol,
            textangle=0,
            xref=xref,
            yref=yref,
        )
    return fig
# Axis titles, backgrounds, font size and the fixed y range for the
# phenotype figure. The second update_layout call overrides the plot
# background set by the first one.
fig.update_layout(
    yaxis_title=metric,
    xaxis_title='HD vs. ' + experiment + ' -  Phenotype Classification',
    boxgap=0,
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    showlegend=False,
).update_layout(
    plot_bgcolor='aliceblue',
    font={'size': 18},
).update_yaxes(range=[0.2, 1.001])
# fig.update_traces(notched=True)

    Returns:
    -------
    fig: figure
        figure with the added notation
    '''
    # Specify in what y_range to plot for each pair of columns
    y_range = np.zeros([len(array_columns), 2])
    for i in range(len(array_columns)):
        y_range[i] = [1.01+i*_format['interline'], 1.02+i*_format['interline']]

    # Get values from figure
    fig_dict = fig.to_dict()

    # Get indices if working with subplots
    if subplot:
        if subplot == 1:
            subplot_str = ''
        else:
            subplot_str =str(subplot)
        indices = [] #Change the box index to the indices of the data for that subplot
        for index, data in enumerate(fig_dict['data']):
            #print(index, data['xaxis'], 'x' + subplot_str)
            if data['xaxis'] == 'x' + subplot_str:
                indices = np.append(indices, index)
        indices = [int(i) for i in indices]
        print((indices))
    else:
        subplot_str = ''

    # Print the p-values
    for index, column_pair in enumerate(array_columns):
        if subplot:
            data_pair = [indices[column_pair[0]], indices[column_pair[1]]]
        else:
            data_pair = column_pair

        # Mare sure it is selecting the data and subplot you want
        #print('0:', fig_dict['data'][data_pair[0]]['name'], fig_dict['data'][data_pair[0]]['xaxis'])
        #print('1:', fig_dict['data'][data_pair[1]]['name'], fig_dict['data'][data_pair[1]]['xaxis'])

        # Get the p-value
#         pvalue = stats.ttest_ind(
#             fig_dict['data'][data_pair[0]]['y'],
#             fig_dict['data'][data_pair[1]]['y'],
#             equal_var=False,
#         )[1]
        w, pvalue = wilcoxon(fig_dict['data'][data_pair[0]]['y'],fig_dict['data'][data_pair[1]]['y'])
        symbol = "p="+np.str(float_formatter(pvalue))
        # Vertical line
        fig.add_shape(type="line",
            xref="x"+subplot_str, yref="y"+subplot_str+" domain",
            x0=column_pair[0], y0=y_range[index][0], 
            x1=column_pair[0], y1=y_range[index][1],
            line=dict(color=_format['color'], width=2,)
        )
        # Horizontal line
        fig.add_shape(type="line",
            xref="x"+subplot_str, yref="y"+subplot_str+" domain",
            x0=column_pair[0], y0=y_range[index][1], 
            x1=column_pair[1], y1=y_range[index][1],
            line=dict(color=_format['color'], width=2,)
        )
        # Vertical line
        fig.add_shape(type="line",
            xref="x"+subplot_str, yref="y"+subplot_str+" domain",
            x0=column_pair[1], y0=y_range[index][0], 
            x1=column_pair[1], y1=y_range[index][1],
            line=dict(color=_format['color'], width=2,)
        )
        ## add text at the correct x, y coordinates
        ## for bars, there is a direct mapping from the bar number to 0, 1, 2...
        fig.add_annotation(dict(font=dict(color=_format['color'],size=14),
            x=(column_pair[0] + column_pair[1])/2,
            y=y_range[index][1]*_format['text_height'],
            showarrow=False,
            text=symbol,
            textangle=0,
            xref="x"+subplot_str,
            yref="y"+subplot_str+" domain"
        ))
    return fig
# Fixed canvas size plus the title anchor (no title text is set here).
# The original issued the identical title update twice; once is enough.
fig.update_layout(
    autosize=False,
    width=820,
    height=550,
    title={
        'y': 0.04,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

# Dashed reference line at the median of the centralized CellCnn runs.
# Series.median() replaces the unbound pandas.DataFrame.median(series) call.
fig.add_hline(y=cellcnn[metric].median(), line_width=3, line_dash="dash", line_color="green")

# p-value brackets: box 0 (centralized) against boxes 2, 4 and 6 (PriCell).
fig = add_p_value_annotation(fig, [[0,2], [0,4], [0,6]])
fig.show()

# Make sure the output folder exists before exporting the figure.
output_dir = os.path.dirname(fileToWrite)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
fig.write_image(fileToWrite+"_Phenotype.pdf")
#======================================================================================================#

# Multi-cell classification: one frame per value of the 'Runs' column.
cellcnn, local2, local4, local6, peg2, peg4, peg6 = (
    dfMultiCell[dfMultiCell['Runs'] == run_label]
    for run_label in (
        'CellCnn Centralized',
        'Local_2', 'Local_4', 'Local_6',
        'PriCell (N=2)', 'PriCell (N=4)', 'PriCell (N=6)',
    )
)

# Box order: centralized baseline first, then Local / PriCell side by side
# for each N.
fig = go.Figure()
fig.add_traces([
    go.Box(y=frame[metric], name=label, marker_color=color)
    for frame, label, color in (
        (cellcnn, 'CellCnn  <br> Centralized', 'indianred'),
        (local2, 'Local  <br> (N=2)', 'lightseagreen'),
        (peg2, '<i>PriCell</i> <br>  (N=2)', 'blue'),
        (local4, 'Local <br>  (N=4)', 'lightseagreen'),
        (peg4, '<i>PriCell</i> <br> (N=4)', 'blue'),
        (local6, 'Local <br>  (N=6)', 'lightseagreen'),
        (peg6, '<i>PriCell</i> <br>  (N=6)', 'blue'),
    )
])

# Axis titles, backgrounds and font size for the multi-cell figure.
fig.update_layout(
    yaxis_title=metric,
    xaxis_title='HD vs. '+experiment+' -  Multi-Cell Classification',
    boxgap=0,
    paper_bgcolor='rgb(243, 243, 243)',
    # 'aliceblue' is the value that ended up applied in the original, where a
    # second update_layout call overrode 'rgb(243, 243, 243)'.
    plot_bgcolor='aliceblue',
    showlegend=False,
    font=dict(size=18),
)
fig.update_yaxes(range=[0.2, 1.001])

# Fixed canvas size plus the title anchor (no title text is set here).
fig.update_layout(
    autosize=False,
    width=820,
    height=550,
    title={
        'y': 0.04,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

# Dashed reference line at the median of the centralized CellCnn runs.
# Series.median() replaces the unbound pandas.DataFrame.median(series) call.
fig.add_hline(y=cellcnn[metric].median(), line_width=3, line_dash="dash", line_color="green")

# p-value brackets: box 0 (centralized) against boxes 2, 4 and 6 (PriCell).
fig = add_p_value_annotation(fig, [[0,2], [0,4], [0,6]])
fig.show()

# Make sure the output folder exists before exporting the figure.
output_dir = os.path.dirname(fileToWrite)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
fig.write_image(fileToWrite+"_MultiCell.pdf")

# Boxplots
This script generates the boxplots used in the paper by reading the results from the respective Excel sheets.
Set the following parameters depending on which results you want to visualize for the CMV experiments:

experiment = 'CMV'

sampling = 'BagSample'

metric = 'Accuracy' or 'Precision' or 'Recall' or 'F-score'

In [None]:
import plotly.express as px
import pandas
import plotly.graph_objects as go
import os
# ---- Settings: CMV result set ----
experiment = 'CMV'
sampling = 'BagSample'
metric = 'F-score'

# Prefix for the exported figures, and the spreadsheet holding the results.
fileToWrite = 'AccuracyFigures/' + experiment + '_' + sampling + '_' + metric
fileName = 'AllResults_' + experiment + '_10runs_' + sampling + '.xlsx'

df = pandas.read_excel(fileName)

# One frame per classification task.
dfPhenotype = df[df['Classification'] == 'Phenotype Classification']
dfMultiCell = df[df['Classification'] == 'Multi-cell Classification']

# Semi-transparent colour palette.
colors = [
    'rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
    'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)',
]

# Phenotype classification: one frame per value of the 'Runs' column.
# NOTE: the variables keep their *4 / *6 names but hold the N=3 / N=5 runs.
cellcnn, local2, local4, local6, peg2, peg4, peg6 = (
    dfPhenotype[dfPhenotype['Runs'] == run_label]
    for run_label in (
        'CellCnn Centralized',
        'Local_2', 'Local_3', 'Local_5',
        'PriCell (N=2)', 'PriCell (N=3)', 'PriCell (N=5)',
    )
)

# Box order: centralized baseline first, then Local / PriCell side by side
# for each N.
fig = go.Figure()
fig.add_traces([
    go.Box(y=frame[metric], name=label, marker_color=color)
    for frame, label, color in (
        (cellcnn, 'CellCnn <br>  Centralized', 'indianred'),
        (local2, 'Local <br>  (N=2)', 'lightseagreen'),
        (peg2, '<i>PriCell</i> <br>  (N=2)', 'blue'),
        (local4, 'Local  <br> (N=3)', 'lightseagreen'),
        (peg4, '<i>PriCell</i> <br> (N=3)', 'blue'),
        (local6, 'Local  <br> (N=5)', 'lightseagreen'),
        (peg6, '<i>PriCell</i> <br> (N=5)', 'blue'),
    )
])


# Axis titles, backgrounds and font size for the phenotype figure.
fig.update_layout(
    yaxis_title=metric,
    xaxis_title='HD vs. '+experiment+' -  Phenotype Classification',
    boxgap=0,
    paper_bgcolor='rgb(243, 243, 243)',
    # 'aliceblue' is the value that ended up applied in the original, where a
    # second update_layout call overrode 'rgb(243, 243, 243)'.
    plot_bgcolor='aliceblue',
    showlegend=False,
    font=dict(size=18),
)
fig.update_yaxes(range=[0.2, 1.001])

# Fixed canvas size so exported figures share the same dimensions.
fig.update_layout(
    autosize=False,
    width=820,
    height=550,
)

# Dashed reference line at the median of the centralized CellCnn runs.
# Series.median() replaces the unbound pandas.DataFrame.median(series) call.
fig.add_hline(y=cellcnn[metric].median(), line_width=3, line_dash="dash", line_color="green")

# p-value brackets: box 0 (centralized) against boxes 2, 4 and 6 (PriCell).
fig = add_p_value_annotation(fig, [[0,2], [0,4], [0,6]])
fig.show()

# Make sure the output folder exists before exporting the figure.
output_dir = os.path.dirname(fileToWrite)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
fig.write_image(fileToWrite+"_Phenotype.pdf")
#======================================================================================================#

# Multi-cell classification: one frame per value of the 'Runs' column.
# NOTE: the variables keep their *4 / *6 names but hold the N=3 / N=5 runs.
cellcnn, local2, local4, local6, peg2, peg4, peg6 = (
    dfMultiCell[dfMultiCell['Runs'] == run_label]
    for run_label in (
        'CellCnn Centralized',
        'Local_2', 'Local_3', 'Local_5',
        'PriCell (N=2)', 'PriCell (N=3)', 'PriCell (N=5)',
    )
)

# Box order: centralized baseline first, then Local / PriCell side by side
# for each N.
fig = go.Figure()
fig.add_traces([
    go.Box(y=frame[metric], name=label, marker_color=color)
    for frame, label, color in (
        (cellcnn, 'CellCnn  <br> Centralized', 'indianred'),
        (local2, 'Local  <br> (N=2)', 'lightseagreen'),
        (peg2, '<i>PriCell</i> <br>  (N=2)', 'blue'),
        (local4, 'Local <br>  (N=3)', 'lightseagreen'),
        (peg4, '<i>PriCell</i> <br> (N=3)', 'blue'),
        (local6, 'Local <br>  (N=5)', 'lightseagreen'),
        (peg6, '<i>PriCell</i> <br>  (N=5)', 'blue'),
    )
])

# Axis titles, backgrounds and font size for the multi-cell figure.
fig.update_layout(
    yaxis_title=metric,
    xaxis_title='HD vs. '+experiment+' -  Multi-Cell Classification',
    boxgap=0,
    paper_bgcolor='rgb(243, 243, 243)',
    # 'aliceblue' is the value that ended up applied in the original, where a
    # second update_layout call overrode 'rgb(243, 243, 243)'.
    plot_bgcolor='aliceblue',
    showlegend=False,
    font=dict(size=18),
)
fig.update_yaxes(range=[0.2, 1.001])

# Fixed canvas size so exported figures share the same dimensions.
fig.update_layout(
    autosize=False,
    width=820,
    height=550,
)

# Dashed reference line at the median of the centralized CellCnn runs.
# Series.median() replaces the unbound pandas.DataFrame.median(series) call.
fig.add_hline(y=cellcnn[metric].median(), line_width=3, line_dash="dash", line_color="green")

# p-value brackets: box 0 (centralized) against boxes 2, 4 and 6 (PriCell).
fig = add_p_value_annotation(fig, [[0,2], [0,4], [0,6]])
fig.show()

# Make sure the output folder exists before exporting the figure.
output_dir = os.path.dirname(fileToWrite)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
fig.write_image(fileToWrite+"_MultiCell.pdf")