## Cytokine analysis app

Compiled by: Miguel A. Alcantar

Last updated November 10, 2019

Cytokine app for analyzing the first round of cystic fibrosis (CF) data. The data are estimated concentration values from a 20-plex ELISA (04Oct2019 CFvsNHB PA- 20plex Inflam-MR.xlsx).

In [1]:
################### Importing packages ###################

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pylab import *
import seaborn as sns
from statannot import add_stat_annotation
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import math

################### Function that downloads data and calls widgets ###################

# pipeline for extracting data matrix containing cytokine information we need
def cytokine_controller(excel_path='../../../../CF/04Oct2019 CFvsNHB PA- 20plex Inflam-MR.xlsx'):
    """Load the multiplex concentration sheet and build the interactive plot widget.

    The "Conc in Range" sheet of the workbook is trimmed to the block of
    measurement rows, indexed by well, annotated with the hard-coded plate
    map, and handed to ``plot_cytokine`` through ``ipywidgets.interactive``.

    Parameters
    ----------
    excel_path : str, optional
        Path to the workbook exported by the multi-plex machine. Defaults to
        the path that was previously hard-coded in this function.

    Returns
    -------
    ipywidgets.interactive
        Widget container with one dropdown per plotting option.

    Raises
    ------
    ValueError
        If the first column of the sheet holds fewer than two "Type" cells,
        i.e. the sheet does not have the layout this parser relies on.
    """
    ################### Pre-process excel file ###################

    # the workbook holds several sheets -- we only care about "Conc in Range".
    # The context manager closes the file handle as soon as the sheet is read.
    with pd.ExcelFile(excel_path) as cytokine_workbook:
        # skip first 7 rows -- just file/machine info
        cytokine_raw_concs_df = pd.read_excel(cytokine_workbook, 'Conc in Range', header=7)

    # Creating list containing elements in first column -- this will help us parse out information we care about
    list_of_well_types = list(cytokine_raw_concs_df.iloc[:, 0])

    # find indices of "Type" -- the second occurrence marks the table we want
    idx_of_type = [i for i, x in enumerate(list_of_well_types) if x == "Type"]
    if len(idx_of_type) < 2:
        raise ValueError(
            "Expected at least two 'Type' rows in the first column of sheet "
            "'Conc in Range'; found " + str(len(idx_of_type))
        )
    start_df_idx = idx_of_type[1]

    # index dataframe such that it starts 4-rows after we see type (those rows just contain "background")
    # and ends 5 rows before the final row (which just contain labeling information).
    # .copy() so the column/row assignments below act on an independent frame, not a slice view.
    cytokine_concs_df = cytokine_raw_concs_df.iloc[start_df_idx + 4:-5, :].copy()

    # renaming columns and indices
    cytokine_concs_df = cytokine_concs_df.rename(columns={"Unnamed: 0": "Type",
                                                          "Unnamed: 1": "Well",
                                                          "Unnamed: 2": "Description"})
    # use the well label as the row index, then drop the now-redundant column
    well_by_row = dict(zip(list(cytokine_concs_df.index), list(cytokine_concs_df.loc[:, "Well"])))
    cytokine_concs_df = cytokine_concs_df.rename(index=well_by_row).drop("Well", axis=1)

    # plate map -- hard-coded
    plate_map_dict = {
        "N control T0": ['A3', 'A4', 'D3', 'D4', 'E3', 'E4', 'G3', 'G4'],
        "CF control T0": ['B3', 'B4', 'C3', 'C4', 'F3', 'F4', 'H3', 'H4'],
        "N infect T0": ['A5', 'A6', 'E5', 'E6', 'G5', 'G6'],
        "CF infect T0": ['B5', 'B6', 'C5', 'C6', 'D5', 'D6', 'F5', 'F6', 'H5', 'H6'],
        "N control T8": ['A7', 'A8', 'C7', 'C8', 'D7', 'D8', 'F7', 'F8'],
        "CF control T8": ['B7', 'B8', 'E7', 'E8', 'G7', 'G8'],
        "N infect T8": ['A9', 'A10', 'D9', 'D10', 'F9', 'F10'],
        "CF infect T8": ['B9', 'B10', 'C9', 'C10', 'E9', 'E10', 'G9', 'G10'],
    }

    # adding more parseable metadata to dataframe:
    # "<disease status> <infection status> <time point>" is split on the first space
    for condition, wells in plate_map_dict.items():
        disease_status, _, infection_stat_time = condition.partition(' ')
        for well in wells:
            cytokine_concs_df.loc[well, 'Condition'] = condition
            cytokine_concs_df.loc[well, 'infection_stat_time'] = infection_stat_time
            cytokine_concs_df.loc[well, 'disease_status'] = disease_status

    # wells that are not in the plate map are labelled 'Standard'. Only the metadata
    # columns are filled: filling the whole frame would turn missing concentrations
    # into the string 'Standard' and break the NaN checks done when plotting.
    metadata_columns = ['Condition', 'infection_stat_time', 'disease_status']
    cytokine_concs_df[metadata_columns] = cytokine_concs_df[metadata_columns].fillna('Standard')

    # removing OOR (out of range) values and replacing with NaN
    cytokine_concs_df = cytokine_concs_df.replace('OOR >', np.nan, regex=True)
    cytokine_concs_df = cytokine_concs_df.replace('OOR <', np.nan, regex=True)

    # renaming cytokines so we keep their protein names and remove the parentheses
    # (columns[2:-4] skips the two descriptor columns in front and the four trailing columns)
    original_cytokine_columns = list(cytokine_concs_df.columns[2:-4])
    cytokines = [cyto.split('(')[0].rstrip() for cyto in original_cytokine_columns]
    cytokine_concs_df = cytokine_concs_df.rename(columns=dict(zip(original_cytokine_columns, cytokines)))

    # "None" means "no custom comparison"; the real conditions follow in reverse alphabetical order
    potential_conds_to_compare = ["None"] + sorted(plate_map_dict, reverse=True)

    # creating widgets for multiple user options:
    # which cytokine to plot
    cytokine_to_plot = widgets.Dropdown(options=cytokines,
                                        value=cytokines[0],
                                        description="Cytokine:")

    # which statistical test to run
    statistical_test_to_plot = widgets.Dropdown(options=['Mann-Whitney', 't-test_ind', 't-test_ind_ueq'],
                                                value='t-test_ind',
                                                description="Stat test:")

    # how to display statistical test
    p_val_to_plot = widgets.Dropdown(options=['star', 'simple'],
                                     value='simple',
                                     description="pVal display:")

    # show mean values
    showMeans_to_plot = widgets.Dropdown(options=[False, True],
                                         value=False,
                                         description="Show means:")
    # condition 1 you want to compare
    condition_1_to_compare = widgets.Dropdown(options=potential_conds_to_compare,
                                              value=potential_conds_to_compare[0],
                                              description="Condition 1 :")
    # condition 2 you want to compare
    condition_2_to_compare = widgets.Dropdown(options=potential_conds_to_compare,
                                              value=potential_conds_to_compare[0],
                                              description="Condition 2 :")
    # save the plot
    save_plot = widgets.Dropdown(options=[False, True],
                                 value=False,
                                 description="save plot:")

    return widgets.interactive(plot_cytokine,
                               cytokine_concs_df=fixed(cytokine_concs_df),
                               cytokine_name=cytokine_to_plot,
                               statistical_test=statistical_test_to_plot,
                               print_p_val=p_val_to_plot,
                               means=showMeans_to_plot,
                               condition_to_compare_1=condition_1_to_compare,
                               condition_to_compare_2=condition_2_to_compare,
                               save_plot=save_plot)

################### function for plotting data ###################

# function used in interactive widget
def plot_cytokine(cytokine_concs_df, cytokine_name,
                  statistical_test, print_p_val, means, condition_to_compare_1,
                  condition_to_compare_2, save_plot):
    """Draw a box + swarm plot of one cytokine and annotate N-vs-CF statistics.

    Parameters
    ----------
    cytokine_concs_df : pandas.DataFrame
        Must provide the columns ``Condition`` ("<status> <infection> <time>",
        or 'Standard'), ``infection_stat_time``, ``disease_status`` and one
        column per cytokine.
    cytokine_name : str
        Column to plot on the y axis.
    statistical_test : str
        Test name forwarded to ``add_stat_annotation``.
    print_p_val : str
        'star' for star notation; any other value is treated as 'simple'.
    means : bool
        Forwarded to ``sns.boxplot(showmeans=...)``.
    condition_to_compare_1, condition_to_compare_2 : str
        Optional extra comparison, each given as a full condition such as
        "CF infect T8". The extra pair is ignored when either value is
        "None", when both are equal, or when either group has fewer than two
        non-NaN values.
    save_plot : bool
        When true, the figure is written as .pdf, .png and .svg into
        ``../../../../CF/figs/``.

    Returns
    -------
    list
        The box pairs that were handed to the statistical annotation
        (empty when no pair had enough data).
    """
    import os  # local import: only needed to make sure the figure directory exists

    # create figure object
    fig, ax = plt.subplots(figsize=(10, 8), dpi=400)

    # display as stars or actual p-value; anything that is not 'star' falls back to 'simple'
    # star thresholds: [[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"], [1, "ns"]]
    if print_p_val != 'star':
        print_p_val = 'simple'

    # x-axis order; defined unconditionally so both p-value display modes can plot
    conditions_order = ["control T0", "infect T0", 'control T8', 'infect T8']

    # position of each disease status inside the [N count, CF count] pairs used below
    status_index = {"N": 0, "CF": 1}

    # color schemes
    light_pink = '#FFC9EC'
    light_brown = '#E5B699'
    dark_brown = '#B25116'
    dark_pink = '#FB84D1'

    # palette for boxplot and swarmplot
    face_pal = {'N': light_brown, 'CF': light_pink}
    pal = {'N': dark_brown, 'CF': dark_pink}

    # order to plot results
    hue_order = ['N', 'CF']

    # plot properties
    boxprops = {'edgecolor': 'k', 'linewidth': 2}
    lineprops = {'color': 'k', 'linewidth': 2}
    boxplot_kwargs = {'boxprops': boxprops, 'medianprops': lineprops,
                      'whiskerprops': lineprops, 'capprops': lineprops,
                      'width': 0.75, 'palette': face_pal,
                      'hue_order': hue_order}
    stripplot_kwargs = {'linewidth': 0.6, 'size': 6, 'alpha': 0.9,
                        'palette': pal, 'hue_order': hue_order}

    # removing standards from dataframe to simplify plotting
    samples_df = cytokine_concs_df[cytokine_concs_df['Condition'] != 'Standard'].copy()

    # boxes first, individual points on top
    sns.boxplot(x='infection_stat_time', y=cytokine_name, hue='disease_status', data=samples_df, ax=ax,
                fliersize=0, showmeans=means, order=conditions_order, **boxplot_kwargs)
    sns.swarmplot(x='infection_stat_time', y=cytokine_name, hue='disease_status', data=samples_df, ax=ax,
                  dodge=True, order=conditions_order, **stripplot_kwargs)

    # removing double legend (both plot calls register the same two hue entries)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles[0:2], labels[0:2],
                    loc='upper right',
                    fontsize='large',
                    handletextpad=0.5)
    # newer matplotlib exposes `legend_handles`; older releases only have `legendHandles`
    legend_handles = getattr(lgd, 'legend_handles', None)
    if legend_handles is None:
        legend_handles = lgd.legendHandles
    for handle in legend_handles[:2]:
        handle._sizes = [40]
    plt.xlabel('Condition')
    plt.ylabel(cytokine_name + r" (pg/$\mu$L)")

    # count non-NaN values per x category as [N count, CF count]; categories that are
    # missing from the data keep [0, 0] so the lookups below cannot fail
    conditions_count_numbs_dict = {cond: [0, 0] for cond in conditions_order}
    for conc, cond in samples_df[[cytokine_name, 'Condition']].itertuples(index=False):
        patient_status, _, full_condition = cond.partition(' ')
        if pd.isna(conc):
            continue
        if patient_status in status_index:
            group_counts = conditions_count_numbs_dict.setdefault(full_condition, [0, 0])
            group_counts[status_index[patient_status]] += 1
        else:
            print(full_condition + ": incorrect condition")

    # a category can be tested only when both N and CF have more than one value
    testable = {cond: n_count > 1 and cf_count > 1
                for cond, (n_count, cf_count) in conditions_count_numbs_dict.items()}

    # default comparisons: N vs CF within each x category
    default_pairs = [(("infect T8", "N"), ("infect T8", "CF")),
                     (("control T8", "N"), ("control T8", "CF")),
                     (("infect T0", "N"), ("infect T0", "CF")),
                     (("control T0", "N"), ("control T0", "CF"))]
    box_pairs = [pair for pair in default_pairs if testable.get(pair[0][0], False)]

    # custom condition to compare (only compare if data is present, otherwise an error gets thrown).
    # Work on a copy so the default pairs stay available as a fallback.
    box_pairs2 = list(box_pairs)
    custom_requested = ("None" not in (condition_to_compare_1, condition_to_compare_2)
                        and condition_to_compare_1 != condition_to_compare_2)
    if custom_requested:
        status_1, _, x_category_1 = condition_to_compare_1.partition(' ')
        status_2, _, x_category_2 = condition_to_compare_2.partition(' ')
        if status_1 in status_index and status_2 in status_index:
            count_1 = conditions_count_numbs_dict.get(x_category_1, [0, 0])[status_index[status_1]]
            count_2 = conditions_count_numbs_dict.get(x_category_2, [0, 0])[status_index[status_2]]
            custom_pair = ((x_category_1, status_1), (x_category_2, status_2))
            already_listed = custom_pair in box_pairs2 or custom_pair[::-1] in box_pairs2
            if count_1 > 1 and count_2 > 1 and not already_listed:
                box_pairs2.append(custom_pair)

    def annotate(pairs):
        # conduct statistical test and annotate plot
        add_stat_annotation(ax, data=samples_df, x='infection_stat_time', y=cytokine_name,
                            hue='disease_status', order=conditions_order, hue_order=hue_order,
                            box_pairs=pairs,
                            test=statistical_test, text_format=print_p_val, loc='inside', verbose=0)

    annotated_pairs = []
    if box_pairs2:
        try:
            annotate(box_pairs2)
            annotated_pairs = box_pairs2
        except ValueError:
            # retrying an identical list cannot succeed, so surface the error
            if box_pairs2 == box_pairs:
                raise
            # otherwise the custom pair was the difference: fall back to the defaults
            if box_pairs:
                annotate(box_pairs)
                annotated_pairs = box_pairs

    if save_plot:
        output_dir = "../../../../CF/figs/"
        os.makedirs(output_dir, exist_ok=True)
        # '/' in a cytokine name would otherwise be read as a directory separator
        file_stem = output_dir + cytokine_name.replace('/', '_')
        for extension in (".pdf", ".png", ".svg"):
            plt.savefig(file_stem + extension)
    return annotated_pairs
# Build the widget set; as the last expression of the notebook cell, the
# returned `interactive` object is what the notebook renders below.
cytokine_controller()

interactive(children=(Dropdown(description='Cytokine:', options=('E-Selectin', 'GM-CSF', 'IFN-a', 'IFN-g', 'IL…