In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
from statannot import add_stat_annotation
from scipy import stats
from statsmodels.stats.multitest import multipletests

### Input Files 

In [2]:
# Excel inputs, all located in the same folder relative to the notebook.
# (`ethinicity_file` keeps its original spelling because later cells use that name.)
input_dir = '../input/'
ec50_file = input_dir + 'ec50_values.xlsx'
survival_file = input_dir + 'severity_category.xlsx'
oxygen_file = input_dir + 'oxygensuppl_category.xlsx'
ethinicity_file = input_dir + 'ethnicity_category.xlsx'
race_file = input_dir + 'race_category.xlsx'

### Objects

In [3]:
os.chdir('classes')
%run longit.ipynb
ecf = LONGIT()
%run categories.ipynb
cat = CATEGORY()
etnic = ETHNICITY()
race = RACE()
%run correct_pval.ipynb
os.chdir('..')

### Initializing directories  

In [4]:
outdir = '../output/'
targetVar = 'EC50'

# Every run starts from an empty <outdir><targetVar> tree: remove a previous
# one if present, then recreate it.
target_dir = outdir + targetVar
if os.path.isdir(target_dir):
    shutil.rmtree(target_dir)
# makedirs (unlike mkdir) also creates `outdir` itself when it does not exist yet.
os.makedirs(target_dir)

# One sub-folder per analysis section written by the cells below.
for subdir in ('overview', 'overview_win5',   # Overview
               'survival_win5',               # Survival
               'oxygenation_win5',            # Oxygen
               'severity_win5',               # Severity
               'ethnicity_win5',              # Ethnicity
               'race_win5'):                  # Race
    os.makedirs(target_dir + '/' + subdir, exist_ok=True)

### Load EC50 file and perform initial processing

In [5]:
# Load the EC50 spreadsheet; the helper exposes the table as `ecf.df`.
ecf.loadfiletodf(ec50_file)

# Columns that are not needed for the analysis.
col_2_del = [
    'Notes',
    'run1_description',
    'run1_date',
    'run1_1/EC50',
    'run2_date',
    'run2_description',
    'run2_1/EC50',
    '(run1_1/EC50)/(run2_1/EC50)',
    'Sample#',
]

# Drop them and give the onset column a name without the date suffix.
df_1 = (
    ecf.df
    .drop(columns=col_2_del)
    .rename(columns={"sampleID_onset_080620": "sampleID_onset"})
)

# Separate samples with missing elements (df_missing) from the rest (df_2).
df_missing, df_2 = ecf.samples_with_missing_elements(df_1)

# Reformat the onset column via the helper.
df_3 = ecf.reformat_onset(df_2)

In [6]:
# Split off samples whose EC50 values disagree by more than `thresh`
# (the disagreement measure itself is defined by ecf.incoherent_ec50).
thresh = 3
df4_incoherent, df4 = ecf.incoherent_ec50(df_3, thresh)
print('Rows with incoherent EC50 values: {}'.format(len(df4_incoherent)))
print('Rows passing filter: {}'.format(len(df4)))
display(df4)

Rows with incoherent EC50 values: 0
Rows passing filter: 1056


Unnamed: 0,sampleID,Pt#,sampleID_onset,run1_EC50,run2_EC50,average_EC50
0,6-01,6.0,7,0.000951,0.000676,0.000814
1,6-02,6.0,8,0.000857,0.000484,0.000670
2,6-03,6.0,9,0.000321,0.000343,0.000332
3,6-04,6.0,10,0.000166,0.000197,0.000182
4,6-05,6.0,11,0.000028,0.000038,0.000033
...,...,...,...,...,...,...
1052,195-4,195.0,18,0.000013,0.000017,0.000015
1053,196-0,196.0,7,0.003363,0.002265,0.002814
1054,196-2,196.0,9,0.000461,0.000823,0.000642
1055,196-4,196.0,11,0.000102,0.000134,0.000118


### Loading survival data

In [7]:
# Outcome table; its first column becomes the index, which is used as the
# patient identifier in the arrays built below.
df_surv = pd.read_excel(survival_file, index_col=0, header=0)
# Group by the column label itself: grouping by a one-element list makes
# recent pandas expect a 1-tuple key in get_group() and warn on scalar keys.
df_surv_gr = df_surv.groupby('Outcome')
# Outcome labels used here: 'Mild', 'Severe', 'Non-survival'.
# Survivors are the 'Mild' and 'Severe' groups, concatenated in that order.
survival_indx_ar = np.concatenate([np.array(df_surv_gr.get_group('Mild').index),
                                   np.array(df_surv_gr.get_group('Severe').index)])
nonsurvival_indx_ar = np.array(df_surv_gr.get_group('Non-survival').index)

### Loading patient categories

In [8]:
# Per-patient category tables, each built from its Excel file by the matching
# helper object. Later cells iterate over them with the patient as row index.
# Oxygen supplementation
df_ox = cat.oxygen_supply_to_df(oxygen_file)
# Ethnicity
df_etnic = etnic.cat_to_df(ethinicity_file)
# Race
df_race = race.cat_to_df(race_file)

### Loading EC50 data into df with right format for plotting

In [9]:
# Reshape the filtered table for plotting and put the values on a -log10 scale.
# Later cells index the rows of this frame by patient and use its columns as
# days after onset.
df_var = -np.log10(
    ecf.transform_df_for_plotting(df4, 'Pt#', 'sampleID_onset', 'average_EC50')
)
# Sliding-window version of the same table.
# NOTE(review): the argument 2 presumably means two days either side, i.e. the
# 5-day window behind the "win5" names — confirm in LONGIT.
df_var_win5 = ecf.average_EC50_slidingWindow(df_var, 2)

### New dataframes with survival data, oxygenation data and severity data

In [10]:
# Attach the survival category to the windowed data and export the result.
df_var_surv_win5 = ecf.add_survival_to_df(df_var_win5, survival_indx_ar, nonsurvival_indx_ar)
survival_xlsx = outdir + targetVar + '/survival_win5/' + targetVar + '_survival.xlsx'
df_var_surv_win5.to_excel(survival_xlsx)

In [11]:
# Attach the oxygen-supplementation category to the windowed data.
df_var_ox_win5 = ecf.add_category_to_df(df_var_win5, df_ox)

# Rows belonging to non-surviving patients get their own category, whatever
# oxygen support they received.
nonsurvivor_rows = [
    row_label
    for row_label, patient in df_var_ox_win5['Patient'].items()
    if patient in nonsurvival_indx_ar
]
for row_label in nonsurvivor_rows:
    df_var_ox_win5.at[row_label, 'Category'] = 'Non-survival'

oxygen_xlsx = outdir + targetVar + '/oxygenation_win5/' + targetVar + '_oxygen_win5.xlsx'
df_var_ox_win5.to_excel(oxygen_xlsx)

In [12]:
# Collapse the oxygen-support labels into coarser severity labels.
# cat_four holds the six oxygen-support labels; cat_two holds, position by
# position, the label each of them is replaced with.
cat_four = ['Room air','Nasal canula', 'NRM','CPAP-BPAP','HiFlow', 'Intubation']
cat_two = ['Room_air','Mild', 'Mild', 'Severe','Severe', 'Severe']
# replace() returns a new frame, so df_var_ox_win5 itself is left untouched.
df_var_ox2_win5 = df_var_ox_win5.replace(cat_four, cat_two)

# Non-surviving patients are labelled 'Non-survival' regardless of severity.
nonsurvivor_rows = [
    row_label
    for row_label, patient in df_var_ox2_win5['Patient'].items()
    if patient in nonsurvival_indx_ar
]
for row_label in nonsurvivor_rows:
    df_var_ox2_win5.at[row_label, 'Category'] = 'Non-survival'

severity_xlsx = outdir + targetVar + '/severity_win5/' + targetVar + '_oxygen_5cat_win.xlsx'
df_var_ox2_win5.to_excel(severity_xlsx)

In [13]:
# Attach the ethnicity category to the windowed data and export the result.
df_var_etnic_win5 = ecf.add_category_to_df(df_var_win5, df_etnic)
ethnicity_xlsx = outdir + targetVar + '/ethnicity_win5/' + targetVar + '_ethnicity_win5.xlsx'
df_var_etnic_win5.to_excel(ethnicity_xlsx)

In [14]:
# Attach the race category to the windowed data and export the result.
df_var_race_win5 = ecf.add_category_to_df(df_var_win5, df_race)
race_xlsx = outdir + targetVar + '/race_win5/' + targetVar + '_race_win5.xlsx'
df_var_race_win5.to_excel(race_xlsx)

### New df with all info for a given variable of interest 

In [15]:
# Build one table per dataset (raw and windowed) that carries, next to the
# per-day values, the survival status, oxygenation type, severity category,
# ethnicity and race of every patient.
df_var_info = df_var.copy()
df_var_win5_info = df_var_win5.copy()
info_frames = (df_var_info, df_var_win5_info)

# Survival data
for survival_label, patients in (('Survival', survival_indx_ar),
                                 ('Non-Survival', nonsurvival_indx_ar)):
    for item in patients:
        for frame in info_frames:
            frame.loc[item, 'Survival'] = survival_label

# Oxygenation type, plus the severity category derived from it for survivors.
severity_by_oxygen = {
    'Room air': 'Room air',
    'Nasal canula': 'Mild',
    'NRM': 'Mild',
    'HiFlow': 'Severe',
    'Intubation': 'Severe',
    'CPAP-BPAP': 'Severe',
}
for patient, row in df_ox.iterrows():
    # The per-row value gets its own name: the original loop variable was
    # called `cat`, which overwrote the CATEGORY() helper object of that name
    # and broke any re-run of the category-loading cell.
    ox_cat = row[0]
    for frame in info_frames:
        frame.loc[patient, 'Oxygenation'] = ox_cat
    survival_status = df_var_info.loc[patient, 'Survival']
    if survival_status == 'Survival':
        # Oxygenation labels outside the mapping leave 'Category' unset.
        severity = severity_by_oxygen.get(ox_cat)
    elif survival_status == 'Non-Survival':
        severity = 'Non-Survival'
    else:
        # Patient without a survival label: no category is assigned.
        severity = None
    if severity is not None:
        for frame in info_frames:
            frame.loc[patient, 'Category'] = severity

# Ethnicity and race
for column, category_df in (('Ethnicity', df_etnic), ('Race', df_race)):
    for patient, row in category_df.iterrows():
        for frame in info_frames:
            frame.loc[patient, column] = row[0]

df_var_info.to_excel(outdir + targetVar + '/' + targetVar + '_dataframe.xlsx')
df_var_win5_info.to_excel(outdir + targetVar + '/' + targetVar + '_win5_dataframe.xlsx')

### Plotting data coverage and histograms

In [16]:
%%capture
# Plot datapoints per day
ecf.plot_points_per_day(df_var,outdir + targetVar + '/overview/' + targetVar + '_histogram_patients_per_onsetday.pdf')
ecf.plot_points_per_day(df_var_win5,outdir + targetVar + '/overview_win5/' + targetVar + '_histogram_patients_per_onsetday_win5.pdf')
# Plot datapoints per patient
ecf.plot_points_per_patient(df_var,outdir + targetVar + '/overview/' + targetVar + '_histogram_patients_per_readoutDays.pdf')
ecf.plot_points_per_patient(df_var_win5,outdir + targetVar + '/overview_win5/' + targetVar + '_histogram_patients_per_readoutDays_win5.pdf')
# Plot available data for each patient in a clustermap
ecf.plot_clustermap_availdata(df_var,outdir + targetVar + '/overview/' + targetVar + '_data_coverage.pdf')
ecf.plot_clustermap_availdata(df_var_win5,outdir + targetVar + '/overview_win5/' + targetVar + '_data_coverage_win5.pdf')

### Plotting raw data for each individual patient

In [17]:
# Timelines for the raw data (time_plot) and the windowed data (time_plot_win5).
# The patient list always comes from df_var's index, for both datasets.
timeline_datasets = [
    (df_var, '/time_plot', ''),
    (df_var_win5, '/time_plot_win5', '_win5'),
]

# One figure with all patients per dataset ...
for frame, subdir, suffix in timeline_datasets:
    ecf.plot_ec50(np.array(df_var.index), frame, outdir + targetVar + subdir,
                  targetVar + '_timeline_all' + suffix + '.pdf', targetVar)

# ... then one figure per patient per dataset, under the same file name.
for indx in np.array(df_var.index):
    outfile = 'pat_' + str(indx) + '.pdf'
    for frame, subdir, _ in timeline_datasets:
        ecf.plot_ec50([indx], frame, outdir + targetVar + subdir, outfile, targetVar)

In [18]:
# Plotting representative patients: windowed -log10(EC50) trajectories of
# three hand-picked patients in a single figure.
indx_ar = [22.0, 71.0, 6.0]
color_ar = ['#626567', '#5499C7', '#CD6155']

# The day columns are the same for every patient, so read them once; they are
# also needed after the loop for the x ticks.
col_ar = np.array(df_var_win5.columns)

f, ax = plt.subplots(figsize=[5,10])
for indx, color in zip(indx_ar, color_ar):
    print(indx)
    val_ar = np.array(df_var_win5.loc[indx])
    # Keep only days with a finite value so the line joins actual readouts.
    has_value = np.isfinite(val_ar)
    ax.plot(col_ar[has_value], val_ar[has_value], 'o-', label=indx,
            markersize=6, linewidth=1.5, color=color)

ax.set_ylim(0, 6)
ax.set_xticks(np.arange(0, col_ar.max()+4, 4))
ax.set_xlim(4, 25)
ax.set_xlabel('Days after onset')
ax.set_ylabel('-log10(EC50)')
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(1)

outfile = 'representative_patients_22_71_6.pdf'
outdir1 = outdir + targetVar + '/time_plot_win5'
f.savefig(outdir1 + '/' + outfile,dpi=300, transparent=True)
# Release the figure so it is neither displayed inline nor kept in memory.
f.clear()
plt.close(f)

22.0
71.0
6.0


### Plotting survival/nonsurvival using window5

In [19]:
%%capture
# Gather all pvalues for multiple testing correction
# apply only to days with a mimimun number of samples per category (survival, non-survival)
min_size_pvalcor = 10
day_2_pval, day_2_pvalcor = ecf.multipletesting_day_to_pval(df_var_surv_win5, min_size_pvalcor, False)
cat_order = ['Survival','Non-survival']
palette_order = ['#98FAF3','#E8384F']
# boxplot for each day, statistically comparing survival Vs nonSurvival
day_st = set(df_var_surv_win5['Day'].tolist())
for day in day_st:
    ecf.plot_ec50_surival_boxplot(day, df_var_surv_win5, \
                                  outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_boxplot_win5',
                                  day_2_pval, day_2_pvalcor, min_size_pvalcor, False, targetVar, 0, 6, cat_order, palette_order)

In [20]:
# Survival vs non-survival trajectories as line plots, once per aggregate.
for agg_name in ('mean', 'median'):
    ecf.plot_ec50_survival_trajectory(
        df_var_surv_win5,
        outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_lineplot_trajectory_win5_' + agg_name + '.pdf',
        False, targetVar, 'lineplot', agg_name, 0, 0, cat_order, palette_order,
    )

In [21]:
# Close-up versions of the survival trajectories: same calls as the full
# plots but with 30 and 6 in place of the two 0 arguments.
# NOTE(review): presumably these are the x and y upper limits — confirm in LONGIT.
for agg_name in ('mean', 'median'):
    ecf.plot_ec50_survival_trajectory(
        df_var_surv_win5,
        outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_lineplot_trajectory_win5_' + agg_name + '_closeup.pdf',
        False, targetVar, 'lineplot', agg_name, 30, 6, cat_order, palette_order,
    )

In [22]:
# All-against-all pairwise Mann-Whitney comparison, one call per day; every
# call is given the same output folder and file name.
day_st = set(df_var_surv_win5['Day'].tolist())
# cat_order is rebound here with the two categories in the opposite order to
# the boxplot cell, while palette_order is left as it was.
# NOTE(review): the violin-plot cells further down pass this cat_order together
# with the unchanged palette_order — confirm the colour/category pairing there.
cat_order = ['Non-survival','Survival']
survival_stats_dir = outdir + targetVar + '/survival_win5'
survival_stats_file = '/'+ targetVar + '_survival_pairw_stats_win5' + '.txt'
for day in day_st:
    ecf.all_against_all_pairwise_mann_whitney(
        df_var_surv_win5, day, survival_stats_dir, survival_stats_file, cat_order,
    )

In [23]:
# Correct the pairwise p-values day by day, passing 0.05 as the threshold,
# and save the correction.
survival_stats_path = outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_pairw_stats_win5' + '.txt'
correct = CORRECTPVAL(survival_stats_path)
correct.readfile()
correct.correctpval_byday(0.05)
correct.save_correction()

In [24]:
# One survival violin plot per day.
day_st = set(df_var_surv_win5['Day'].tolist())
survival_violin_out = outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_violinplot_win5'
for day in day_st:
    ecf.plot_targetVar_violinplot(
        day, df_var_surv_win5, survival_violin_out,
        False, targetVar, 0, 0, cat_order, palette_order,
    )

In [25]:
# Same violin plot, called with 'All' in place of a single day.
survival_violin_out = outdir + targetVar + '/survival_win5/'+ targetVar + '_survival_violinplot_win5'
ecf.plot_targetVar_violinplot(
    'All', df_var_surv_win5, survival_violin_out,
    False, targetVar, 0, 0, cat_order, palette_order,
)

### Plotting oxygen supplementation using window5

In [26]:
# Oxygen-supplementation categories in plotting order, each with its colour.
oxygen_palette = {
    'Room air': '#4178BC',
    'Nasal canula': '#ABB2B9',
    'NRM': '#D6EAF8',
    'CPAP-BPAP': '#ABEBC6',
    'HiFlow': '#F6DDCC',
    'Intubation': '#D7BDE2',
    'Non-survival': '#E8384F',
}
cat_order = list(oxygen_palette)
palette_order = list(oxygen_palette.values())

In [27]:
# Oxygenation trajectories as line plots, once per aggregate.
for agg_name in ('mean', 'median'):
    ecf.lineplot_ec50_category_trajectory(
        df_var_ox_win5,
        outdir + targetVar + '/oxygenation_win5/'+ targetVar + '_oxygenation_lineplot_trajectory_oxygen_win5_' + agg_name + '.pdf',
        '', '', targetVar, agg_name, cat_order, palette_order, 0, 0,
    )

In [28]:
# Close-up of the mean oxygenation trajectory: same call as the full plot but
# with 30 as the third argument instead of ''.
oxygen_closeup_pdf = outdir + targetVar + '/oxygenation_win5/'+ targetVar + '_oxygenation_lineplot_trajectory_oxygen_win5_mean_closedUP.pdf'
ecf.lineplot_ec50_category_trajectory(
    df_var_ox_win5, oxygen_closeup_pdf,
    30, '', targetVar, 'mean', cat_order, palette_order, 0, 0,
)

In [29]:
# One oxygenation boxplot per day.
day_st = set(df_var_ox_win5['Day'].tolist())
oxygen_boxplot_out = outdir + targetVar + '/oxygenation_win5/oxygen_supp_boxplot_win5'
for day in day_st:
    ecf.plot_ec50_category_boxplot(
        day, df_var_ox_win5, oxygen_boxplot_out,
        'oxygen_suppy', cat_order, 0, 6, targetVar, palette_order,
    )

In [30]:
# Per-day oxygenation boxplots for a reduced category list. The loop reuses
# day_st as left by the previous boxplot cell.
# NOTE(review): besides 'Room air', this list also omits 'CPAP-BPAP' (and its
# colour '#ABEBC6') compared with the full oxygenation categories — confirm
# that this is intended.
noRoom_palette = {
    'Nasal canula': '#ABB2B9',
    'NRM': '#D6EAF8',
    'HiFlow': '#F6DDCC',
    'Intubation': '#D7BDE2',
    'Non-survival': '#E8384F',
}
cat_order_noRoom = list(noRoom_palette)
palette_order_noRoom = list(noRoom_palette.values())
oxygen_noRoom_out = outdir + targetVar + '/oxygenation_win5/oxygen_supp_boxplot_win5_noRoom'
for day in day_st:
    ecf.plot_ec50_category_boxplot(
        day, df_var_ox_win5, oxygen_noRoom_out,
        'oxygen_suppy', cat_order_noRoom, 0, 6, targetVar, palette_order_noRoom,
    )

In [31]:
# All-against-all pairwise Mann-Whitney comparison between the oxygenation
# categories, one call per day (day_st as left by the boxplot cells).
oxygen_stats_dir = outdir + targetVar + '/oxygenation_win5'
oxygen_stats_file = '/' + targetVar + '_oxygenation_pairw_stats_win5' + '.txt'
for day in day_st:
    ecf.all_against_all_pairwise_mann_whitney(
        df_var_ox_win5, day, oxygen_stats_dir, oxygen_stats_file, cat_order,
    )

In [32]:
# Correct the oxygenation pairwise p-values day by day, passing 0.05 as the
# threshold, and save the correction.
oxygen_stats_path = outdir + targetVar + '/oxygenation_win5/' + targetVar + '_oxygenation_pairw_stats_win5' + '.txt'
correct = CORRECTPVAL(oxygen_stats_path)
correct.readfile()
correct.correctpval_byday(0.05)
correct.save_correction()

### Plotting severity using window5

In [33]:
# Severity categories in plotting order, each with its colour.
severity_palette = {
    'Room_air': '#4178BC',
    'Mild': '#37A862',
    'Severe': '#D68910',
    'Non-survival': '#E8384F',
}
cat_order = list(severity_palette)
palette_order = list(severity_palette.values())

In [34]:
# Severity trajectories as line plots, once per aggregate.
for agg_name in ('mean', 'median'):
    ecf.lineplot_ec50_category_trajectory(
        df_var_ox2_win5,
        outdir + targetVar + '/severity_win5/' + targetVar + '_lineplot_severity_5cat_win5_' + agg_name + '.pdf',
        '', '', targetVar, agg_name, cat_order, palette_order, 0, 0,
    )

In [35]:
# Close-up versions of the severity trajectories: same calls as the full
# plots but with 30 and 6 in place of the two '' arguments.
for agg_name in ('mean', 'median'):
    ecf.lineplot_ec50_category_trajectory(
        df_var_ox2_win5,
        outdir + targetVar + '/severity_win5/' + targetVar + '_lineplot_severity_5cat_win5_' + agg_name + '_closedup.pdf',
        30, 6, targetVar, agg_name, cat_order, palette_order, 0, 0,
    )

In [36]:
# One severity boxplot per day.
day_st = set(df_var_ox2_win5['Day'].tolist())
severity_boxplot_out = outdir + targetVar + '/severity_win5/severity_5cat_boxplot_win5'
for day in day_st:
    ecf.plot_ec50_category_boxplot(
        day, df_var_ox2_win5, severity_boxplot_out,
        'oxygen_suppy', cat_order, 0, 6, targetVar, palette_order,
    )

In [37]:
# All-against-all pairwise Mann-Whitney comparison between the severity
# categories, one call per day (day_st as left by the boxplot cell).
severity_stats_dir = outdir + targetVar + '/severity_win5/'
severity_stats_file = targetVar + '_severity_5cat_pairw_stats_win5' + '.txt'
for day in day_st:
    ecf.all_against_all_pairwise_mann_whitney(
        df_var_ox2_win5, day, severity_stats_dir, severity_stats_file, cat_order,
    )

In [38]:
# Correct the severity pairwise p-values day by day, passing 0.05 as the
# threshold, and save the correction.
severity_stats_path = outdir + targetVar + '/severity_win5/' + targetVar + '_severity_5cat_pairw_stats_win5' + '.txt'
correct = CORRECTPVAL(severity_stats_path)
correct.readfile()
correct.correctpval_byday(0.05)
correct.save_correction()

### Plotting ethnicity using window5

In [39]:
# Ethnicity categories in plotting order, each with its colour.
ethnicity_palette = {
    'Hispanic': '#4178BC',
    'Non-hispanic': '#37A862',
    'Unknown': '#D68910',
}
cat_order = list(ethnicity_palette)
palette_order = list(ethnicity_palette.values())

In [40]:
# Ethnicity trajectories as line plots, once per aggregate.
for agg_name in ('mean', 'median'):
    ecf.lineplot_ec50_category_trajectory(
        df_var_etnic_win5,
        outdir + targetVar + '/ethnicity_win5/' + targetVar + '_lineplot_ethnicity_3cat_win5_' + agg_name + '.pdf',
        '', '', targetVar, agg_name, cat_order, palette_order, '', '',
    )

In [41]:
# Close-up ethnicity trajectories, with the same axis window for the mean and
# the median plot.
xmin, xmax, ymin, ymax = 0, 30, 2, 5
for agg_name in ('mean', 'median'):
    # Both plots receive (xmin, ymin) as the trailing pair; the median call
    # previously passed (ymin, ymin), i.e. ymin in the xmin position.
    ecf.lineplot_ec50_category_trajectory(
        df_var_etnic_win5,
        outdir + targetVar + '/ethnicity_win5/' + targetVar + '_lineplot_ethnicity_3cat_win5_' + agg_name + '_closedup.pdf',
        xmax, ymax, targetVar, agg_name, cat_order, palette_order, xmin, ymin,
    )

In [42]:
# One ethnicity boxplot per day.
day_st = set(df_var_etnic_win5['Day'].tolist())
ethnicity_boxplot_out = outdir + targetVar + '/ethnicity_win5/ethinicity_boxplot_win5'
for day in day_st:
    ecf.plot_ec50_category_boxplot(
        day, df_var_etnic_win5, ethnicity_boxplot_out,
        'ethnicity', cat_order, 0, 0, targetVar, palette_order,
    )

In [43]:
# All-against-all pairwise Mann-Whitney comparison between the ethnicity
# categories, one call per day (day_st as left by the boxplot cell).
ethnicity_stats_dir = outdir + targetVar + '/ethnicity_win5/'
ethnicity_stats_file = targetVar + '_ethnicity_pairw_stats_win5' + '.txt'
for day in day_st:
    ecf.all_against_all_pairwise_mann_whitney(
        df_var_etnic_win5, day, ethnicity_stats_dir, ethnicity_stats_file, cat_order,
    )

In [44]:
# Correct the ethnicity pairwise p-values day by day, passing 0.05 as the
# threshold, and save the correction.
ethnicity_stats_path = outdir + targetVar + '/ethnicity_win5/' + targetVar + '_ethnicity_pairw_stats_win5' + '.txt'
correct = CORRECTPVAL(ethnicity_stats_path)
correct.readfile()
correct.correctpval_byday(0.05)
correct.save_correction()

### Plotting race using window5

In [45]:
# Race categories in plotting order, each with its colour.
race_palette = {
    'Black': '#454545',
    'White': '#D68910',
    'Other': '#1E8449',
}
cat_order = list(race_palette)
palette_order = list(race_palette.values())

In [46]:
# Race trajectories as line plots, once per aggregate.
for agg_name in ('mean', 'median'):
    ecf.lineplot_ec50_category_trajectory(
        df_var_race_win5,
        outdir + targetVar + '/race_win5/' + targetVar + '_lineplot_race_win5_' + agg_name + '.pdf',
        '', '', targetVar, agg_name, cat_order, palette_order, '', '',
    )

In [47]:
# Close-up race trajectories, with the same axis window for the mean and the
# median plot.
xmin, xmax, ymin, ymax = 0, 30, 2, 5
for agg_name in ('mean', 'median'):
    # Both plots receive (xmin, ymin) as the trailing pair; the median call
    # previously passed (ymin, ymin), i.e. ymin in the xmin position.
    ecf.lineplot_ec50_category_trajectory(
        df_var_race_win5,
        outdir + targetVar + '/race_win5/' + targetVar + '_lineplot_race_win5_' + agg_name + '_closedup.pdf',
        xmax, ymax, targetVar, agg_name, cat_order, palette_order, xmin, ymin,
    )

In [48]:
# One race boxplot per day.
day_st = set(df_var_race_win5['Day'].tolist())
# The output location is named after race; it was previously
# 'ethinicity_boxplot_win5', copied over from the ethnicity section.
race_boxplot_out = outdir + targetVar + '/race_win5/race_boxplot_win5'
for day in day_st:
    ecf.plot_ec50_category_boxplot(
        day, df_var_race_win5, race_boxplot_out,
        'race', cat_order, 0, 0, targetVar, palette_order,
    )

In [49]:
# All-against-all pairwise Mann-Whitney comparison between the race
# categories, one call per day (day_st as left by the boxplot cell).
race_stats_dir = outdir + targetVar + '/race_win5/'
race_stats_file = targetVar + '_race_pairw_stats_win5' + '.txt'
for day in day_st:
    ecf.all_against_all_pairwise_mann_whitney(
        df_var_race_win5, day, race_stats_dir, race_stats_file, cat_order,
    )

In [50]:
# Correct the race pairwise p-values day by day, passing 0.05 as the
# threshold, and save the correction.
race_stats_path = outdir + targetVar + '/race_win5/' + targetVar + '_race_pairw_stats_win5' + '.txt'
correct = CORRECTPVAL(race_stats_path)
correct.readfile()
correct.correctpval_byday(0.05)
correct.save_correction()

### The end

In [51]:
# End-of-notebook marker: seeing this output means the last cell was executed.
print('Finito')

Finito
