## Import

In [10]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np
from statannotations.Annotator import Annotator
from lifelines import KaplanMeierFitter
from lifelines.utils import median_survival_times

## utils function

In [11]:
def remove_outlier(df):
    dead = df[df['survival_original']==1]
    dead_listeria = dead[(dead['Infection']=='Listeria')& (dead['Time'] == 'T8')]
    dead_listeria_index = dead_listeria.dropna(subset=['weight'],inplace=False).index
    infection = df[df['Infection'].isin(["Listeria","S. pneumoniae"])]
    to_high_index = infection[(infection['weight']>112) & (infection['Time'].isin(["T1","T2","T3"]))].index
    to_low_index = infection[(infection['weight']<60)].index
    #to_remove = to_high_index + to_low_index
    weight_impossible_value = to_high_index.union(to_low_index)
    index_to_remove = dead_listeria_index.union(weight_impossible_value) 
    df_clean = df.drop(index_to_remove)
    return df_clean

## Analysis

### import and cleaning

In [12]:
df = pd.read_excel("./data/df_long_format_for_analysis.xlsx",index_col=0)

In [13]:
df

Unnamed: 0,ID_Experiment,Mouse_ID,Date,Infection,Group,exp,survival_original,t_origin,Time,weight
0,ID_001,TRO-05432,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
1,ID_001,TRO-05433,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
2,ID_001,TRO-05434,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
3,ID_001,TRO-05435,2014-06-05,C. albicans,1A,1,1,6.0,Tinfection,100.0
4,ID_001,TRO-05456,2014-06-05,C. albicans,1A,1,1,7.0,Tinfection,100.0
...,...,...,...,...,...,...,...,...,...,...
32993,ID_096,TRO-028337,2023-03-03,S. pneumoniae,3,3,1,5.0,T13,
32994,ID_096,TRO-028338,2023-03-03,S. pneumoniae,3,3,1,4.0,T13,
32995,ID_096,TRO-028339,2023-03-03,S. pneumoniae,3,3,1,6.0,T13,
32996,ID_096,TRO-028342,2023-03-03,S. pneumoniae,3,3,0,8.0,T13,


In [14]:
df_clean = remove_outlier(df)
status_mapping = {0: 'Alive', 1: 'Dead'}
df_clean["survival_original"] = df_clean['survival_original'].replace(status_mapping)

infection_mapping = {'Listeria':"L. monocytogenes"}
df_clean['Infection'] =df_clean['Infection'].replace(infection_mapping)

df_clean.rename(columns={'survival_original':'Mice'},inplace=True)

time_rename = list(np.arange(0,15,1))
time_unique = df_clean['Time'].unique()
time_mapping = dict(zip(time_unique,time_rename))
df_clean['Time'] = df_clean['Time'].replace(time_mapping)
df_clean


Unnamed: 0,ID_Experiment,Mouse_ID,Date,Infection,Group,exp,Mice,t_origin,Time,weight
0,ID_001,TRO-05432,2014-06-05,C. albicans,1A,1,Dead,9.0,0,100.0
1,ID_001,TRO-05433,2014-06-05,C. albicans,1A,1,Dead,9.0,0,100.0
2,ID_001,TRO-05434,2014-06-05,C. albicans,1A,1,Dead,9.0,0,100.0
3,ID_001,TRO-05435,2014-06-05,C. albicans,1A,1,Dead,6.0,0,100.0
4,ID_001,TRO-05456,2014-06-05,C. albicans,1A,1,Dead,7.0,0,100.0
...,...,...,...,...,...,...,...,...,...,...
32993,ID_096,TRO-028337,2023-03-03,S. pneumoniae,3,3,Dead,5.0,13,
32994,ID_096,TRO-028338,2023-03-03,S. pneumoniae,3,3,Dead,4.0,13,
32995,ID_096,TRO-028339,2023-03-03,S. pneumoniae,3,3,Dead,6.0,13,
32996,ID_096,TRO-028342,2023-03-03,S. pneumoniae,3,3,Alive,8.0,13,


### time to event

In [15]:
kmf = KaplanMeierFitter()
# A4 page dimensions in inches

df_KP = df_clean.copy()
df_KP['Mice'] = df_KP['Mice'].replace({'Dead':1,'Alive':0})

kmf.fit(df_KP["t_origin"], df_KP["Mice"], label='Total')
median_ = kmf.median_survival_time_
median_confidence_interval_ = median_survival_times(kmf.confidence_interval_)
print(median_)
print(median_confidence_interval_)

for name, grouped_df in df_KP.groupby('Infection'):
    print(name)
    kmf.fit(grouped_df["t_origin"], grouped_df["Mice"], label=name)
    median_ = kmf.median_survival_time_
    median_confidence_interval_ = median_survival_times(kmf.confidence_interval_)
    print(median_)
    print(median_confidence_interval_)


11.0
     Total_lower_0.95  Total_upper_0.95
0.5              11.0              11.0
C. albicans
inf
     C. albicans_lower_0.95  C. albicans_upper_0.95
0.5                     inf                     inf
H1N1
inf
     H1N1_lower_0.95  H1N1_upper_0.95
0.5              inf              inf
L. monocytogenes
6.0
     L. monocytogenes_lower_0.95  L. monocytogenes_upper_0.95
0.5                          6.0                          6.0
S. pneumoniae
9.0
     S. pneumoniae_lower_0.95  S. pneumoniae_upper_0.95
0.5                       8.0                       9.0


In [16]:
kmf = KaplanMeierFitter()

df_KP = df_clean.copy()
df_KP['Mice'] = df_KP['Mice'].replace({'Dead':1,'Alive':0})
df_dead = df_KP[df_KP['Mice']==1]

kmf.fit(df_dead["t_origin"], df_dead["Mice"], label='Total')
median_ = kmf.median_survival_time_
median_confidence_interval_ = median_survival_times(kmf.confidence_interval_)
print(median_)
print(median_confidence_interval_)

for name, grouped_df in df_dead.groupby('Infection'):
    kmf.fit(grouped_df["t_origin"], grouped_df["Mice"], label=name)
    median_ = kmf.median_survival_time_
    median_confidence_interval_ = median_survival_times(kmf.confidence_interval_)
    print(median_)
    print(median_confidence_interval_)


5.0
     Total_lower_0.95  Total_upper_0.95
0.5               5.0               5.0
8.0
     C. albicans_lower_0.95  C. albicans_upper_0.95
0.5                     8.0                     8.0
7.0
     H1N1_lower_0.95  H1N1_upper_0.95
0.5              7.0              7.0
4.0
     L. monocytogenes_lower_0.95  L. monocytogenes_upper_0.95
0.5                          4.0                          4.0
5.0
     S. pneumoniae_lower_0.95  S. pneumoniae_upper_0.95
0.5                       5.0                       5.0


In [21]:
df = pd.read_excel("./data/df_for_analysis.xlsx",index_col=0)
df.groupby('Infection')['time_original'].mean()


Infection
C. albicans      11.472222
H1N1              9.764881
Listeria          5.665076
S. pneumoniae     7.554785
Name: time_original, dtype: float64

In [23]:
df['time_original'].mean()

7.448451421298261