# Introduction

This Jupyter notebook uses the data created in statistical_analysis_over_time.ipynb (tidy format) to fit mixed-effects models.

## Mixed effect model analysis
Each model is computed twice:
1. Up to the first N days, in order to maximize the number of complete input data (no censoring from missing values)
2. A complete model from Day 1 to Day 10

## Load data and packages

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import statsmodels.formula.api as smf

In [2]:
# Long-format (tidy) weight table; the first spreadsheet column is used as the index.
df_longer_weight = pd.read_excel(
    "./data/df_long_format_for_analysis.xlsx",
    index_col=0,
)
df_longer_weight

Unnamed: 0,ID_Experiment,Mouse_ID,Date,Infection,Group,exp,survival_original,t_origin,Time,weight
0,ID_001,TRO-05432,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
1,ID_001,TRO-05433,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
2,ID_001,TRO-05434,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
3,ID_001,TRO-05435,2014-06-05,C. albicans,1A,1,1,6.0,Tinfection,100.0
4,ID_001,TRO-05456,2014-06-05,C. albicans,1A,1,1,7.0,Tinfection,100.0
...,...,...,...,...,...,...,...,...,...,...
32993,ID_096,TRO-028337,2023-03-03,S. pneumoniae,3,3,1,5.0,T13,
32994,ID_096,TRO-028338,2023-03-03,S. pneumoniae,3,3,1,4.0,T13,
32995,ID_096,TRO-028339,2023-03-03,S. pneumoniae,3,3,1,6.0,T13,
32996,ID_096,TRO-028342,2023-03-03,S. pneumoniae,3,3,0,8.0,T13,


## Mixed effect model
### Function

In [3]:
def Mixed_Effects_Models(df,chosen_infection = 'S. pneumoniae',time_to_exclude = 8,variable='Time',group = 'survival_original',random_effect_group=None):
    """Fit a linear mixed-effects model of ``weight`` for one infection and return its summary.

    Rows of ``df`` whose ``Infection`` equals ``chosen_infection`` are kept, the
    time points ``T<time_to_exclude>`` ... ``T14`` are dropped, and the remaining
    ``Time`` labels are recoded as consecutive integers (0, 1, 2, ...) in their
    order of first appearance. The formula ``weight ~ variable + variable:group``
    is then fitted with ``smf.mixedlm`` using ``missing="drop"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table containing at least the columns ``Infection``,
        ``Time`` and ``weight``, plus the columns named by ``variable``,
        ``group`` and ``random_effect_group``. The frame is not modified.
    chosen_infection : str
        Value of the ``Infection`` column to analyse.
    time_to_exclude : int
        First time index to drop: every label ``T<n>`` with
        ``time_to_exclude <= n <= 14`` is removed before fitting.
    variable : str
        Name of the covariate used in the formula. Only the ``Time`` column is
        recoded to integers by this function.
    group : str
        Column interacting with ``variable`` in the fixed-effects part.
    random_effect_group : str, optional
        Column passed to ``groups=`` (the random-effect grouping). ``None``
        (default) reuses ``group``, which is the historical behaviour.
        NOTE(review): with repeated measurements per animal, a subject-level
        column (e.g. ``Mouse_ID``) is presumably the intended grouping -
        confirm with the analysis plan before changing the default.

    Returns
    -------
    The object returned by ``.summary()`` on the fitted model.

    Raises
    ------
    ValueError
        If no row is left after filtering on infection and time points.
    """
    if random_effect_group is None:
        random_effect_group = group

    # Labels of the time points left out of the model (T<time_to_exclude> ... T14).
    excluded_labels = [f"T{n}" for n in range(time_to_exclude,15,1)]
    selected_rows = (df['Infection'] == chosen_infection) & ~df['Time'].isin(excluded_labels)

    # Explicit copy: 'Time' is overwritten below, which must neither touch the
    # caller's frame nor trigger pandas' chained-assignment warning.
    df_infection = df.loc[selected_rows].copy()
    if df_infection.empty:
        raise ValueError(
            f"No rows left for infection {chosen_infection!r} after excluding "
            f"time points from T{time_to_exclude} onwards"
        )

    # Integer code = rank of first appearance of each label in the filtered rows.
    # NOTE(review): this equals the day number only if the rows are ordered by
    # time and no intermediate label is missing - verify against the input file.
    time_point = df_infection['Time'].unique()
    weight_point_to_integer = {label: position for position, label in enumerate(time_point)}

    # map() yields an integer column directly; replace() on an object column
    # depends on silent downcasting, which pandas >= 2.2 deprecates.
    df_infection['Time'] = df_infection['Time'].map(weight_point_to_integer)

    model = smf.mixedlm(
        f"weight ~ {variable}+{variable}:{group}",
        df_infection,
        groups=df_infection[random_effect_group],
        missing="drop",
    ).fit()
    return model.summary()

#### S. pneumoniae

In [4]:
# Early-phase model for S. pneumoniae: time points T3 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="S. pneumoniae",
    time_to_exclude=3,
    variable='Time',
    group='survival_original',
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,2118,Method:,REML
No. Groups:,2,Scale:,23.3736
Min. group size:,981,Log-Likelihood:,-6345.4933
Max. group size:,1137,Converged:,Yes
Mean group size:,1059.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,99.716,0.359,277.904,0.000,99.013,100.419
Time,-0.906,0.195,-4.639,0.000,-1.288,-0.523
Time:survival_original,-2.473,0.273,-9.047,0.000,-3.009,-1.937
Group Var,0.203,0.075,,,,


In [5]:
# Full model for S. pneumoniae: only the T14 label is excluded.
# NOTE(review): the introduction describes the complete model as Day 1 to
# Day 10, while this call keeps every label below T14 - confirm the intended range.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="S. pneumoniae",
    time_to_exclude=14,
)



0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,4682,Method:,REML
No. Groups:,2,Scale:,48.5389
Min. group size:,1836,Log-Likelihood:,-15738.0945
Max. group size:,2846,Converged:,Yes
Mean group size:,2341.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,98.151,1.088,90.189,0.000,96.018,100.284
Time,0.172,0.043,3.989,0.000,0.088,0.257
Time:survival_original,-1.864,0.093,-20.121,0.000,-2.046,-1.683
Group Var,2.313,,,,,


#### L. monocytogenes

In [6]:
# Early-phase model for Listeria: time points T3 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="Listeria",
    time_to_exclude=3,
)



0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,3104,Method:,REML
No. Groups:,2,Scale:,12.4679
Min. group size:,1442,Log-Likelihood:,-8325.8141
Max. group size:,1662,Converged:,No
Mean group size:,1552.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.790,0.828,121.670,0.000,99.166,102.414
Time,-3.649,0.106,-34.454,0.000,-3.857,-3.441
Time:survival_original,-2.674,0.155,-17.242,0.000,-2.978,-2.370
Group Var,1.353,,,,,


In [7]:
# Full model for Listeria: only the T14 label is excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="Listeria",
    time_to_exclude=14,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,5311,Method:,REML
No. Groups:,2,Scale:,39.1170
Min. group size:,1913,Log-Likelihood:,-17279.0008
Max. group size:,3398,Converged:,Yes
Mean group size:,2655.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,98.362,1.697,57.949,0.000,95.035,101.689
Time,-0.481,0.045,-10.704,0.000,-0.569,-0.393
Time:survival_original,-5.164,0.117,-44.311,0.000,-5.392,-4.936
Group Var,5.723,1.321,,,,


#### C. albicans

In [8]:
# Early-phase model for C. albicans: time points T5 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="C. albicans",
    time_to_exclude=5,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,1199,Method:,REML
No. Groups:,2,Scale:,35.4894
Min. group size:,379,Log-Likelihood:,-3843.5756
Max. group size:,820,Converged:,Yes
Mean group size:,599.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,98.244,0.881,111.512,0.000,96.517,99.971
Time,-3.054,0.149,-20.479,0.000,-3.347,-2.762
Time:survival_original,-2.566,0.271,-9.452,0.000,-3.098,-2.034
Group Var,1.360,0.368,,,,


In [9]:
# Full model for C. albicans: only the T14 label is excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="C. albicans",
    time_to_exclude=14,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,2337,Method:,REML
No. Groups:,2,Scale:,69.1404
Min. group size:,605,Log-Likelihood:,-8270.1222
Max. group size:,1732,Converged:,Yes
Mean group size:,1168.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,93.287,0.897,103.977,0.000,91.529,95.045
Time,-0.346,0.053,-6.524,0.000,-0.449,-0.242
Time:survival_original,-2.637,0.131,-20.070,0.000,-2.895,-2.380
Group Var,1.395,0.274,,,,


#### H1N1

In [10]:
# Early-phase model for H1N1: time points T5 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="H1N1",
    time_to_exclude=5,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,1666,Method:,REML
No. Groups:,2,Scale:,32.5622
Min. group size:,651,Log-Likelihood:,-5267.9977
Max. group size:,1015,Converged:,Yes
Mean group size:,833.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.607,0.380,264.786,0.000,99.862,101.351
Time,-2.021,0.134,-15.132,0.000,-2.283,-1.760
Time:survival_original,-2.527,0.231,-10.928,0.000,-2.980,-2.074
Group Var,0.164,0.071,,,,


In [11]:
# Full model for H1N1: only the T14 label is excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="H1N1",
    time_to_exclude=14,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,2872,Method:,REML
No. Groups:,2,Scale:,71.9239
Min. group size:,891,Log-Likelihood:,-10218.9329
Max. group size:,1981,Converged:,Yes
Mean group size:,1436.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,98.381,0.712,138.114,0.000,96.984,99.777
Time,-0.856,0.062,-13.872,0.000,-0.977,-0.735
Time:survival_original,-2.809,0.138,-20.414,0.000,-3.079,-2.539
Group Var,0.847,0.169,,,,


## Supplementary analysis by cohort

In [12]:
import datetime

# Label each row by experiment date: strictly before 2018-05-01 -> 'first_cohort',
# anything else -> 'second_cohort'. The comparison is done on the whole column
# at once instead of row by row.
# NOTE(review): assumes 'Date' holds datetime values as parsed by read_excel - confirm.
df_longer_weight['cohort'] = np.where(
    df_longer_weight['Date'] < datetime.datetime(2018,5,1),
    'first_cohort',
    'second_cohort',
)
df_longer_weight


Unnamed: 0,ID_Experiment,Mouse_ID,Date,Infection,Group,exp,survival_original,t_origin,Time,weight,cohort
0,ID_001,TRO-05432,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0,first_cohort
1,ID_001,TRO-05433,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0,first_cohort
2,ID_001,TRO-05434,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0,first_cohort
3,ID_001,TRO-05435,2014-06-05,C. albicans,1A,1,1,6.0,Tinfection,100.0,first_cohort
4,ID_001,TRO-05456,2014-06-05,C. albicans,1A,1,1,7.0,Tinfection,100.0,first_cohort
...,...,...,...,...,...,...,...,...,...,...,...
32993,ID_096,TRO-028337,2023-03-03,S. pneumoniae,3,3,1,5.0,T13,,second_cohort
32994,ID_096,TRO-028338,2023-03-03,S. pneumoniae,3,3,1,4.0,T13,,second_cohort
32995,ID_096,TRO-028339,2023-03-03,S. pneumoniae,3,3,1,6.0,T13,,second_cohort
32996,ID_096,TRO-028342,2023-03-03,S. pneumoniae,3,3,0,8.0,T13,,second_cohort


In [13]:
# Keep only the rows whose survival_original flag equals 1.
# NOTE(review): the variable name suggests 1 encodes mice that died - confirm the coding.
is_flagged = df_longer_weight['survival_original'].eq(1)
df_longer_weight_dead = df_longer_weight[is_flagged]

### Listeria

In [19]:
# Cohort comparison for Listeria on the survival_original == 1 subset (T3 and later excluded).
Mixed_Effects_Models(
    df_longer_weight_dead,
    chosen_infection='Listeria',
    time_to_exclude=3,
    variable='Time',
    group='cohort',
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,1442,Method:,REML
No. Groups:,2,Scale:,8.1822
Min. group size:,351,Log-Likelihood:,-3565.0000
Max. group size:,1091,Converged:,Yes
Mean group size:,721.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.881,0.273,368.888,0.000,100.345,101.417
Time,-6.529,0.197,-33.067,0.000,-6.916,-6.142
Time:cohort[T.second_cohort],0.272,0.231,1.179,0.238,-0.180,0.725
Group Var,0.110,0.073,,,,


### S. pneumoniae

In [20]:
# Cohort comparison for S. pneumoniae on the survival_original == 1 subset (T3 and later excluded).
Mixed_Effects_Models(
    df_longer_weight_dead,
    chosen_infection='S. pneumoniae',
    time_to_exclude=3,
    variable='Time',
    group='cohort',
)

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,1137,Method:,REML
No. Groups:,2,Scale:,21.0827
Min. group size:,310,Log-Likelihood:,-3347.6591
Max. group size:,827,Converged:,Yes
Mean group size:,568.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.028,0.290,345.201,0.000,99.460,100.596
Time,-4.374,0.107,-40.686,0.000,-4.584,-4.163
Time:cohort[T.second_cohort],1.308,,,,,
Group Var,0.103,,,,,


### H1N1

In [18]:
# Cohort comparison for H1N1 on the survival_original == 1 subset (T5 and later excluded).
Mixed_Effects_Models(
    df_longer_weight_dead,
    chosen_infection='H1N1',
    time_to_exclude=5,
    variable='Time',
    group='cohort',
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,651,Method:,REML
No. Groups:,2,Scale:,20.9680
Min. group size:,40,Log-Likelihood:,-1915.8512
Max. group size:,611,Converged:,Yes
Mean group size:,325.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,102.230,1.649,62.007,0.000,98.999,105.462
Time,-5.619,0.536,-10.476,0.000,-6.671,-4.568
Time:cohort[T.second_cohort],1.072,0.556,1.928,0.054,-0.018,2.161
Group Var,4.498,1.644,,,,
