# Mixed effect model analysis

## Load data and packages

In [51]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import statsmodels.formula.api as smf

In [52]:
# Load the table used for the analysis; the first spreadsheet column becomes the row index.
df = pd.read_excel(
    "./data/df_for_analysis.xlsx",
    index_col=0,
)

In [53]:
def keep_weight_post_infection(x, weight_end="weight_T14"):
    """
    Re-align the weight measurements of one row so that they start at infection.

    The measurement dates in `x['Dates']` are searched for the last date that
    is on or before `x['Time_infection']`. The weights from that position
    onwards are moved to the front of the weight columns and the freed
    positions at the end are filled with NaN.

    *Arguments*
    - x: one row of the analysis table (a Series, e.g. as passed by
      `DataFrame.apply(..., axis=1)`). It must provide `Dates` (an index-like
      object that supports boolean masking and `get_loc`), `Time_infection`,
      and the weight columns from `weight_T_infection` to `weight_end`.
    - weight_end: Column name of the last weight measurement.

    *Returns*
    - The weight columns unchanged when the infection falls on the first
      measurement date or when the weight at the infection position is
      missing; otherwise a new float Series with the same labels holding the
      shifted weights.

    *Raises*
    - IndexError: if no measurement date is on or before the infection time,
      or if the infection position lies beyond the last weight column.
    """
    # Extract the relevant entries from the row
    dates = x['Dates']
    t_infection = x['Time_infection']
    datas = x['weight_T_infection':weight_end]

    # Last measurement date that is on or before the infection time.
    # NOTE(review): assumes the dates are unique and in chronological order,
    # so that `[-1]` is the closest date and `get_loc` returns an integer.
    new_time_infection = dates[dates <= t_infection][-1]
    location_of_TI = dates.get_loc(new_time_infection)

    # Nothing to shift when infection coincides with the first measurement,
    # or when there is no weight recorded at the infection position.
    # `.iloc` is used because `location_of_TI` is a position, not a label:
    # plain `datas[int]` on a string-labelled Series is deprecated in pandas.
    if location_of_TI == 0 or pd.isna(datas.iloc[location_of_TI]):
        return datas

    # Move the post-infection weights to the front; the tail stays NaN.
    shifted_series = pd.Series(np.nan, index=datas.index)
    shifted_series.iloc[:-location_of_TI] = datas.iloc[location_of_TI:].tolist()
    return shifted_series

### Transform data to long format

Keep only the data recorded after the time of infection.

In [54]:
# Convert the weight columns to numeric values; entries that cannot be parsed become NaN.
# The columns are replaced through `df[columns] = ...` rather than
# `df.loc[:, ...] = ...`: the latter writes into the existing columns in recent
# pandas versions (keeping their previous dtype) and raised a warning on this line.
weight_columns = df.loc[:, "weight_T_infection":"weight_T14"].columns
df[weight_columns] = df[weight_columns].apply(pd.to_numeric, errors='coerce')

# Parse each comma-separated `Time_point` string into a DatetimeIndex (day-first dates).
serie_dates = df['Time_point'].apply(lambda x: pd.to_datetime(x.split(','), dayfirst=True))
df['Dates'] = serie_dates

# Re-align the weights of every row so that they start at the time of infection.
data = df.apply(keep_weight_post_infection, axis=1)
data

  df.loc[:,"weight_T_infection":"weight_T14"] = df.loc[:,"weight_T_infection":"weight_T14"].apply(pd.to_numeric,errors='coerce')


Unnamed: 0,weight_T_infection,weight_T1,weight_T2,weight_T3,weight_T4,weight_T5,weight_T6,weight_T7,weight_T8,weight_T9,weight_T10,weight_T11,weight_T12,weight_T13,weight_T14
0,23.92,21.72,20.96,19.38,18.16,16.44,15.49,15.44,15.05,,,,,,
1,21.40,19.45,18.84,17.82,16.80,15.02,14.14,14.40,14.73,,,,,,
2,22.56,21.45,20.83,18.67,16.82,15.30,14.74,14.80,14.42,,,,,,
3,20.39,18.69,16.60,15.58,14.17,13.54,,,,,,,,,
4,23.72,21.74,20.29,19.56,18.50,16.80,16.78,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2352,27.70,25.00,22.80,21.10,,,,,,,,,,,
2353,26.10,24.90,22.60,,,,,,,,,,,,
2354,24.60,25.80,25.70,24.90,22.80,,,,,,,,,,
2355,27.40,27.30,28.20,27.80,27.70,27.80,27.60,28.00,,,,,,,


Normalize by the weight at the time of infection and write the normalized data into a copy of the original DataFrame

In [55]:

# Express every weight as a percentage of the weight at the time of infection.
weight_at_infection = data['weight_T_infection']
normalize = data.div(weight_at_infection, axis=0) * 100

# Work on a copy so that `df` keeps the absolute weights.
df_normalize = df.copy()
df_normalize.loc[:, "weight_T_infection":"weight_T14"] = normalize

# Row-wise minimum of the normalized weights, and a copy of `time_original` under a second name.
normalized_weights = df_normalize.loc[:, "weight_T_infection":"weight_T14"]
df_normalize['min_weight'] = normalized_weights.min(axis=1)
df_normalize['t_origin'] = df_normalize['time_original']
df_normalize

Unnamed: 0,Mouse_ID,ID_Experiment,Cage,Strain,Date,Experiment,Group,Group_info,H0,Pre_traitment,...,survival_0.06,time_0.05,survival_0.05,time_original,survival_original,max_loss_weight_percentage,exp,sub_exp,min_weight,t_origin
0,TRO-05432,ID_001,A,BALB/cByJ,2014-06-05,Candida/Propionate,1A,Propionate / 2*10^5,1,propionate,...,1,1.5,1,9.0,1,0.629181,1,A,62.918060,9.0
1,TRO-05433,ID_001,A,BALB/cByJ,2014-06-05,Candida/Propionate,1A,Propionate / 2*10^5,1,propionate,...,1,1.5,1,9.0,1,0.660748,1,A,66.074766,9.0
2,TRO-05434,ID_001,A,BALB/cByJ,2014-06-05,Candida/Propionate,1A,Propionate / 2*10^5,1,propionate,...,1,2.5,1,9.0,1,0.639184,1,A,63.918440,9.0
3,TRO-05435,ID_001,A,BALB/cByJ,2014-06-05,Candida/Propionate,1A,Propionate / 2*10^5,1,propionate,...,1,1.5,1,6.0,1,0.664051,1,A,66.405101,6.0
4,TRO-05456,ID_001,B,BALB/cByJ,2014-06-05,Candida/Propionate,1A,Propionate / 2*10^5,1,propionate,...,1,1.5,1,7.0,1,0.707420,1,A,70.741990,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2352,TRO-028337,ID_096,ETRO-01911,C57BL/6J,2023-03-03,Pneumococcus/Training/Cross-fostering/male,3,D. Zy-Zy,1,training/cross-fostering,...,1,1.5,1,5.0,1,0.761733,3,no,76.173285,5.0
2353,TRO-028338,ID_096,ETRO-01911,C57BL/6J,2023-03-03,Pneumococcus/Training/Cross-fostering/male,3,D. Zy-Zy,1,training/cross-fostering,...,1,2.5,1,4.0,1,0.865900,3,no,86.590038,4.0
2354,TRO-028339,ID_096,ETRO-01911,C57BL/6J,2023-03-03,Pneumococcus/Training/Cross-fostering/male,3,D. Zy-Zy,1,training/cross-fostering,...,1,5.5,1,6.0,1,0.926829,3,no,92.682927,6.0
2355,TRO-028342,ID_096,ETRO-01911,C57BL/6J,2023-03-03,Pneumococcus/Training/Cross-fostering/male,3,D. Zy-Zy,1,training/cross-fostering,...,0,11.0,0,11.0,0,0.996350,3,no,99.635036,11.0


Select only the relevant columns


In [56]:
# Names of the wide-format weight columns.
columns = list(df_normalize.loc[:, "weight_T_infection":"weight_T14"].columns)

# All columns that are not weight measurements.
is_weight_column = df_normalize.columns.isin(columns)
columns_index = df_normalize.loc[:, ~is_weight_column]

# Columns whose name contains "time_", rotated so that the last match comes first.
column_time = [name for name in df_normalize.columns if "time_" in name]
column_time = [column_time[-1]] + column_time[:-1]

# Identifier columns kept alongside each weight measurement in the long table.
columns_index_time = [
    'ID_Experiment',
    'Mouse_ID',
    'Date',
    'Infection',
    'Group',
    'exp',
    'survival_original',
    't_origin',
]

Transform the relevant columns to tidy (long) format

In [57]:
# One row per (animal, time point): the weight columns become a `Time` / `weight` pair.
df_longer_weight = df_normalize.melt(
    id_vars=columns_index_time,
    value_vars=columns,
    var_name="Time",
    value_name="weight",
)

# Drop the leading "weight" token and glue the rest together,
# e.g. "weight_T_infection" -> "Tinfection", "weight_T1" -> "T1".
df_longer_weight['Time'] = df_longer_weight['Time'].apply(
    lambda column_name: "".join(column_name.split("_")[1:])
)
df_longer_weight

Unnamed: 0,ID_Experiment,Mouse_ID,Date,Infection,Group,exp,survival_original,t_origin,Time,weight
0,ID_001,TRO-05432,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
1,ID_001,TRO-05433,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
2,ID_001,TRO-05434,2014-06-05,C. albicans,1A,1,1,9.0,Tinfection,100.0
3,ID_001,TRO-05435,2014-06-05,C. albicans,1A,1,1,6.0,Tinfection,100.0
4,ID_001,TRO-05456,2014-06-05,C. albicans,1A,1,1,7.0,Tinfection,100.0
...,...,...,...,...,...,...,...,...,...,...
35350,ID_096,TRO-028337,2023-03-03,S. pneumoniae,3,3,1,5.0,T14,
35351,ID_096,TRO-028338,2023-03-03,S. pneumoniae,3,3,1,4.0,T14,
35352,ID_096,TRO-028339,2023-03-03,S. pneumoniae,3,3,1,6.0,T14,
35353,ID_096,TRO-028342,2023-03-03,S. pneumoniae,3,3,0,11.0,T14,


Save the data

In [58]:
# Write the long-format table to disk for later analyses.
df_longer_weight.to_excel(
    "./data/df_long_format_for_analysis.xlsx",
)

## Mixed effect model
### function

In [59]:
def Mixed_Effects_Models(df,chosen_infection = 'S. pneumoniae',time_to_exclude = 8):
    """
    Fit a linear mixed-effects model of weight over time for one infection.

    *Arguments*
    - df: long-format table with at least the columns `Infection`, `Time`,
      `weight` and `survival_original`.
    - chosen_infection: value of the `Infection` column to analyse.
    - time_to_exclude: first time index to discard; rows whose `Time` label is
      `T{time_to_exclude}` up to `T14` are removed before fitting.

    *Returns*
    - The summary of the fitted `weight ~ Time` model.
    """
    # Keep the chosen infection and drop the unused late time points.
    excluded_times = [f"T{n}" for n in range(time_to_exclude, 15)]
    keep = (df['Infection'] == chosen_infection) & ~df['Time'].isin(excluded_times)
    # Explicit copy: the `Time` column is overwritten below and must not be
    # written through a view of the caller's DataFrame.
    df_infection = df.loc[keep].copy()

    # Number the remaining time labels 0, 1, 2, ... in order of first appearance.
    time_point = df_infection['Time'].unique()
    weight_point_to_integer = {label: position for position, label in enumerate(time_point)}

    # `map` yields an integer column directly; `replace` would rely on pandas'
    # implicit object -> int downcasting, without which `Time` stays a string
    # column and the formula would treat it as categorical.
    df_infection['Time'] = df_infection['Time'].map(weight_point_to_integer)

    # NOTE(review): the random-effect groups are the values of
    # `survival_original`, not individual animals — confirm this is the
    # intended grouping structure.
    model = smf.mixedlm("weight ~ Time",df_infection,groups=df_infection['survival_original'],missing="drop").fit()
    return model.summary()

In [60]:
# S. pneumoniae: time points T4 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="S. pneumoniae",
    time_to_exclude=4,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,2776,Method:,REML
No. Groups:,2,Scale:,25.1702
Min. group size:,1230,Log-Likelihood:,-8421.0727
Max. group size:,1546,Converged:,Yes
Mean group size:,1388.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.007,1.763,56.719,0.000,96.551,103.463
Time,-1.718,0.086,-20.074,0.000,-1.885,-1.550
Group Var,6.168,1.747,,,,


In [61]:
# Listeria: time points T5 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="Listeria",
    time_to_exclude=5,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,4506,Method:,REML
No. Groups:,2,Scale:,30.7630
Min. group size:,2235,Log-Likelihood:,-14119.2407
Max. group size:,2271,Converged:,Yes
Mean group size:,2253.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,100.171,3.009,33.286,0.000,94.273,106.069
Time,-3.824,0.063,-60.939,0.000,-3.947,-3.701
Group Var,18.075,4.747,,,,


In [62]:
# C. albicans: time points T8 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="C. albicans",
    time_to_exclude=8,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,1776,Method:,REML
No. Groups:,2,Scale:,67.1258
Min. group size:,590,Log-Likelihood:,-6259.6263
Max. group size:,1186,Converged:,Yes
Mean group size:,888.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,93.931,3.870,24.275,0.000,86.347,101.516
Time,-1.883,0.088,-21.378,0.000,-2.055,-1.710
Group Var,29.706,5.093,,,,


In [63]:
# H1N1: time points T8 and later are excluded.
Mixed_Effects_Models(
    df_longer_weight,
    chosen_infection="H1N1",
    time_to_exclude=8,
)

0,1,2,3
Model:,MixedLM,Dependent Variable:,weight
No. Observations:,2416,Method:,REML
No. Groups:,2,Scale:,64.0678
Min. group size:,984,Log-Likelihood:,-8457.9685
Max. group size:,1432,Converged:,Yes
Mean group size:,1208.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,99.228,3.443,28.820,0.000,92.480,105.976
Time,-2.458,0.075,-32.918,0.000,-2.604,-2.311
Group Var,23.543,4.218,,,,
