In [1]:
import pandas as pd
import numpy as np
from math import sqrt
from statsmodels.stats.power import TTestPower

In [2]:
df = pd.read_excel("../Fig2|EDFig1b_2_3|TableS2/ST1_oralASV_mouse.xlsx", sheet_name="Table S1b")
df.head()

Unnamed: 0,Sample ID,Collection body site,Collection time point,Mouse ID,Treatment group,Exclusion,Total bacterial load,Oral bacterial fraction,Oral bacterial load,Gut bacterial fraction,Gut bacterial load
0,KP_Exp_8_D0_AVN1,Fecal,pre,Abx_1A,Antibiotic,No,419597600000.0,3.5e-05,14827120.0,0.999965,419582700000.0
1,KP_Exp_8_D8_AVN1,Fecal,w1,Abx_1A,Antibiotic,No,174348100.0,0.224412,39125790.0,0.775588,135222400.0
2,KP_Exp_8_D0_OR_AVN1,Oral,pre,Abx_1A,Antibiotic,No,,,,,
3,KP_Exp_8_D0_AVN2,Fecal,pre,Abx_1B,Antibiotic,No,494726500000.0,4.3e-05,21078430.0,0.999957,494705400000.0
4,KP_Exp_8_D8_AVN2,Fecal,w1,Abx_1B,Antibiotic,No,146669800.0,0.289092,42401050.0,0.710908,104268700.0


# First experiment only

In [3]:
df1 = df[df['Mouse ID'].str.contains('Abx_1')][['Mouse ID','Collection time point','Gut bacterial load']]
df1 = pd.pivot_table(df1, index='Mouse ID', columns='Collection time point', values='Gut bacterial load')
df1

Collection time point,pre,w1
Mouse ID,Unnamed: 1_level_1,Unnamed: 2_level_1
Abx_1A,419582700000.0,135222400.0
Abx_1B,494705400000.0,104268700.0
Abx_1C,536172000000.0,90924050.0


In [4]:
n_pre = 3
n_post = 3
gutload_pre = list(df1.pre)
gutload_post = list(df1.w1)
s_pre, s_post = np.var(gutload_pre), np.var(gutload_post)
s = sqrt(((n_pre - 1) * s_pre + (n_post - 1) * s_post) / (n_pre + n_post - 2)) # calculate the pooled standard deviation 
u_pre, u_post = np.mean(gutload_pre), np.mean(gutload_post) # means of the samples
d = (u_pre - u_post) / s # calculate the effect size
alpha = 0.05 # significance leve
obj = TTestPower()
power = obj.power(effect_size=d, nobs=3, alpha=alpha, df=None, alternative='larger')
print('power of 3 mice: %.6f'%(power))

power of 3 mice: 1.000000


# Combine both experiments

## Mouse Abx_2B lacks pre-treatment fecal sample

In [5]:
df12 = df[df['Mouse ID'].str.contains('Abx')][['Mouse ID','Collection time point','Gut bacterial load']]
df12 = pd.pivot_table(df12, index='Mouse ID', columns='Collection time point', values='Gut bacterial load')
df12['post'] = np.nanmean(df12[['d3','w1']], axis=1)
df12 = df12[['pre','post']]
df12 = df12[(df12.pre.notnull()) & (df12.post.notnull())]
df12

Collection time point,pre,post
Mouse ID,Unnamed: 1_level_1,Unnamed: 2_level_1
Abx_1A,419582700000.0,135222400.0
Abx_1B,494705400000.0,104268700.0
Abx_1C,536172000000.0,90924050.0
Abx_2A,11532140000.0,12550680.0
Abx_2C,12222370000.0,2140783.0
Abx_2D,8795456000.0,4037800.0
Abx_2E,9157845000.0,3101776.0


In [6]:
n_pre = 7
n_post = 7
gutload_pre = list(df12.pre)
gutload_post = list(df12.post)
s_pre, s_post = np.var(gutload_pre), np.var(gutload_post)
s = sqrt(((n_pre - 1) * s_pre + (n_post - 1) * s_post) / (n_pre + n_post - 2)) # calculate the pooled standard deviation 
u_pre, u_post = np.mean(gutload_pre), np.mean(gutload_post) # means of the samples
d = (u_pre - u_post) / s # calculate the effect size
alpha = 0.05 # significance leve
obj = TTestPower()
obj = TTestPower()
power = obj.power(effect_size=d, nobs=7, alpha=alpha, df=None, alternative='larger')
print('power of 7 mice: %.6f'%(power))

power of 7 mice: 0.906716
