# Effect size analysis

### Dependencies

In [1]:
import numpy as np
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
%load_ext rpy2.ipython

In [4]:
%R require(vegan)

array([1], dtype=int32)

### Input files

Metadata

In [5]:
meta_fp = '../metadata.neo.txt'

PCoA

In [6]:
pcoa_fp = 'abund_jaccard_ITS.house.pcoa'

### Parameters

Number of axes to retain

In [7]:
n = 10

Metadata columns to test

In [8]:
cols = ['village_socio2', 'temp_inside_house', 'carbon_dioxide_inside', 'light_lux_inside']

### Processing

Process PCoA result

In [9]:
loads, coords = [], []
with open(pcoa_fp, 'r') as f:
    reading = False
    for line in f:
        line = line.rstrip('\r\n')
        if line.startswith('Proportion explained'):
            loads = [float(x) for x in next(f).rstrip('\r\n').split('\t')]
            continue
        if line.startswith('Site'):
            reading = True
            continue
        if reading:
            if line == '':
                break
            row = line.split('\t')
            coords.append([row[0]] + [float(x) for x in row[1:n + 1]])

In [10]:
pcoa = pd.DataFrame(coords)
pcoa.columns = ['#SampleID'] + ['PC%d' % (i + 1) for i in range(10)]
pcoa.set_index('#SampleID', inplace=True)
pcoa.head()

Unnamed: 0_level_0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Iqu.1159.kitc,-0.043603,0.526196,-0.226334,0.135684,-0.086652,0.095999,-0.043165,0.191635,-0.09084,0.084364
Iqu.1198.bath,-0.022113,0.17307,-0.212129,-0.015912,0.144807,0.088161,0.129988,0.200196,0.039793,-0.130706
Che.378.bedr,0.309049,-0.124818,-0.144513,0.204565,0.098956,-0.121639,-0.101838,0.0763,-0.048983,0.018965
Che.422.bedr,0.175388,-0.123175,-0.101195,0.200906,0.161773,-0.071774,-0.094298,0.029263,-0.017368,-0.007131
Che.118.kitc,0.500366,-0.096905,-0.130212,0.120746,0.019666,-0.361973,-0.069801,-0.02814,-0.009557,0.122989


In [11]:
pcoa.shape[0]

401

Process metadata

In [12]:
meta = pd.read_csv(meta_fp, sep='\t', index_col=0)
meta.head()

Unnamed: 0_level_0,collection_year,Description,host_or_room,host_type,host_type2,house_number,sample_site,description,sample_site_general,socioeconomic_level,...,Shannon_16S,chao_16S,Shannon_18S,chao_18S,Shannon_18SnoAPF,chao_18SnoAPF,Shannon_ITS,chao_ITS,Shannon_MS,Observed_richness_MS
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Che.214.bedr,2012,Peru2012,bedroom,house,floor,6.0,floor,floor,floor,,...,6.032639,384.558823,4.451843,42.0,,,,,4.843564,109.0
Che.216.livi,2012,Peru2012,living,house,floor,6.0,floor,floor,floor,,...,4.882074,399.235294,,,,,,,4.766682,106.0
Che.209.kitc,2012,Peru2012,kitchen,house,floor,6.0,floor,floor,floor,,...,7.249349,753.5,,,,,,,4.588729,101.0
Che.284.livi,2012,Peru2012,living,house,floor,7.0,floor,floor,floor,,...,5.944746,433.217391,6.031111,285.066667,5.46525,131.433333,5.755548,217.487179,4.645737,109.0
Che.278.kitc,2012,Peru2012,kitchen,house,kitchen,7.0,cup,cup,cup,,...,3.071067,53.5,1.572153,7.0,0.484648,2.0,1.689245,5.0,,


In [13]:
meta.shape[0]

2270

Filter metadata to those in PCoA

In [14]:
meta = meta.loc[pcoa.index]
meta.shape[0]

401

Filter metadata to those with all test variables

In [15]:
meta = meta[cols].dropna()
meta.shape[0]

124

Filter PCoA to those in filtered metadata

In [16]:
pcoa = pcoa.loc[meta.index]
pcoa.shape[0]

124

In [17]:
df = pd.concat([meta, pcoa], axis=1)
df.head()

Unnamed: 0_level_0,village_socio2,temp_inside_house,carbon_dioxide_inside,light_lux_inside,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Iqu.1029.bath,Iquitos,29.0,443.0,13.0,-0.008255,-0.097649,-0.006701,0.051108,0.172698,0.01771,-0.025928,-0.012968,0.065092,-0.081307
Pue.630.bath,Puerto Almendras,26.0,446.0,366.0,0.145713,-0.067697,-0.098993,0.248177,0.095273,0.056089,0.006065,0.017477,0.010924,-0.062381
Man.1605.kitc,Manaus middle,33.0,471.0,964.0,0.082739,0.019453,0.133187,-0.080968,0.050221,-0.027537,0.063594,0.072533,-0.095644,-0.020715
Iqu.865.kitc,Iquitos,29.0,461.0,3029.0,-0.082312,0.316902,0.049112,0.052528,-0.054116,0.002821,-0.002373,0.014652,-0.11647,0.009453
Pue.626.kitc,Puerto Almendras,26.0,446.0,366.0,0.155853,-0.126487,-0.123328,0.00399,0.122759,0.365893,0.018334,-0.111312,0.02267,0.082601


Convert Python lists to numpy arrays so they can be imported into R

In [18]:
xcols = np.array(cols)
ycols = np.array(['PC%d' % (i+1) for i in range(n)])

### Analysis

Import data into R and split

In [19]:
%%R -i df,xcols,ycols
X <- df[, xcols]
Y <- df[, ycols]

Run RDA

In [20]:
%%R
mod0 <- rda(Y ~ 1, X)
mod1 <- rda(Y ~ ., X)

Model fitting

In [21]:
%%R
model <- ordiR2step(mod0, mod1, perm.max = 1000)

Step: R2.adj= 0 
Call: Y ~ 1 
 
                        R2.adjusted
<All variables>          0.23526671
+ village_socio2         0.23024750
+ carbon_dioxide_inside  0.03088074
+ temp_inside_house      0.02319487
+ light_lux_inside       0.01111018
<none>                   0.00000000

                 Df    AIC      F Pr(>F)   
+ village_socio2  3 -247.5 13.264  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.2302475 
Call: Y ~ village_socio2 
 
                        R2.adjusted
<All variables>           0.2352667
+ light_lux_inside        0.2342682
+ carbon_dioxide_inside   0.2333398
<none>                    0.2302475
+ temp_inside_house       0.2280135

                   Df     AIC      F Pr(>F)
+ light_lux_inside  1 -247.18 1.6301   0.15



In [22]:
%%R
model$anova

                  R2.adj Df    AIC      F Pr(>F)   
+ village_socio2 0.23025  3 -247.5 13.264  0.002 **
<All variables>  0.23527                           
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [23]:
0.23527 - 0.23025

0.005019999999999997