In [None]:
# repr supplies the repr.plot.* options that control inline figure size.
library(repr)
# Default figure size for the notebook; later cells override it before plotting.
options(repr.plot.width=4, repr.plot.height=4)

### Raup-Crick Distance via Bray-Curtis:
#### Resamples OTUs into a community to match its observed richness, based on the site-occupancy distribution across the full data set, and calculates the Bray-Curtis distance. The value is the proportion of runs where the distance is less than expected, scaled to the range [-1, 1].

### Mean nearest taxon distance (MNTD): 
#### The mean distance between each species in community A and its closest relative in community B, across 999 resampling trials
#### Called the Nearest Taxon Index (NTI) when standardized (i.e. expressed as a z-value)

### Mean pairwise distance (MPD): 
#### The mean distance between all pairs of species in community A and community B, across 999 resampling trials
#### Called the Net Relatedness Index (NRI) when standardized (i.e. expressed as a z-value)


In [None]:
# Build `stoch_df`: one row per unordered pair of included samples, with empty
# BNTI / RC / Outcome columns for later cells to fill.
row_reference <- "../otu_data/WaterQualityData/matched_cleaned_data/all_mdata_colset_2.tsv"
included_samples <- read.delim(row_reference, row.names = 1)
incl_samps <- rownames(included_samples)

# Index pairs (i, j) with i < j. combn() emits them as (1,2), (1,3), ...,
# (1,n), (2,3), ... which is the same order the nested i/j loop produced.
# With fewer than two samples there are no pairs, so use an empty 2 x 0 matrix
# (combn() itself would raise an error in that case).
n_samps <- length(incl_samps)
if (n_samps >= 2) {
    pair_idx <- combn(n_samps, 2)
} else {
    pair_idx <- matrix(integer(0), nrow = 2)
}
n_pairs <- ncol(pair_idx)

# Score columns start as NA and are populated from the distance matrices later.
stoch_df <- data.frame(
    Com1 = incl_samps[pair_idx[1, ]],
    Com2 = incl_samps[pair_idx[2, ]],
    BNTI = rep(NA_real_, n_pairs),
    RC = rep(NA_real_, n_pairs),
    Outcome = rep(NA, n_pairs),
    stringsAsFactors = FALSE
)
print(dim(stoch_df))

# Kept for parity with the loop-based version, which counted the rows it filled.
row_counter <- n_pairs

print(row_counter)
print(head(stoch_df))

In [None]:
# Load the saved nearest-taxon results and attach the pairwise z-score to each
# row of `stoch_df`.
nti_fn <- "../otu_data/dispersal_selection_data/ses_nti.RData"
load(nti_fn)  # must define `mntd_scores`, which is read on the next line
bnti_df <- mntd_scores$comdistnt.obs.z[incl_samps, incl_samps]
bnti_dist <- as.dist(bnti_df)

# Histogram of the pairwise z-scores; dashed red lines mark the +/-2 cut-offs
# used when the scores are classified.
options(repr.plot.width = 6, repr.plot.height = 3)
h <- hist(bnti_dist)
abline(v = c(2, -2), col = "red", lwd = 2, lty = 2)

# Look up every (Com1, Com2) pair at once: a two-column character matrix used
# as an index selects one element per row by dimnames. This replaces a per-row
# loop over 1:nrow, which also misbehaved when `stoch_df` had no rows.
bnti_pairs <- cbind(
    as.character(stoch_df[["Com1"]]),
    as.character(stoch_df[["Com2"]])
)
stoch_df$BNTI <- as.matrix(bnti_df)[bnti_pairs]

# Sanity check: number of pairs for which no score was found.
print(sum(is.na(stoch_df[, "BNTI"])))

In [None]:
# Load the Raup-Crick table, make it symmetric, and attach the pairwise value
# to each row of `stoch_df`.
rc_fn <- "../otu_data/dispersal_selection_data/raup_crick_data.tsv"
rc_df <- read.delim(rc_fn, row.names = 1)[
    rownames(included_samples), rownames(included_samples), drop = FALSE
]

# Zero-filled copies of the table and of its transpose, so that adding them
# merges the two triangles.
rc_df2 <- rc_df
rc_df2[is.na(rc_df2)] <- 0

rc_dft <- t(rc_df)
rc_dft2 <- rc_dft
rc_dft2[is.na(rc_dft2)] <- 0

# The sum counts the diagonal twice, so restore it from the original table.
rc_sym <- as.matrix(rc_df2 + rc_dft2)
diag(rc_sym) <- diag(as.matrix(rc_df))

# Report how many pairwise values exceed 1, then cap them for plotting.
# NOTE(review): the cap is applied to `rc_dist` only; `stoch_df$RC` below is
# read from the uncapped `rc_sym`. Confirm that this is intended.
rc_dist <- as.dist(rc_sym)
print(sum(rc_dist > 1))
rc_dist[rc_dist > 1] <- 1

# Histogram with 0.05-wide bins from -1.05 to 1.05; dashed red lines mark the
# +/-0.95 cut-offs used when the values are classified.
options(repr.plot.width = 6, repr.plot.height = 3)
h <- hist(rc_dist, breaks = ((-1):41) / 20 - 1)
abline(v = c(0.95, -0.95), col = "red", lwd = 2, lty = 2)

# Vectorised lookup of every (Com1, Com2) pair by dimnames, replacing a
# per-row loop over 1:nrow that misbehaved when `stoch_df` had no rows.
rc_pairs <- cbind(
    as.character(stoch_df[["Com1"]]),
    as.character(stoch_df[["Com2"]])
)
stoch_df$RC <- rc_sym[rc_pairs]

# Sanity check: number of pairs for which no value was found.
print(sum(is.na(stoch_df[, "RC"])))

In [None]:
# Print the percentage of sample pairs falling into each BNTI / RC category.
# All comparisons are strict, so a pair with |BNTI| exactly 2 or |RC| exactly
# 0.95 is counted in no category. Any NA in BNTI or RC makes the affected
# percentages NA, because the sums are taken without na.rm.
n_pairs <- dim(stoch_df)[1]
bnti_vals <- stoch_df$BNTI
rc_vals <- stoch_df$RC

# Print `label` next to the share of TRUE entries in `hits`, as a percentage
# of all pairs rounded to one decimal place.
report_share <- function(label, hits) {
    print(c(label, round(sum(hits) / n_pairs, 3) * 100))
    invisible(NULL)
}

print(c('Number of pairwise comparisons:', n_pairs))
print(c(""))

# Pairs whose BNTI lies outside +/-2.
report_share('Deterministic via phylogeny:', abs(bnti_vals) > 2)
report_share('-> Undergoing hetero. selection:', bnti_vals > 2)

heter_disp <- (bnti_vals > 2) & (rc_vals > 0.95)
report_share('--> Heterog. dispersal:', heter_disp)

report_share('-> Undergoing homo. selection:', bnti_vals < -2)

# NOTE(review): this uses RC > 0.95, the same cut-off as the heterogeneous
# case above, whereas 'High dispersal' below uses RC < -0.95. Confirm the
# sign is intended.
homo_disp <- (bnti_vals < -2) & (rc_vals > 0.95)
report_share('--> Homog. dispersal:', homo_disp)

print(c(""))

# Pairs whose BNTI lies inside +/-2, split further by RC.
stoch_frac <- abs(bnti_vals) < 2
report_share('Stochastic via phylogeny:', stoch_frac)

disp_frac <- (abs(bnti_vals) < 2) & (abs(rc_vals) > 0.95)
report_share('-> Influenced by dispersal:', disp_frac)

homo_frac <- (abs(bnti_vals) < 2) & (rc_vals < -0.95)
report_share('--> High dispersal:', homo_frac)

ltd_frac <- (abs(bnti_vals) < 2) & (rc_vals > 0.95)
report_share('--> Low dispersal:', ltd_frac)

und_frac <- (abs(bnti_vals) < 2) & (abs(rc_vals) < 0.95)
report_share('-> Undominated:', und_frac)




In [None]:
# A comparison of deviations between this and BNTI would reveal instances where one sample is essentially a subgroup of another
#nri_fn = "../otu_data/dispersal_selection_data/ses_mpd.RData"
#nri_df <- read.delim(nri_fn, row.names=1)