In [1]:
library(MASS)
library(pvclust)

In [4]:
# Load the transect metadata table and the OTU count table, restrict the OTU
# table to the rows named in the metadata, and normalise it for clustering.
# NOTE(review): rows are presumably samples and columns OTUs -- confirm.
data_dir <- '/Volumes/KeithSSD/ChesapeakeMicrobiome/data'
transect_data_fn <- "environmental_raw_data/Transformed_WQVars_Hab_and_aDiv.txt"
transect_data_f <- paste(data_dir, transect_data_fn, sep="/")
tran_df <- read.delim(transect_data_f, row.names=1)

otu_table_fn <- 'otu_tables/final_rarefied_table.tsv'
otu_table_f <- paste(data_dir, otu_table_fn, sep="/")
tsv.data <- read.delim(otu_table_f, row.names=1)

# Index only by row names present in both tables. Indexing a data frame by a
# missing row name yields an all-NA row, which turns every column sum into NA
# and would silently discard every column in the filter below.
shared_rows <- intersect(rownames(tran_df), rownames(tsv.data))
if (length(shared_rows) == 0) {
    stop("No row names are shared between the metadata and the OTU table",
         call. = FALSE)
}
unmatched_rows <- setdiff(rownames(tran_df), shared_rows)
if (length(unmatched_rows) > 0) {
    warning(length(unmatched_rows),
            " metadata row(s) absent from the OTU table were skipped: ",
            paste(unmatched_rows, collapse = ", "), call. = FALSE)
}
tsv.data <- tsv.data[shared_rows, , drop = FALSE]

# Drop all-zero rows and all-zero columns; drop = FALSE keeps a data frame
# even when a single column survives.
tsv.data <- tsv.data[rowSums(tsv.data) > 0, colSums(tsv.data) > 0, drop = FALSE]

# Add a 0.1 pseudo-count, then divide every row by its own total.
tsv.data2 <- (tsv.data + 0.1)/rowSums(tsv.data + 0.1)
# Divide every column by its own total (done on the transpose, then flipped
# back), so each column of tsv.data3 sums to 1.
tsv.data3 <- t(t(tsv.data2)/rowSums(t(tsv.data2)))

dim(tsv.data3)

In [11]:
library(parallel)

oeu_file_1 <- "/Volumes/KeithSSD/ChesapeakeMicrobiome/data/oeu_clusters/otuclusters2.RData"
oeu_file_2 <- "/Volumes/KeithSSD/ChesapeakeMicrobiome/data/oeu_clusters/otuclusters3.RData"

# Return the pvclust result stored in `cache_file` (saved under the name
# `res.pv`), or run the bootstrap clustering of the columns of `abund` with
# the given agglomeration method, save it to `cache_file`, and return it.
load_or_run_pvclust <- function(cache_file, hclust_method, abund, cl) {
    if (file.exists(cache_file)) {
        # Load into a private environment so the cached object cannot
        # overwrite anything in the workspace.
        cache_env <- new.env()
        load(cache_file, envir = cache_env)
        if (!exists("res.pv", envir = cache_env, inherits = FALSE)) {
            stop("Cache file does not contain 'res.pv': ", cache_file,
                 call. = FALSE)
        }
        return(get("res.pv", envir = cache_env, inherits = FALSE))
    }
    res.pv <- pvclust(abund, method.hclust = hclust_method, method.dist = "euclidean",
                      parallel = cl, iseed = 1, nboot = 1000, r = seq(.5, 1.4, by = .15))
    save(res.pv, file = cache_file)
    res.pv
}

# Start worker processes only when at least one result must be computed.
caches_present <- file.exists(c(oeu_file_1, oeu_file_2))
cl_inst <- if (all(caches_present)) NULL else makeCluster(4, type = "PSOCK")

# Both results are assigned whether they came from the cache or from a fresh
# run, so the following cells also work on the very first execution.
# The workers are released even if a bootstrap run fails.
tryCatch({
    oeu_obj1 <- load_or_run_pvclust(oeu_file_1, "complete", tsv.data3, cl_inst)
    oeu_obj2 <- load_or_run_pvclust(oeu_file_2, "ward.D2", tsv.data3, cl_inst)
}, finally = {
    if (!is.null(cl_inst)) {
        stopCluster(cl_inst)
    }
})
ls()

In [16]:
# Pick clusters from each pvclust result using the "au" p-value and a
# threshold of 0.95 (oeus1: complete linkage, oeus2: ward.D2).
au_cutoff <- 0.95
oeus1 <- pvpick(oeu_obj1, alpha = au_cutoff, pv = "au")
oeus2 <- pvpick(oeu_obj2, alpha = au_cutoff, pv = "au")

In [39]:
# One row per column of tsv.data3; each column will hold the index of the
# picked cluster the row name belongs to, or NA when it is in no cluster.
n_members <- ncol(tsv.data3)
cluster_assignments <- data.frame('ward_clusters' = rep(NA, n_members),
                                  'complete_clusters' = rep(NA, n_members),
                                  row.names = colnames(tsv.data3))

# Write the index of each cluster in `clusters` into `column` of `assignments`
# for the rows named in that cluster, print "index: size" for every cluster
# with more than 3 members, and return the updated data frame.
record_clusters <- function(assignments, clusters, column) {
    # seq_along() makes the loop a no-op when no cluster was picked;
    # 1:length() would iterate over c(1, 0) and fail on clusters[[1]].
    for (cln in seq_along(clusters)) {
        cln_membs <- clusters[[cln]]
        assignments[rownames(assignments) %in% cln_membs, column] <- cln
        if (length(cln_membs) > 3) {
            print(paste(c(cln, length(cln_membs)), collapse = ": "))
        }
    }
    assignments
}

# Mean cluster size, then assignments, for the ward.D2 clustering.
mean(lengths(oeus2$clusters))
cluster_assignments <- record_clusters(cluster_assignments, oeus2$clusters,
                                       'ward_clusters')

# Same for the complete-linkage clustering.
mean(lengths(oeus1$clusters))
cluster_assignments <- record_clusters(cluster_assignments, oeus1$clusters,
                                       'complete_clusters')

# NOTE(review): this path is relative to the working directory, unlike the
# absolute paths used when reading the inputs -- confirm it is intended.
write.table(cluster_assignments, file='../data/oeu_clusters/cluster_assignments.txt', sep="\t")
head(cluster_assignments)

[1] "1: 130"
[1] "13: 6"
[1] "19: 5"
[1] "51: 4"
[1] "57: 4"
[1] "58: 5"
[1] "59: 4"
[1] "60: 5"
[1] "63: 4"
[1] "64: 6"
[1] "72: 9"
[1] "76: 7"
[1] "77: 5"
[1] "85: 5"
[1] "95: 5"
[1] "97: 4"
[1] "99: 7"
[1] "102: 4"
[1] "104: 4"
[1] "108: 4"
[1] "113: 6"
[1] "121: 4"
[1] "126: 4"
[1] "131: 5"
[1] "132: 4"
[1] "134: 4"
[1] "136: 4"
[1] "137: 4"
[1] "139: 4"
[1] "141: 5"
[1] "142: 4"
[1] "143: 4"
[1] "146: 5"
[1] "147: 5"
[1] "148: 5"
[1] "149: 4"
[1] "150: 4"
[1] "152: 4"
[1] "153: 4"
[1] "154: 6"
[1] "155: 7"
[1] "156: 4"
[1] "157: 6"
[1] "158: 6"
[1] "159: 5"
[1] "161: 5"
[1] "163: 4"
[1] "165: 6"
[1] "166: 8"
[1] "167: 10"
[1] "168: 4"
[1] "169: 7"
[1] "170: 6"
[1] "173: 6"
[1] "174: 7"
[1] "175: 7"
[1] "177: 8"
[1] "178: 8"
[1] "179: 9"
[1] "180: 9"
[1] "182: 5"
[1] "183: 9"
[1] "184: 7"
[1] "185: 7"
[1] "186: 6"
[1] "187: 17"
[1] "188: 6"
[1] "189: 8"
[1] "190: 22"


[1] "1: 130"
[1] "15: 7"
[1] "19: 6"
[1] "39: 6"
[1] "42: 8"
[1] "44: 4"
[1] "53: 4"
[1] "55: 5"
[1] "60: 5"
[1] "63: 4"
[1] "69: 4"
[1] "70: 4"
[1] "71: 6"
[1] "73: 4"
[1] "75: 4"
[1] "76: 4"
[1] "82: 4"
[1] "88: 7"
[1] "89: 5"
[1] "91: 4"
[1] "97: 4"
[1] "109: 4"
[1] "113: 6"
[1] "118: 4"
[1] "121: 4"
[1] "122: 4"
[1] "130: 5"
[1] "134: 4"
[1] "139: 5"
[1] "143: 10"
[1] "144: 4"
[1] "147: 4"
[1] "148: 4"
[1] "149: 6"
[1] "151: 5"
[1] "154: 5"
[1] "158: 7"
[1] "159: 4"
[1] "165: 5"
[1] "166: 4"
[1] "167: 5"
[1] "168: 4"
[1] "169: 4"
[1] "171: 5"
[1] "172: 6"
[1] "173: 4"
[1] "174: 9"
[1] "175: 4"
[1] "176: 4"
[1] "179: 6"
[1] "181: 6"
[1] "182: 8"
[1] "183: 9"
[1] "185: 5"
[1] "186: 7"
[1] "187: 4"
[1] "188: 5"
[1] "190: 10"
[1] "191: 9"
[1] "192: 8"
[1] "193: 14"
[1] "194: 11"


Unnamed: 0_level_0,ward_clusters,complete_clusters
Unnamed: 0_level_1,<int>,<int>
OTU1,64.0,71.0
OTU2,,
OTU3,72.0,42.0
OTU4,7.0,9.0
OTU5,,39.0
OTU7,60.0,
