# librerias

In [None]:
# library() stops immediately when a package is missing; require() would only
# warn and return FALSE, letting the notebook fail later with an obscure error.
library(vegan)
library(iNEXT)

# funciones

In [None]:
# Parse comma-separated abundance records into a named list.
#
# Each element of `lines` is expected to look like "name,count1,count2,...".
# The first field becomes the element name and the remaining fields are
# converted to a numeric vector. Blank (empty or whitespace-only) lines are
# skipped so that a stray empty line in the input file does not produce an
# NA-named, zero-length entry.
#
# lines: character vector, one record per element.
# Returns a list of numeric vectors named by the first field of each record.
parse_lines <- function(lines) {
  lines <- as.character(lines)
  lines <- lines[nzchar(trimws(lines))]

  # Split every record once and reuse the pieces for both names and values.
  fields <- strsplit(lines, ",", fixed = TRUE)

  abundances <- lapply(fields, \(parts) as.numeric(parts[-1]))
  # vapply guarantees a character vector even when there are no records.
  names(abundances) <- vapply(fields, \(parts) parts[1], character(1))
  abundances
}

# diversidad

In [None]:
# Read the per-assemblage abundance vectors from disk and run iNEXT on them
# with q = 0 and abundance-type data.
cgroup.abundance <- parse_lines(
  lines = readLines(con = "itsall.abund.txt")
)
cinext <- iNEXT(
  x = cgroup.abundance,
  q = 0,
  datatype = "abundance"
)

In [None]:
# Export the AsyEst table of the iNEXT result as a tab-separated file.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
write.table(cinext$AsyEst, "itsall.AsyEst.tsv", quote = FALSE, sep = "\t")

In [None]:
# Export the DataInfo table of the iNEXT result as a tab-separated file.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
write.table(cinext$DataInfo, "itsall.DataInfo.tsv", quote = FALSE, sep = "\t")

In [None]:
# Diversity estimates standardised to a common sample coverage: the smallest
# coverage value (SC column of DataInfo) found across the assemblages.
cestimated <- estimateD(
  cgroup.abundance,
  datatype = "abundance",
  base = "coverage",
  level = min(cinext$DataInfo$SC)
)
# NOTE(review): the output name spells "diveristy"; it is left untouched because
# other steps may already read this exact path — rename deliberately if wanted.
write.table(cestimated, "itsall.min-cov-estimated-diveristy.tsv",
            quote = FALSE, sep = "\t")

In [None]:
# Diversity estimates standardised to a common sample size: the smallest
# total count (n column of DataInfo) found across the assemblages.
# Note that this overwrites the `cestimated` object of the coverage-based cell.
cestimated <- estimateD(
  cgroup.abundance,
  datatype = "abundance",
  base = "size",
  level = min(cinext$DataInfo$n)
)
# NOTE(review): the output name spells "diveristy"; it is left untouched because
# other steps may already read this exact path — rename deliberately if wanted.
write.table(cestimated, "itsall.min-size-estimated-diveristy.tsv",
            quote = FALSE, sep = "\t")

In [None]:
# Sample-size-based rarefaction/extrapolation curve (type = 1), one colour per
# assemblage, no faceting. FALSE is spelled out instead of the reassignable F.
ggiNEXT(cinext, type = 1, se = TRUE, facet.var = "None",
        color.var = "Assemblage", grey = FALSE)

In [None]:
# Sample completeness curve (type = 2), one colour per assemblage, no
# faceting. FALSE is spelled out instead of the reassignable F.
ggiNEXT(cinext, type = 2, se = TRUE, facet.var = "None",
        color.var = "Assemblage", grey = FALSE)

In [None]:
# Coverage-based rarefaction/extrapolation curve (type = 3), one colour per
# assemblage, no faceting. FALSE is spelled out instead of the reassignable F.
ggiNEXT(cinext, type = 3, se = TRUE, facet.var = "None",
        color.var = "Assemblage", grey = FALSE)

# pruebas de hipotesis

In [None]:
# Load the per-sample metadata table (tab-separated, with a header row).
# NOTE(review): this is an absolute, user-specific path while every other input
# in this notebook is relative to the working directory — confirm it is intended.
csamplemd <- read.table("/home/daemsel/its.samplemd.tsv", sep = "\t", header = TRUE)
# Label the rows with the sample identifiers. The previous call,
# rownames(csamplemd$Sample), asked for the row names of a plain vector, which
# is always NULL, so it neither showed nor set anything.
rownames(csamplemd) <- csamplemd$Sample
str(csamplemd)

In [None]:
# Inspect the structure (columns and types) of the AsyEst table in the iNEXT result.
str(cinext$AsyEst)

In [None]:
# Load the OTU count table (comma-separated, with a header row).
# comment.char = "" switches off comment parsing, so a "#" in the file is
# read as ordinary data.
otu <- read.table(
  file = "itsall.otu.csv",
  header = TRUE,
  sep = ",",
  comment.char = ""
)
str(object = otu)

In [None]:
# Keep only the "Species richness" rows of AsyEst, reduce them to the
# assemblage name and the observed value, and rename those two columns to
# Sample / Richness so they can be joined with the metadata.
richness_data <- setNames(
  cinext$AsyEst[
    cinext$AsyEst$Diversity == "Species richness",
    c("Assemblage", "Observed")
  ],
  c("Sample", "Richness")
)

In [None]:
# Attach the sample metadata to the richness table, joining on the Sample
# column (samples present in only one of the two tables are dropped).
data_merged <- merge(
  x = csamplemd,
  y = richness_data,
  by = "Sample"
)

# prueba de comparacion de medias

## prueba t de dos muestras (Welch, no pareada)

In [None]:
# Two-sample t-test of richness between the Sistema groups. t.test() is called
# without paired = TRUE, so this is the unpaired (Welch) test.
sistema_groups <- split(data_merged$Richness, data_merged$Sistema)

group_names <- names(sistema_groups)

# The test below uses exactly the first two groups; make that explicit instead
# of failing with a subscript error or silently ignoring additional groups.
if (length(sistema_groups) < 2) {
  stop("A two-sample t-test needs two Sistema groups, found ",
       length(sistema_groups), call. = FALSE)
}
if (length(sistema_groups) > 2) {
  warning("Sistema has ", length(sistema_groups),
          " groups; only the first two are compared: ",
          group_names[1], " and ", group_names[2], call. = FALSE)
}

t_test_result <- t.test(sistema_groups[[1]], sistema_groups[[2]])
cat("\nTwo-sample t-test between", group_names[1], "and", group_names[2], "\n")
print(t_test_result)

In [None]:
# Box plot of richness for each Sistema group.
boxplot(
  Richness ~ Sistema,
  data = data_merged,
  col = c("lightgreen", "lightblue"),
  xlab = "Sistema",
  ylab = "Species Richness",
  main = "Species Richness by Sistema"
)

## ANOVA

In [None]:
# One-way analysis of variance of richness across the pH categories.
anova_ph <- aov(
  formula = Richness ~ pH_cat,
  data = data_merged
)
print(summary(object = anova_ph))

In [None]:
# Post hoc pairwise comparisons (Tukey HSD); only meaningful if the ANOVA
# above turned out significant.
tukey_result <- TukeyHSD(x = anova_ph)
print(tukey_result)

In [None]:
# Box plot of richness for each pH category, one rainbow colour per distinct
# category value.
boxplot(
  Richness ~ pH_cat,
  data = data_merged,
  col = rainbow(n = length(unique(data_merged$pH_cat))),
  xlab = "pH Category",
  ylab = "Species Richness",
  main = "Species Richness by pH Category"
)

# pruebas sobre diversidad beta

In [None]:
# Build the community matrix from the OTU table: the first column holds the
# OTU identifiers, every other column is one sample.
# drop = FALSE keeps a data frame even when only one sample column remains,
# so its column name survives the conversion to a matrix.
otu_matrix <- as.matrix(otu[, -1, drop = FALSE])
# Take the identifiers from the same first column that was just removed,
# rather than from a hard-coded (header-mangled) column name that would
# silently yield NULL if the header were spelled differently.
rownames(otu_matrix) <- otu[[1]]
otu_matrix_t <- t(otu_matrix)  # samples in rows, OTUs in columns

In [None]:
# Display the transposed count matrix (samples in rows, OTUs in columns).
otu_matrix_t

In [None]:
# Pairwise Bray-Curtis dissimilarities between the rows (samples) of the
# transposed count matrix.
bray_dist <- vegdist(
  x = otu_matrix_t,
  method = "bray"
)

In [None]:
# Display the Bray-Curtis dissimilarity object.
bray_dist

## PERMANOVA

In [None]:
# Reorder the metadata so that its rows follow the sample order of the
# community matrix (and therefore of the dissimilarity object).
sample_idx <- match(rownames(otu_matrix_t), csamplemd$Sample)
# A sample without a metadata row would otherwise become an all-NA row and
# only surface later as a confusing failure in the tests below.
if (anyNA(sample_idx)) {
  stop("Samples in the OTU table without a metadata row: ",
       paste(rownames(otu_matrix_t)[is.na(sample_idx)], collapse = ", "),
       call. = FALSE)
}
metadata_ordered <- csamplemd[sample_idx, ]

In [None]:
# PERMANOVA of the Bray-Curtis dissimilarities against Sistema, pH and Humedad
# using 999 permutations and adonis2's default `by` setting.
permanova_result <- adonis2(
  formula = bray_dist ~ Sistema + pH + Humedad,
  data = metadata_ordered,
  permutations = 999,
  method = "bray"
)
print(permanova_result)

In [None]:
# Same PERMANOVA model, now testing each variable marginally (by = "margin").
# This overwrites `permanova_result` from the previous cell.
permanova_result <- adonis2(
  formula = bray_dist ~ Sistema + pH + Humedad,
  data = metadata_ordered,
  permutations = 999,
  by = "margin",
  method = "bray"
)
print(permanova_result)

In [None]:
# Test homogeneity of multivariate dispersions between the Sistema groups.
disp_sistema <- betadisper(
  d = bray_dist,
  group = metadata_ordered$Sistema
)
print(anova(disp_sistema))

# ANOSIM

In [None]:
# ANOSIM with Sistema as the grouping, 999 permutations.
anosim_sistema <- anosim(
  x = bray_dist,
  grouping = metadata_ordered$Sistema,
  permutations = 999
)
print(summary(anosim_sistema))

In [None]:
# ANOSIM with the pH categories as the grouping, 999 permutations.
anosim_ph_cat <- anosim(
  x = bray_dist,
  grouping = metadata_ordered$pH_cat,
  permutations = 999
)
print(summary(anosim_ph_cat))

In [None]:
# ANOSIM on humidity after turning it into two categories split at the median:
# values up to and including the median are "Low", the rest are "High".
# The median is taken with quantile() so the cut point is the same one the
# earlier cut()-based version used, but the split no longer aborts with
# "'breaks' are not unique" when the median coincides with the minimum or
# maximum (many tied values).
humedad_median <- quantile(metadata_ordered$Humedad, probs = 0.5, names = FALSE)
humedad_groups <- factor(
  ifelse(metadata_ordered$Humedad <= humedad_median, "Low", "High"),
  levels = c("Low", "High")
)

anosim_humedad <- anosim(bray_dist, humedad_groups, permutations = 999)
print(summary(anosim_humedad))

# Analisis de correspondencia canonica

In [None]:
# Subset of the ordered metadata holding the three continuous variables.
env_data <- metadata_ordered[c("pH", "Humedad", "Altitud")]

In [None]:
# Canonical correspondence analysis of the community matrix constrained by
# the continuous variables pH, Humedad and Altitud.
cca_result <- cca(
  otu_matrix_t ~ pH + Humedad + Altitud,
  data = metadata_ordered
)

# Full summary of the ordination.
print(summary(object = cca_result))

# Permutation test (999 permutations) of the model as a whole.
anova_cca_overall <- anova(object = cca_result, permutations = 999)
cat("\nOverall CCA model significance:\n")
print(anova_cca_overall)

In [None]:
# Permutation test (999 permutations) of each constrained axis.
anova_cca_axis <- anova(
  object = cca_result,
  by = "axis",
  permutations = 999
)
print(anova_cca_axis)

In [None]:
# Permutation test (999 permutations) of each model term.
anova_cca_terms <- anova(
  object = cca_result,
  by = "terms",
  permutations = 999
)
cat("\nSignificance by environmental variables:\n")
print(anova_cca_terms)

In [None]:
# Plot of sites and environmental variables.
# Sites are coloured by Sistema. The same factor drives both the point colours
# and the legend so that every label is paired with its own colour; taking the
# labels from unique() (order of appearance) while the colours follow the
# sorted factor levels could swap them.
sistema_factor <- factor(metadata_ordered$Sistema)

plot(cca_result, type = "n", main = "CCA - Sites and Environment")
points(cca_result, display = "sites",
       col = as.numeric(sistema_factor) + 1,  # +1 skips colour 1 (black)
       pch = 16, cex = 1.5)
text(cca_result, display = "bp", col = "blue", cex = 1.2)
legend("topright", legend = levels(sistema_factor),
       col = seq_along(levels(sistema_factor)) + 1,
       pch = 16, cex = 0.8)

In [None]:
# Species plot restricted to the most abundant species.
plot(cca_result, type = "n", display = c("species"),
     main = "CCA - Species")

# The 30 OTUs with the largest total counts; head() returns fewer names,
# rather than NA entries, when the table has fewer than 30 OTUs.
species_abundance <- colSums(otu_matrix_t)
top_species <- names(head(sort(species_abundance, decreasing = TRUE), 30))

# Draw only those species. Previously `top_species` was computed but never
# used, so every species was plotted. intersect() guards against names that
# have no score in the ordination.
species_scores <- scores(cca_result, display = "species")
shown_species <- intersect(top_species, rownames(species_scores))
points(species_scores[shown_species, , drop = FALSE], pch = 3, col = "gray")

In [None]:
# Triplot: sites, species and environmental variables.
# Sites are coloured by Sistema. One factor drives both the point colours and
# the legend so that labels and colours always agree. The earlier legend read
# the misspelled column `Sistemat` (which evaluates to NULL) and therefore
# produced the wrong colour sequence.
sistema_factor <- factor(metadata_ordered$Sistema)

plot(cca_result, type = "n", main = "CCA - Triplot")
points(cca_result, display = "sites",
       col = as.numeric(sistema_factor) + 1,  # +1 skips colour 1 (black)
       pch = 16, cex = 2)
points(cca_result, display = "species", pch = 3, col = "gray", cex = 0.5)
text(cca_result, display = "bp", col = "blue", cex = 1.2)
legend("topright", legend = levels(sistema_factor),
       col = seq_along(levels(sistema_factor)) + 1,
       pch = 16, cex = 0.8)