# Visualise GWAS output

## Patristic distances

In [1]:
library(readr)

# Locate the GWAS plot file under the directory named by the
# GENOMICS_DATA_BASE environment variable.
genomics_data_base <- Sys.getenv("GENOMICS_DATA_BASE")
# Sys.getenv() returns "" for an unset variable, which would silently build
# a path rooted at "/" and fail later with a misleading "file not found".
if (!nzchar(genomics_data_base)) {
  stop("Environment variable GENOMICS_DATA_BASE is not set.", call. = FALSE)
}
file_path <- file.path(
  genomics_data_base,
  "annotations", "e_coli", "gene_sequences", "gene_presence_absence",
  "mortality_gene_presence_absence.plot"
)

# Read the tab-separated file, skipping lines that start with "#".
# Column types are pinned (d = double, c = character) instead of guessed so
# that SNP and BP are always character and the numeric columns always double.
plot.df <- read_tsv(
  file_path,
  comment = "#",
  col_names = c("CHR", "SNP", "BP", "minLOG10_p", "log10_p", "r2"),
  col_types = "dccddd"
)

# Back-transform to a raw p-value; log10_p is treated as -log10(p) here.
plot.df$p <- 10^(-plot.df$log10_p)
# Rebuild the identifier as "<CHR>_<BP>", replacing the SNP value read from
# the file.
plot.df$SNP <- paste(plot.df$CHR, plot.df$BP, sep = "_")

head(plot.df)

[1mRows: [22m[34m8440[39m [1mColumns: [22m[34m6[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (2): SNP, BP
[32mdbl[39m (4): CHR, minLOG10_p, log10_p, r2

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


CHR,SNP,BP,minLOG10_p,log10_p,r2,p
<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
1,1_8712,8712,0.00480371,0.00480371,0,0.989
1,1_dgoR,dgoR,0.00480371,0.00480371,0,0.989
1,1_yehL~~~moxR,yehL~~~moxR,0.00480371,0.00480371,0,0.989
1,1_ygfK,ygfK,0.00480371,0.00480371,0,0.989
1,1_ybcH,ybcH,0.00480371,0.00480371,0,0.989
1,1_ydbL,ydbL,0.00480371,0.00480371,0,0.989


## Bonferroni correction

In [None]:
# Bonferroni significance threshold on the -log10 scale: the family-wise
# alpha of 0.05 is divided by the number of rows (tests) in plot.df.
p.value.cutoff <- -log10(0.05 / nrow(plot.df))
p.value.cutoff

In [11]:
# Rows whose log10_p exceeds the Bonferroni cutoff. which() drops NA
# comparisons so rows with a missing log10_p are excluded instead of being
# returned as all-NA rows.
plot.df[which(plot.df$log10_p > p.value.cutoff), ]

CHR,SNP,BP,minLOG10_p,log10_p,r2,p
<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>


## Holm (FWER) correction

In [13]:
# Holm step-down adjustment of the p-values in plot.df (controls the
# family-wise error rate), then list the identifiers below alpha.
p_values <- plot.df$p
alpha <- 0.05

# p.adjust() accepts method = "holm", "hochberg", "hommel", "bonferroni",
# "BH" (alias "fdr"), "BY" or "none".
adjusted_p_values <- p.adjust(p_values, method = "holm")

# p.adjust() leaves NA p-values as NA; treat them as not significant so the
# mask never contains NA and indexing cannot return NA identifiers.
significant <- !is.na(adjusted_p_values) & adjusted_p_values < alpha

signif.genes <- plot.df$SNP[significant]

signif.genes

## hochberg correction

In [14]:
# Hochberg step-up adjustment of the p-values in plot.df, then list the
# identifiers whose adjusted p-value is below alpha.
p_values <- plot.df$p
alpha <- 0.05

# p.adjust() accepts method = "holm", "hochberg", "hommel", "bonferroni",
# "BH" (alias "fdr"), "BY" or "none".
adjusted_p_values <- p.adjust(p_values, method = "hochberg")

# p.adjust() leaves NA p-values as NA; treat them as not significant so the
# mask never contains NA and indexing cannot return NA identifiers.
significant <- !is.na(adjusted_p_values) & adjusted_p_values < alpha

signif.genes <- plot.df$SNP[significant]

signif.genes

## hommel correction

In [15]:
# Hommel adjustment of the p-values in plot.df, then list the identifiers
# whose adjusted p-value is below alpha.
p_values <- plot.df$p
alpha <- 0.05

# p.adjust() accepts method = "holm", "hochberg", "hommel", "bonferroni",
# "BH" (alias "fdr"), "BY" or "none".
adjusted_p_values <- p.adjust(p_values, method = "hommel")

# p.adjust() leaves NA p-values as NA; treat them as not significant so the
# mask never contains NA and indexing cannot return NA identifiers.
significant <- !is.na(adjusted_p_values) & adjusted_p_values < alpha

signif.genes <- plot.df$SNP[significant]

signif.genes

## BH correction

In [16]:
# Benjamini-Hochberg adjustment of the p-values in plot.df (controls the
# false discovery rate), then list the identifiers below alpha.
p_values <- plot.df$p
alpha <- 0.05

# p.adjust() accepts method = "holm", "hochberg", "hommel", "bonferroni",
# "BH" (alias "fdr"), "BY" or "none".
adjusted_p_values <- p.adjust(p_values, method = "BH")

# p.adjust() leaves NA p-values as NA; treat them as not significant so the
# mask never contains NA and indexing cannot return NA identifiers.
significant <- !is.na(adjusted_p_values) & adjusted_p_values < alpha

signif.genes <- plot.df$SNP[significant]

signif.genes

## BY correction

In [17]:
# Benjamini-Yekutieli adjustment of the p-values in plot.df (controls the
# false discovery rate), then list the identifiers below alpha.
p_values <- plot.df$p
alpha <- 0.05

# p.adjust() accepts method = "holm", "hochberg", "hommel", "bonferroni",
# "BH" (alias "fdr"), "BY" or "none".
adjusted_p_values <- p.adjust(p_values, method = "BY")

# p.adjust() leaves NA p-values as NA; treat them as not significant so the
# mask never contains NA and indexing cannot return NA identifiers.
significant <- !is.na(adjusted_p_values) & adjusted_p_values < alpha

signif.genes <- plot.df$SNP[significant]

signif.genes