In [1]:
library(tidyverse)
library(stringr)
library(ggplot2)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.1     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



In [2]:
#' Generate a cyclic RGB colour spectrum.
#'
#' Each of the three colour channels follows a cosine wave; the channels are
#' shifted by a third of a turn from one another and sampled at `count`
#' evenly spaced angles between 0 and `coverage` turns.
#'
#' @param count Number of colours to generate. Values below 1 yield an empty
#'   character vector.
#' @param coverage Fraction of a full turn spanned between the first and the
#'   last colour.
#' @param offset Starting phase, expressed in thirds of a turn (the spacing
#'   between channels).
#' @param reverse If `TRUE`, both `coverage` and `offset` are negated so the
#'   spectrum is traversed in the opposite direction.
#' @return Character vector of hex colour strings as produced by `rgb()`.
spectrum_generator <- function(count = 100, coverage = 2 / 3, offset = 0,
                               reverse = FALSE) {
  if (count < 1) {
    return(character(0))
  }

  tau <- 2 * pi
  direction <- (-1)^reverse
  coverage <- direction * coverage
  offset <- direction * offset

  # length.out (instead of a step of span / (count - 1)) avoids a division by
  # zero when count == 1 and a degenerate zero step when coverage == 0.
  angles <- seq(0, tau * coverage, length.out = count)

  # Intensity of one channel (1, 2 or 3) at every sampled angle, in [0, 1].
  channel <- function(index) {
    phase <- (offset + index - 1) * tau / 3
    cos(angles + phase) / 2 + 0.5
  }

  rgb(channel(1), channel(2), channel(3))
}

In [3]:
# Path of the tab-separated GWAS summary-statistics file to load.
statsfile_path <- "PD.sumstats"

In [4]:
# Load the tab-delimited summary statistics (header row expected).
gwas_raw <- read.delim(statsfile_path)

In [5]:
# Rename the position column from "POS" to "BP", the name used by the
# plotting code further down.
colnames(gwas_raw)[colnames(gwas_raw) == "POS"] <- "BP"
# Add the -log10 transform of the P column.
gwas_raw[["LOG10P"]] <- -log10(gwas_raw[["P"]])

In [6]:
# Preview the first six rows of the loaded table.
head(gwas_raw, n = 6L)

Unnamed: 0_level_0,SNP,CHR,BP,A1,A2,REF,EAF,Beta,se,P,N,INFO,LOG10P
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
1,rs10399793,1,49298,T,C,T,0.376217,-0.000281739,0.000363748,0.44,459256,0.342797,0.35654732
2,rs2462492,1,54676,C,T,C,0.599414,0.000113573,0.000360334,0.75,459256,0.340158,0.12493874
3,rs3107975,1,55326,T,C,T,0.991551,0.00308387,0.00200589,0.12,459256,0.324228,0.92081875
4,rs74447903,1,57033,T,C,T,0.998221,-0.00311399,0.0044642,0.49,459256,0.296256,0.30980392
5,1:70728_C_T,1,70728,C,T,C,0.997834,-0.000368448,0.00363137,0.92,459256,0.365713,0.03621217
6,rs2462495,1,79033,A,G,A,0.00129077,-0.00526357,0.0046222,0.25,459256,0.536566,0.60205999


In [7]:
# Use the full, unfiltered table as the input for the first plot.
gwas_data <- gwas_raw

In [8]:
# Build the plotting table: every SNP gets a cumulative x coordinate (BPcum)
# so that chromosomes are laid out side by side on a single axis.
don <- gwas_data %>%

  # Largest base-pair position observed on each chromosome
  group_by(CHR) %>%
  summarise(chr_len = max(BP)) %>%

  # Offset of each chromosome = summed lengths of all preceding chromosomes
  # (as.numeric avoids integer overflow in the cumulative sum)
  mutate(tot = cumsum(as.numeric(chr_len)) - chr_len) %>%
  select(-chr_len) %>%

  # Attach the per-chromosome offset to every SNP of the initial dataset
  left_join(gwas_data, ., by = "CHR") %>%

  # Cumulative position of each SNP
  arrange(CHR, BP) %>%
  mutate(BPcum = BP + tot)

# Midpoint of each chromosome on the cumulative axis, used to place its label
axisdf <- don %>%
  group_by(CHR) %>%
  summarise(center = (max(BPcum) + min(BPcum)) / 2)

# One colour per chromosome. At least 22 colours are generated so the palette
# is unchanged for up to 22 chromosomes; more are generated when the data has
# more, because scale_color_manual() fails if given too few values.
spectrum <- spectrum_generator(
  count = max(22, length(unique(don$CHR))),
  coverage = 5 / 6,
  offset = -0.25,
  reverse = TRUE
)

manplot <- ggplot(don, aes(x = BPcum, y = LOG10P)) +

  # Show all points, coloured by chromosome
  geom_point(aes(color = as.factor(CHR)), alpha = 1, size = 0.2) +
  scale_color_manual(values = spectrum) +

  # Custom x axis: one tick per chromosome, placed at its midpoint
  # (argument spelled out as `labels` rather than relying on partial matching)
  scale_x_continuous(labels = axisdf$CHR, breaks = axisdf$center) +
  # Remove the space between the plot area and the x axis
  scale_y_continuous(expand = c(0, 0)) +

  # Custom theme
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

In [9]:
# Write the full-data Manhattan plot to disk (24 cm x 8 cm at 600 dpi).
ggsave(
  filename = "PD_manplot.png",
  plot = manplot,
  width = 24,
  height = 8,
  units = "cm",
  dpi = 600
)

In [20]:
# Keep only the variants whose absolute effect size exceeds its standard
# error (|Beta| > se). which() drops rows where the comparison is NA; plain
# logical indexing would instead insert an all-NA row for each such variant.
gwas_data <- gwas_raw[which(abs(gwas_raw$Beta) > gwas_raw$se), ]

In [23]:
# Preview the first six rows of the filtered table.
head(gwas_data, n = 6L)

Unnamed: 0_level_0,SNP,CHR,BP,A1,A2,REF,EAF,Beta,se,P,N,INFO,LOG10P
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
3,rs3107975,1,55326,T,C,T,0.991551,0.00308387,0.00200589,0.12,459256,0.324228,0.9208188
6,rs2462495,1,79033,A,G,A,0.00129077,-0.00526357,0.0046222,0.25,459256,0.536566,0.60206
14,rs576404767,1,544584,C,T,C,0.99811,0.00336628,0.0032898,0.31,459256,0.492517,0.5086383
19,rs538567606,1,565196,T,C,T,0.99769,0.00447451,0.00349176,0.2,459256,0.369036,0.69897
37,rs375217967,1,568800,G,A,G,0.983068,0.00160406,0.00138525,0.25,459256,0.328855,0.60206
50,rs544671234,1,705942,A,T,A,0.996641,0.00246894,0.00226337,0.28,459256,0.607378,0.552842


In [24]:
# Build the plotting table: every SNP gets a cumulative x coordinate (BPcum)
# so that chromosomes are laid out side by side on a single axis.
don <- gwas_data %>%

  # Largest base-pair position observed on each chromosome
  group_by(CHR) %>%
  summarise(chr_len = max(BP)) %>%

  # Offset of each chromosome = summed lengths of all preceding chromosomes
  # (as.numeric avoids integer overflow in the cumulative sum)
  mutate(tot = cumsum(as.numeric(chr_len)) - chr_len) %>%
  select(-chr_len) %>%

  # Attach the per-chromosome offset to every SNP of the initial dataset
  left_join(gwas_data, ., by = "CHR") %>%

  # Cumulative position of each SNP
  arrange(CHR, BP) %>%
  mutate(BPcum = BP + tot)

# Midpoint of each chromosome on the cumulative axis, used to place its label
axisdf <- don %>%
  group_by(CHR) %>%
  summarise(center = (max(BPcum) + min(BPcum)) / 2)

# One colour per chromosome. At least 22 colours are generated so the palette
# is unchanged for up to 22 chromosomes; more are generated when the data has
# more, because scale_color_manual() fails if given too few values.
spectrum <- spectrum_generator(
  count = max(22, length(unique(don$CHR))),
  coverage = 5 / 6,
  offset = -0.25,
  reverse = TRUE
)

manplot <- ggplot(don, aes(x = BPcum, y = LOG10P)) +

  # Show all points, coloured by chromosome
  geom_point(aes(color = as.factor(CHR)), alpha = 1, size = 0.2) +
  scale_color_manual(values = spectrum) +

  # Custom x axis: one tick per chromosome, placed at its midpoint
  # (argument spelled out as `labels` rather than relying on partial matching)
  scale_x_continuous(labels = axisdf$CHR, breaks = axisdf$center) +
  # Remove the space between the plot area and the x axis
  scale_y_continuous(expand = c(0, 0)) +

  # Custom theme
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

In [25]:
# Write the reduced-data Manhattan plot to disk (24 cm x 8 cm at 600 dpi).
ggsave(
  filename = "PD_manplot_reduced.png",
  plot = manplot,
  width = 24,
  height = 8,
  units = "cm",
  dpi = 600
)