# Filter imputed SNPs based on r<sup>2</sup> value

Get list of imputed and non-imputed SNPs

In [None]:
# R code
# Write the IDs of the thin5 SNPs flagged in the .r2 table to a plain-text
# list (one ID per line) for use with bcftools `ID=@file` expressions.

# all relative paths below are resolved against the project directory
setwd("/nesi/nobackup/project/project_2/imputation-highCov-allTests/")

# thin5
# the .r2 table has no header row, so columns are auto-named V1, V2, ...
thin5 <- read.delim(
  "./test_4-combination-gp/target-sample1-thin5_beagle5-w60-i24-st1600-sg6-gp.r2",
  header = FALSE
)

# keep the rows whose 10th column equals "1"
# NOTE(review): presumably V10 is the "imputed" flag -- confirm against the
# tool that wrote the .r2 file
thin5a <- thin5[thin5$V10 == "1", ]

# third column only, wrapped as a one-column data frame for write.table
thin5b <- as.data.frame(thin5a[, 3])

# quick sanity check: first rows and number of SNPs kept
head(thin5b)
nrow(thin5b)

# bare list: no header, no row names, no quoting
write.table(
  thin5b,
  "./merge/thin5-imputedsnps.list",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)


Merge imputed files across samples and split into imputed vs non-imputed SNPs

In [None]:
#!/bin/bash -e

#SBATCH --job-name      results-merge
#SBATCH --mem           10G
#SBATCH --time          5:00:00
#SBATCH --cpus-per-task 12
#SBATCH --account       project
#SBATCH --partition=milan
#SBATCH --output=/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge/logs/%x-%j.out

# Merge the per-sample thin5 imputed VCFs into one multi-sample VCF, then
# split the merged file by SNP ID into an "imputed" and a "nonImputed" subset.

# load modules
module load BCFtools/1.19-GCC-11.3.0
module load SAMtools/1.19-GCC-12.3.0
module load VCFtools/0.1.15-GCC-9.2.0-Perl-5.30.1

# shared name pieces
project_dir=/nesi/nobackup/project/project_2/imputation-highCov-allTests
run=imputed-thin5-w60-i24-st1600-sg6-gp
snp_list=thin5-imputedsnps.list

# set working directory
cd ${project_dir}/

# MERGE FILES
# thin5
# INFO handling across samples: AF values are joined, DR2 values are averaged
bcftools merge \
    ./test_4-combination-gp/target-*-thin5_beagle5-w60-i24-st1600-sg6-gp.vcf.gz \
    --info-rules AF:join,DR2:avg \
    -Oz -o ./merge/${run}.vcf.gz

# set working directory
cd ${project_dir}/merge

## SPLIT FILES INTO IMPUTED AND NON-IMPUTED
# thin5
# nonImputed = sites whose ID is NOT in the list (-e, exclude)
# imputed    = sites whose ID IS in the list     (-i, include)
for subset in nonImputed imputed
do
    if [ "${subset}" = "imputed" ]; then
        mode=-i
    else
        mode=-e
    fi

    bcftools view ${mode} "ID=@${snp_list}" ${run}.vcf.gz -Oz -o \
    ${run}-${subset}.vcf.gz
    tabix -f ${run}-${subset}.vcf.gz
    bcftools stats -s "-" ${run}-${subset}.vcf.gz > ${run}-${subset}.stats
done


Get list of SNPs below accuracy threshold

In [None]:
# load libraries
library(data.table)

# set working directory
setwd("/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge")

#### list of snps to remove ####

thinList <- list("thin5", "thin27", "thin45", "thin62", "thin93", "thinSnpchip", "thinRadseq")

# r2 cutoffs, keyed by the tag used in object and file names
cutoffs <- c("0p8" = 0.8, "0p6" = 0.6)

# Return a one-column data frame ("position") holding the first column of
# every row of `variant` whose r2 is below `cutoff`, with ":" replaced by "_".
low_r2_positions <- function(variant, cutoff) {
  below <- variant[variant$r2 < cutoff, ]
  ids <- as.data.frame(below[, 1])
  colnames(ids) <- c("position")
  ids$position <- gsub(":", "_", ids$position)
  ids
}

# build one "<thin>-<tag>-remove" object per test and cutoff
for (thin in thinList) {
  variant <- read.delim(
    paste0("imputed-", thin, "-w60-i24-st1600-sg6-gp.vcf_per_variant_results.txt")
  )
  for (tag in names(cutoffs)) {
    assign(
      paste0(thin, "-", tag, "-remove"),
      low_r2_positions(variant, cutoffs[[tag]])
    )
  }
}

# write each object to "<thin>-<tag>-remove.list" as a bare list of IDs
for (thin in thinList) {
  for (tag in names(cutoffs)) {
    write.table(
      get(paste0(thin, "-", tag, "-remove")),
      file = paste0(thin, "-", tag, "-remove.list"),
      quote = FALSE,
      row.names = FALSE,
      col.names = FALSE
    )
  }
}

Filter imputed SNPs

In [None]:
#!/bin/bash -e

#SBATCH --job-name      merge-filter_site
#SBATCH --mem           10G
#SBATCH --time          2:00:00
#SBATCH --cpus-per-task=12
#SBATCH --account       project

# For every thinning test: remove the imputed SNPs named in the 0.8 and 0.6
# removal lists from the imputed-only VCF, then recombine each filtered file
# with the non-imputed VCF and write summary stats.

# set up environment
module load BCFtools/1.19-GCC-11.3.0
module load SAMtools/1.19-GCC-12.3.0
module load VCFtools/0.1.15-GCC-9.2.0-Perl-5.30.1

# set working directory
cd /nesi/nobackup/project/project_2/imputation-highCov-allTests/merge

for thin in thin5 thin27 thin45 thin62 thin93 thinSnpchip thinRadseq
do
    # filter imputed file: exclude every site listed in ${thin}-0p8-remove.list
    # (expression is quoted so the shell passes it to bcftools untouched)
    bcftools view -e "ID=@${thin}-0p8-remove.list" imputed-${thin}-w60-i24-st1600-sg6-gp-imputed.vcf.gz -Oz -o \
    imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p8.vcf.gz
    tabix -f imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p8.vcf.gz
    # filter imputed file: exclude every site listed in ${thin}-0p6-remove.list
    bcftools view -e "ID=@${thin}-0p6-remove.list" imputed-${thin}-w60-i24-st1600-sg6-gp-imputed.vcf.gz -Oz -o \
    imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p6.vcf.gz
    tabix -f imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p6.vcf.gz
    ## merge imputed with non-imputed
    # concat -a accepts overlapping inputs; the result is then position-sorted
    # 0.8
    bcftools concat -a imputed-${thin}-w60-i24-st1600-sg6-gp-nonImputed.vcf.gz imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p8.vcf.gz | \
    bcftools sort -Oz -o imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p8.vcf.gz
    bcftools stats -s "-" imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p8.vcf.gz > imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p8.stats
    # 0.6
    bcftools concat -a imputed-${thin}-w60-i24-st1600-sg6-gp-nonImputed.vcf.gz imputed-${thin}-w60-i24-st1600-sg6-gp-imputed-r2_0p6.vcf.gz | \
    bcftools sort -Oz -o imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p6.vcf.gz
    bcftools stats -s "-" imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p6.vcf.gz > imputed-${thin}-w60-i24-st1600-sg6-gp-r2_0p6.stats
done


# Calculate observed individual heterozygosity

In [None]:
#!/bin/bash -e

#SBATCH --job-name      vcf-heterozygosity
#SBATCH --mem           10G
#SBATCH --time          1:00:00
#SBATCH --cpus-per-task=12
#SBATCH --account       project

# Run `vcftools --het` on the ground-truth VCF and on every imputed VCF
# (unfiltered, r2_0p8 and r2_0p6), then gather the imputed results in one file.

# load programs
module load VCFtools/0.1.15-GCC-9.2.0-Perl-5.30.1

# input locations
ref_dir=/nesi/nobackup/project/project_2/imputation-input/ref
merge_dir=/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge

# set working directory
cd /nesi/nobackup/project/project_2/vcftools-het/output

# ground truth
truth=hihi_wgs_filter_highcov_no83318-autosomes-snpID-noMissing-noSingleton-2allele
vcftools --het --gzvcf ${ref_dir}/${truth}.vcf.gz \
--out ${truth}

# thin tests
for density in 5 27 45 62 93 Snpchip Radseq
do
    for variant_set in gp gp-r2_0p8 gp-r2_0p6
    do
        stem=imputed-thin${density}-w60-i24-st1600-sg6-${variant_set}
        vcftools --het --gzvcf ${merge_dir}/${stem}.vcf.gz \
        --out ${stem}
    done
done

# combine results
# with several input files, grep "" prints every line prefixed by "<file name>:"
grep "" imputed*.het > all_imputed_results.het


Plot heterozygosity

In [None]:
# R code
# Build a per-individual table of the proportion of observed heterozygous
# sites for every imputation test / r2 filter, from the combined .het file.

# load packages
library(tidyr)
library(ggplot2)

# set working directory
setwd("/nesi/project/project/project_2/2Imputation/vcftools-het/output")

# load nSNPs_postImputation
nsnp <- read.delim("/nesi/project/project/project_2/2Imputation/vcftools-het/codes/nSNP_postImputation.txt")

# imputed results
het <- read.delim("all_imputed_results.het")

# The first column holds "<file name>:<INDV>". Its header embeds the name of
# whichever input file came first when the results were combined, so it is
# selected by position rather than by that run-dependent name.
het2 <- separate_wider_delim(het, cols = 1, delim = ":", names = c("temp", "INDV"))

# file name -> run settings; everything after the 6th "-" stays in temp2
het3 <- separate_wider_delim(het2, cols = temp, delim = "-", names = c("temp1", "test", "window", "iter", "impState", "impSeg", "temp2"), too_many = "merge")
# drop the extension part: temp3 is the text before the first "."
het4 <- separate_wider_delim(het3, cols = temp2, delim = ".", names = c("temp3", "temp4"), too_many = "merge")
# text after "_" is the r2 filter; names with no "_" get filter = NA
het5 <- separate_wider_delim(het4, cols = temp3, delim = "_", names = c("temp5",  "filter"), too_few = "align_start")
het5 <- replace_na(het5, list(filter = "none"))

# Convert within the frame being modified. Values that are not numeric become
# NA here and those rows are removed by na.omit() below
# NOTE(review): presumably these are the per-file header lines repeated in the
# combined file -- confirm.
het5$N_SITES <- as.numeric(het5$N_SITES)
het6 <- na.omit(het5)

# look up the post-imputation SNP count for each test/filter combination
het6$N_SITES_2 <- nsnp$nSNP_postImputation[match(paste0(het6$test,het6$filter),paste0(nsnp$test, nsnp$filter))]
het6$O.HOM. <- as.numeric(het6$O.HOM.)
# heterozygous sites = sites counted - observed homozygous sites
het6$O.HET <- het6$N_SITES - het6$O.HOM.
# proportion is taken over N_SITES_2 (the looked-up count), not N_SITES
het6$propO.HET <- (het6$O.HET / het6$N_SITES_2)
het7 <- het6[,c("INDV", "propO.HET", "test", "filter")]



# Calculate allele frequency

In [None]:
#!/bin/bash -e

#SBATCH --job-name      2024-10-02_results-merge
#SBATCH --mem           10G
#SBATCH --time          5:00:00
#SBATCH --cpus-per-task 12
#SBATCH --account       project
#SBATCH --partition=milan
#SBATCH --output=/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge/logs/%x-%j.out

# Per-site allele frequencies for the merged thin5 VCF.

# load modules
module load BCFtools/1.19-GCC-11.3.0
module load VCFtools/0.1.15-GCC-9.2.0-Perl-5.30.1

# set working directory
cd /nesi/nobackup/project/project_2/imputation-highCov-allTests/merge

# calculate allele frequency using vcftools
# output prefix is the VCF stem plus "-vcftools"
vcf_stem=imputed-thin5-w60-i24-st1600-sg6-gp
vcftools --gzvcf ${vcf_stem}.vcf.gz --freq --out ${vcf_stem}-vcftools


Plot SFS

In [None]:
# R code
# Folded site-frequency-spectrum figure: one ground-truth panel plus one panel
# per thinning test, in three rows (raw imputed, r2 >= 0.6, r2 >= 0.8).

# load packages
options(scipen = 999)
library(tidyr)
library(ggplot2)
library(cowplot)
library(patchwork)
library(grid)
library(gridExtra)
library(ggpattern)

# HELPERS ----

# Read one "-vcftools.frq" table and split its two "allele:frequency" columns.
# Returns the table with columns chrom, pos, n_alleles, n_chr, allele1,
# allele1freq, allele2, allele2freq (frequencies numeric).
read_frq <- function(path) {
  # row.names = NULL keeps the first column as data instead of row names
  # NOTE(review): presumably needed because the header has fewer fields than
  # the data rows -- confirm against a .frq file
  frq <- read.delim(path, fill = TRUE, row.names = NULL)
  colnames(frq) <- c("chrom", "pos", "n_alleles", "n_chr", "temp1", "temp2")
  frq <- separate_wider_delim(frq, cols = temp1, delim = ":", names = c("allele1", "allele1freq"))
  frq <- separate_wider_delim(frq, cols = temp2, delim = ":", names = c("allele2", "allele2freq"))
  frq$allele1freq <- as.numeric(frq$allele1freq)
  frq$allele2freq <- as.numeric(frq$allele2freq)
  frq
}

# Add the folded allele-2 frequency (values above 0.5 become 1 - value) and a
# "zero"/"notZero" flag used to hatch the sites whose folded frequency is 0.
fold_maf <- function(frq) {
  frq$allele2freqFold <- ifelse(frq$allele2freq > 0.5,
                                1 - frq$allele2freq,
                                frq$allele2freq)
  frq$type <- ifelse(frq$allele2freqFold == 0,
                     "zero",
                     "notZero")
  frq
}

# Histogram panel for one imputed test. The y axis runs from 0 to `y_max`;
# axis titles and y tick labels are blanked because the combined figure
# supplies shared ones.
sfs_panel <- function(frq, bar_fill, title, subtitle, y_max) {
  ggplot(frq, aes(x = allele2freqFold)) +
    geom_histogram_pattern(fill = bar_fill,
                           binwidth = 0.1,
                           aes(pattern = type),
                           pattern_fill = "black",
                           pattern_density = 0.1,
                           pattern_spacing = 0.025) +
    theme_classic() +
    labs(title = title, subtitle = subtitle) +
    scale_y_continuous(labels = scales::comma, limits = c(0, y_max)) +
    scale_pattern_manual(values = c(zero = "stripe", notZero = "none")) +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.y = element_blank(),
          plot.title = element_text(size = 12, face = "bold"),
          plot.subtitle = element_text(size = 12),
          legend.position = "none") +
    guides(pattern = guide_legend(override.aes = list(fill = "white")))
}

# IMPORT DATA
#### ground truth ####
truth2 <- read_frq("/nesi/project/project/project_2/2Imputation/imputation-input/ref/hihi_wgs_filter_highcov_no83318-autosomes-snpID-noMissing-noSingleton-2allele-vcftools.frq")
head(truth2)

# query allele 2
quantile(truth2$allele2freq)

# fold allele 2 (alt)
truth2 <- fold_maf(truth2)
quantile(truth2$allele2freqFold)

# the truth panel keeps its y tick labels; it is the left-most panel of each row
truthPlot <- ggplot(truth2, aes(x=allele2freqFold)) +
  geom_histogram_pattern(fill = "#44AA99",
                         binwidth = 0.1,
                         aes(pattern = type),
                         pattern_fill = "black",
                         pattern_density = 0.1,
                         pattern_spacing = 0.025) +
  theme_classic() +
  labs(title = "Ground truth\n2.87K SNPs/Mb", subtitle = "", x = "Minor allele frequency", y = "Number of SNPs") +
  scale_y_continuous(labels = scales::comma) +
  scale_pattern_manual(values = c(zero = "stripe", notZero = "none")) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 12),
        legend.position = "none") +
  guides(pattern = guide_legend(override.aes = list(fill = "white")))

#### IMPUTED PANELS ####
setwd("/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge")

tests <- c("thin5", "thin27", "thin45", "thin62", "thin93", "thinSnpchip", "thinRadseq")

#### RAW ####
#rawTitles <- c("Missingness 5%", "27%", "45%", "62%", "93%", "98% (SNP-chip)", "99% (RADseq)")
rawTitles <- c("SNP density (SNPs/Mb)\n2.73K", " \n2.10K", " \n1.58K", " \n1.09K", " \n197", " \n48.9 (SNP-chip)", " \n19.6 (RADseq)")
rawSubtitles <- c("raw imputed", "", "", "", "", "", "")

for (i in seq_along(tests)) {
  frq <- fold_maf(read_frq(paste0("imputed-",tests[i],"-w60-i24-st1600-sg6-gp-vcftools.frq")))
  panel <- sfs_panel(frq, bar_fill = "#88CCEE", title = rawTitles[i],
                     subtitle = rawSubtitles[i], y_max = 1200000)
  # panels are stored as global objects named "plot-raw_<test>"
  assign(paste0("plot-raw_",tests[i]), panel, envir = .GlobalEnv)
}

#### FILTER R2 >=0.6 ####
# only the first panel of the row carries the row label
sixSubtitles <- c(expression(paste("r"^2,"\u22650.6")),"","","","","","")

for (i in seq_along(tests)) {
  frq <- fold_maf(read_frq(paste0("imputed-",tests[i],"-w60-i24-st1600-sg6-gp-r2_0p6-vcftools.frq")))
  panel <- sfs_panel(frq, bar_fill = "#74a1d4", title = "",
                     subtitle = sixSubtitles[i], y_max = 600000)
  assign(paste0("plot-0p6_",tests[i]), panel, envir = .GlobalEnv)
}

#### FILTER R2 >=0.8 ####
eightSubtitles <- c(expression(paste("r"^2,"\u22650.8")),"","","","","","")

for (i in seq_along(tests)) {
  frq <- fold_maf(read_frq(paste0("imputed-",tests[i],"-w60-i24-st1600-sg6-gp-r2_0p8-vcftools.frq")))
  panel <- sfs_panel(frq, bar_fill = "#5E77BB", title = "",
                     subtitle = eightSubtitles[i], y_max = 600000)
  assign(paste0("plot-0p8_",tests[i]), panel, envir = .GlobalEnv)
}

## plot

# combine plots
# one row per filter level; the truth panel is wider (1.3) to fit its y labels
# NOTE(review): adding ylim() replaces the y scale already set on truthPlot,
# which presumably also drops its comma-formatted labels -- confirm intended
row_widths <- c(1.3,1,1,1,1,1,1,1)

plot_sfs <- plot_grid(
  plot_grid(truthPlot + ylim(0,1200000),
            `plot-raw_thin5`, `plot-raw_thin27`, `plot-raw_thin45`, `plot-raw_thin62`,
            `plot-raw_thin93`, `plot-raw_thinSnpchip`, `plot-raw_thinRadseq`,
            nrow = 1, align = "h", axis = "bt", rel_widths = row_widths),
  plot_grid(truthPlot + theme(plot.title = element_blank()) + ylim(0,600000),
            `plot-0p6_thin5`, `plot-0p6_thin27`, `plot-0p6_thin45`, `plot-0p6_thin62`,
            `plot-0p6_thin93`, `plot-0p6_thinSnpchip`, `plot-0p6_thinRadseq`,
            nrow = 1, align = "h", axis = "bt", rel_widths = row_widths),
  plot_grid(truthPlot + theme(plot.title = element_blank()) + ylim(0,600000),
            `plot-0p8_thin5`, `plot-0p8_thin27`, `plot-0p8_thin45`, `plot-0p8_thin62`,
            `plot-0p8_thin93`, `plot-0p8_thinSnpchip`, `plot-0p8_thinRadseq`,
            nrow = 1, align = "h", axis = "bt", rel_widths = row_widths),
  ncol = 1)

# create common x and y labels
y.grob <- textGrob("Number of SNPs",
                   gp=gpar(fontface="bold", fontsize=15), rot=90)
x.grob <- textGrob("Minor allele frequency",
                   gp=gpar(fontface="bold", fontsize=15))

png("/nesi/project/project/project_2/2Imputation/sfs/imputation-highCov-foldedSfs-filtered-v7.png", width = 14, height = 8, units = "in", res = 300)
grid.arrange(plot_sfs, left = y.grob, bottom = x.grob)
dev.off()


# Runs of homozygosity

R script

In [None]:
# Fit a 13-class RZooRoH model to one imputed data set and save the results.
# Command-line arguments:
#   args[1]  test label used in the input file name (between "imputed-" and "-gp-r2_")
#   args[2]  r2 filter label used in the input file name (after "-gp-r2_")
args <- commandArgs(trailingOnly = TRUE)
str(args)
cat(args, sep = "\n")

# fail early: a missing argument would otherwise be pasted into the file
# names as "NA" and only surface later as a file-not-found error
if (length(args) < 2) {
  stop("expected two arguments: <test> <r2 filter>", call. = FALSE)
}

# load packages
library(ggplot2)
library(data.table)
library(tidyr)
library(dplyr)
library(RZooRoH)

# set working directory
setwd("/nesi/nobackup/project/project_2/rzooroh/")

# shared stem of the input file, the results object and the output file
run_id <- paste0("imputed-", args[1], "-gp-r2_", args[2])

## import data
# Specifying the data file and genotype / sequence format
# chromosome is read from column 1 and position from column 4; supcol = 6
# NOTE(review): presumably the 6 leading non-genotype columns -- confirm
imputed_GP <- zoodata(genofile = paste0("./input/", run_id, ".gen"), zformat = "gp", samplefile = "./codes/sampleFile.txt", supcol = 6, chrcol = 1, poscol = 4)

## define the model
# model
# err=0.25% (0.0025) - Laura
mix13R <- zoomodel(K=13,krates=c( 10, 20, 30, 40, 50, 100, 200, 500, 600, 700, 1000, 2000, 2000),err=0.0025,layers = TRUE)

## run model
results <- zoorun(mix13R, imputed_GP, localhbd = TRUE, nT = 12)

# store the results under a run-specific object name so that load()-ing
# several .RData files does not overwrite one result with another
name <- paste0(run_id, "_results")
assign(name, results)

# save results
save(list = name, file = paste0("/nesi/nobackup/project/project_2/rzooroh/output/", run_id, "_results.RData"))


To run R script

In [None]:
#!/bin/bash -e

#SBATCH --job-name      RZooROH_K13-imputed-gp-v2
#SBATCH --mem           90GB
#SBATCH --time          15:00:00
#SBATCH --cpus-per-task 12
#SBATCH --array         1-14
#SBATCH --account       project

# One array task per line of the config file: convert that task's filtered
# VCF to a .gen file, then run the RZooRoH R script on it.

## load modules
module load BCFtools/1.19-GCC-11.3.0

## extract variables
# reference config file
# (column 1 = array task ID, column 2 = test, column 3 = filter)
config=/nesi/nobackup/project/project_2/rzooroh/codes/array-v2.config

# values on the row whose first column matches this array task
task_id=${SLURM_ARRAY_TASK_ID}
thin_test=$(awk -v ArrayTaskID="${task_id}" '$1==ArrayTaskID {print $2}' "${config}")
r2_filter=$(awk -v ArrayTaskID="${task_id}" '$1==ArrayTaskID {print $3}' "${config}")

## make input files
# recode a VCF to the Oxford GEN format, taking values from the GT tag
# NOTE(review): the R script reads this file with zformat = "gp" -- confirm
# that GT-derived values are what is intended
cd /nesi/nobackup/project/project_2/rzooroh/input

gen_stem=imputed-${thin_test}-gp-r2_${r2_filter}
source_vcf=/nesi/nobackup/project/project_2/imputation-highCov-allTests/merge/imputed-${thin_test}-w60-i24-st1600-sg6-gp-r2_${r2_filter}.vcf.gz

bcftools convert --gensample ${gen_stem} --3N6 --tag GT ${source_vcf}
# the R script reads the uncompressed .gen file
gunzip -f ${gen_stem}.gen.gz

## run RZooRoH
module load R/4.3.2-foss-2023a

# run script
cd /nesi/nobackup/project/project_2/rzooroh/codes
echo "Executing R ..."
srun Rscript --vanilla RZooRoH_K13-imputed-gp-v2.R ${thin_test} ${r2_filter}
echo "R habis."