# GWAS in three cohorts
Author: Jose Jaime Martinez-Magana
Day: 04172025
In this analysis, a GWAS was performed with Regenie for three cohorts in the LAGC.

In [None]:
## Perform a PCA
# Request computational resources
srun --pty --mem=128G --time=02:00:00 -p day bash
# Upload PLINK for LD pruning
module load PLINK

## First, we will perform an LD pruning with plink
# Project root. It has no trailing slash, so every path below joins it with an
# explicit "/" (gluing "${in}" directly onto the cohort folder name yields a
# non-existent ".../02lagc_smoking_gwas00mxgdarfz1/..." path).
in="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"

# Perform first pass of LD pruning, one cohort at a time.
# Each entry is "<cohort folder>:<genotype file prefix>":
#   MxGDAR-Fz1, MxGDAR-Fz2, BHRC
for cohort in "00mxgdarfz1:mxgdar1_merged" "01mxgdarfz2:mxgdar2_merged" "02bhrc:bhrc_merged"; do
    geno_dir="${in}/${cohort%%:*}/00databases/01genotypes"
    prefix="${cohort##*:}"

    # 1) Compute the list of approximately independent SNPs (<prefix>_indep_snps.prune.in)
    plink2 --pfile "${geno_dir}/01forgwas/${prefix}" \
        --indep-pairwise 50 5 0.2 --geno 0.01 --mind 0.01 --maf 0.05 \
        --out "${geno_dir}/00forpca/${prefix}_indep_snps"

    # 2) Keep only those SNPs and write the bed/bim/fam set used as PCAiR input
    plink2 --pfile "${geno_dir}/01forgwas/${prefix}" \
        --extract "${geno_dir}/00forpca/${prefix}_indep_snps.prune.in" --make-bed \
        --out "${geno_dir}/00forpca/${prefix}_forpcair"
done

### PCA with PCAiR

In [None]:
#!/bin/bash
#SBATCH --job-name=pca_on_merged_data
#SBATCH --out="slurm-%j.out"
#SBATCH --time=04:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=day
####################################################################################
# SLURM wrapper: PCA without a reference panel, run on several cohorts
# day: 08 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Prepare the environment
# Activate the conda environment used for the GENESIS PCA analysis
module load miniconda
conda activate genesis_pca
# Run the R script; pca.R is resolved relative to the current working directory
Rscript pca.R

####################################################################################
# Add this into a pca.R file
####################################################################################
# load libraries
library(GENESIS)
library(GWASTools)
library(SNPRelate)
library(SeqArray)
library(parallel)
library(BiocParallel);

############################################################################################
## !!! This is the only section that needs adjustment based on your data !!! ##
## Set parameters
# Change directory to the directory where the PLINK LD pruned files are stored
# Directory holding the LD-pruned PLINK files; the relative file names below
# are resolved against it once the working directory is switched
wkdir <- "/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas/00mxgdarfz1/00databases/01genotypes/00forpca"
setwd(wkdir)

# PLINK bed/bim/fam trio for PCAiR, built from one shared prefix
filename <- "mxgdar1_merged_forpcair"
bed_f <- paste0(filename, ".bed")
bim_f <- paste0(filename, ".bim")
fam_f <- paste0(filename, ".fam")

# Name of the GDS file produced by the conversion (keep the .gds extension)
gds_output <- "mxgdar1_merged_forpcair.gds"

# Name of the CSV file that will receive the PCs (keep the .csv extension)
outfile_name <- "mxgdar1_pcs32_04122025.csv"

# Convert the PLINK files into the GDS file
snpgdsBED2GDS(
  bed.fn = bed_f,
  bim.fn = bim_f,
  fam.fn = fam_f,
  family = TRUE,
  out.gdsfn = gds_output
)
####################################################################################
# establishing a seed
set.seed(1000)
# establishing cores
# detectCores() counts the cores of the whole machine, not the cores granted
# to this job, so prefer the SLURM allocation when the scheduler exports one.
# When SLURM_CPUS_PER_TASK is unset or not a positive integer, fall back to
# detectCores(); if that is NA as well, run single-threaded.
slurm_cpus <- suppressWarnings(
  as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = NA))
)
cores <- if (!is.na(slurm_cpus) && slurm_cpus > 0L) slurm_cpus else detectCores()
if (is.na(cores)) {
  cores <- 1L
}
####################################################################################
# Create functions
# LD pruning on an open SNP GDS handle.
#   gds: handle passed straight to snpgdsLDpruning()
# Returns the selected SNP ids flattened into one unnamed vector.
# The thread count is read from the global `cores`.
ld_prun <- function(gds) {
  kept_snps <- snpgdsLDpruning(
    gds,
    method = "corr",
    slide.max.bp = 10e6,
    ld.threshold = sqrt(0.1),
    maf = 0.01,
    missing.rate = 0.01,
    verbose = TRUE,
    num.thread = cores
  )
  unlist(kept_snps, use.names = FALSE)
}

# KING-robust analysis on an open SNP GDS handle.
#   gds: handle passed to snpgdsIBDKING(); every sample id stored in the file
#        is included
# Returns the object produced by snpgdsIBDKING().
# The thread count is read from the global `cores`.
king_mat <- function(gds) {
  all_samples <- read.gdsn(index.gdsn(gds, "sample.id"))
  snpgdsIBDKING(
    gds,
    sample.id = all_samples,
    family.id = NULL,
    maf = 0.01,
    missing.rate = 0.01,
    num.thread = cores
  )
}

# Thin wrapper around pcair().
#   gds_geno: genotype object handed to pcair()
#   pruned:   SNP ids to include (snp.include)
#   KINGmat:  matrix used as both the kinship (kinobj) and the divergence
#             (divobj) input
# Returns the object produced by pcair().
pcair_r <- function(gds_geno, pruned, KINGmat) {
  pcair(
    gds_geno,
    snp.include = pruned,
    kinobj = KINGmat,
    divobj = KINGmat
  )
}
####################################################################################
# set input data
gds <- gds_output

# run analysis for PCs with relationships
# open the GDS file through SNPRelate for the pruning and KING steps
gds <- snpgdsOpen(gds)
# LD pruning
pruned <- ld_prun(gds)
# build KING matrix
KINGmat <- king_mat(gds)
# keep only the kinship matrix and label both dimensions with the sample ids
KINGmat_m <- KINGmat$kinship
colnames(KINGmat_m) <- KINGmat$sample.id
rownames(KINGmat_m) <- KINGmat$sample.id
# get samples in gds
gds_samples <- read.gdsn(index.gdsn(gds, "sample.id"))
# close the SNPRelate handle before the same file is reopened below
snpgdsClose(gds)
####################################################################################
# Warning: review this step!!!!!
# reopen the same GDS file through the GWASTools reader
gds <- GdsGenotypeReader(filename = gds_output)

# create a GenotypeData class object
gds_geno <- GenotypeData(gds)

# run PCAiR
PCair <- pcair_r(gds_geno, pruned, KINGmat_m)

# Create a file of PCs
PCair_df <- as.data.frame(PCair$vectors)
# Name the columns from the number of eigenvectors actually returned, so the
# script does not fail when that number is not 32
colnames(PCair_df) <- paste0("PC", seq_len(ncol(PCair_df)))
# add IID (taken from the row names of the eigenvector table)
PCair_df$IID <- row.names(PCair_df)

# save file
write.csv(PCair_df,
          file = outfile_name,
          quote = FALSE,
          row.names = FALSE)

# release the GDS file handle now that the results are on disk
close(gds_geno)

### Running GWAS

In [None]:
## Recode pgen to bgen
# The pgen file gave an error in the psam file
module load PLINK

# Shared prefix: input pgen set and output prefix of both conversions
geno="bhrc_merged"

# BGEN v1.1 export, biallelic A/C/G/T SNPs only
plink2 \
    --pfile "${geno}" \
    --export bgen-1.1 \
    --snps-only 'just-acgt' \
    --max-alleles 2 \
    --out "${geno}" \
    --threads 30

# Transform pgen to bfile for step1
# Applies the QC filters and also writes the lists of retained SNPs and samples
plink2 \
    --pfile "${geno}" \
    --maf 0.01 --mac 100 --geno 0.1 --hwe 1e-10 --mind 0.1 \
    --write-snplist --write-samples --no-id-header --make-bed --snps-only 'just-acgt' --max-alleles 2 \
    --out "${geno}"

In [None]:
#!/bin/bash
#SBATCH --job-name=step1_bhrc
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1 --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Script for GWAS of smoking traits in BHRC (regenie step 1, full HapMap SNP list)
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Load modules to cluster
# Load Regenie for GWAS analysis
module load miniconda;conda activate regenie_env_v01202024

## Set parameters
# Project and cohort roots; every path below is derived from them
root="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort="${root}/02bhrc"
pheno_dir="${cohort}/00databases/00phenotype"
step1_dir="${cohort}/01results/00step1"
step2_dir="${cohort}/01results/01step2"
# Set genotype data (bed/bim/fam prefix)
inge="${cohort}/00databases/01genotypes/01forgwas/bhrc_merged"
# Set phenotype and covariates files for females
female_inph_bt="${pheno_dir}/bhrc_pheno_female_pheno_bt_forregenie_v04182025.txt"
female_inph_qt="${pheno_dir}/bhrc_pheno_female_pheno_qt_forregenie_v04182025.txt"
female_inco="${pheno_dir}/bhrc_pheno_female_covar_forregenie_v04182025.txt"
# Set phenotype and covariates files for males
male_inph_bt="${pheno_dir}/bhrc_pheno_male_pheno_bt_forregenie_v04182025.txt"
male_inph_qt="${pheno_dir}/bhrc_pheno_male_pheno_qt_forregenie_v04182025.txt"
male_inco="${pheno_dir}/bhrc_pheno_male_covar_forregenie_v04182025.txt"
# Set output for step 1
# One distinct prefix per sex/trait combination so no run overwrites another
out_step1_qt_female="${step1_dir}/out_step1_qt_female_04182025"
out_step1_bt_female="${step1_dir}/out_step1_bt_female_04182025"
out_step1_qt_male="${step1_dir}/out_step1_qt_male_04182025"
out_step1_bt_male="${step1_dir}/out_step1_bt_male_04182025"
# Set output for step 2 (defined for reference; not used by this script)
out_step2_qt_female="${step2_dir}/out_step2_qt_female_04182025"
out_step2_bt_female="${step2_dir}/out_step2_bt_female_04182025"
out_step2_qt_male="${step2_dir}/out_step2_qt_male_04182025"
out_step2_bt_male="${step2_dir}/out_step2_bt_male_04182025"
## Add zeros to the FID if you get errors. You could use the following code in bash
# awk 'BEGIN {OFS="\t"} NR==1 {print "FID", $0; next} {print "0", $0}' "${female_inph_qt}" > tmpfile && mv tmpfile "${female_inph_qt}"

## !!! Warning, we will use HapMap SNPs for Step 1!!!
# Hapmap SNPs path
hapmap="${root}/11references/w_hm3_hg38.snplist"

# Run regenie step 1 for one sex/trait combination.
#   $1 covariate file, $2 phenotype file, $3 output prefix,
#   remaining arguments: trait-type flags, placed right after --bsize
run_step1() {
    local covar="$1" pheno="$2" out="$3"
    shift 3
    regenie \
        --step 1 \
        --bed "${inge}" \
        --covarFile "${covar}" \
        --phenoFile "${pheno}" \
        --bsize 10000 \
        "$@" \
        --extract "${hapmap}" \
        --force-step1 \
        --out "${out}"
}

## Running Step1
# Running for females for quantitative traits
run_step1 "${female_inco}" "${female_inph_qt}" "${out_step1_qt_female}" --qt
# Running for females for binary traits
run_step1 "${female_inco}" "${female_inph_bt}" "${out_step1_bt_female}" --iid-only --bt
# Running for males for quantitative traits
run_step1 "${male_inco}" "${male_inph_qt}" "${out_step1_qt_male}" --iid-only --qt
# Running for males for binary traits
run_step1 "${male_inco}" "${male_inph_bt}" "${out_step1_bt_male}" --iid-only --bt

In [None]:
#### WARNING: using 1M SNPs takes a lot of time, reducing the number of SNPs to 250K
# Hapmap SNPs path
hapmap="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas/11references/w_hm3_hg38.snplist"
# Draw 250,000 random lines and write them next to the full list
# (.../11references/w_hm3_hg38_random250K.snplist), which is where the step 1
# scripts read the subset from, instead of the current working directory.
# NOTE(review): shuf treats every line alike; if the list has a header line it
# is sampled like any other line. Confirm the file has none.
shuf -n 250000 "${hapmap}" > "${hapmap%.snplist}_random250K.snplist"

#### WARNING: running with 500K SNPs

### Step 1
This was the script used for the step 1 of regenie

In [None]:
#!/bin/bash
#SBATCH --job-name=step1_bhrc
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Regenie step 1 for the smoking-trait GWAS in BHRC (random 250K HapMap SNPs)
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Prepare the environment
# Activate the conda environment that provides regenie
module load miniconda
conda activate regenie_env_v01202024

## Paths
# Project and cohort roots; everything else is derived from them
root="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort="${root}/02bhrc"
pheno_dir="${cohort}/00databases/00phenotype"
step1_dir="${cohort}/01results/00step1"
step2_dir="${cohort}/01results/01step2"
# Genotypes (bed/bim/fam prefix)
inge="${cohort}/00databases/01genotypes/01forgwas/bhrc_merged"
# Females: binary phenotypes, quantitative phenotypes, covariates
female_inph_bt="${pheno_dir}/bhrc_pheno_female_pheno_bt_forregenie_v04182025.txt"
female_inph_qt="${pheno_dir}/bhrc_pheno_female_pheno_qt_forregenie_v04182025.txt"
female_inco="${pheno_dir}/bhrc_pheno_female_covar_forregenie_v04182025.txt"
# Males: binary phenotypes, quantitative phenotypes, covariates
male_inph_bt="${pheno_dir}/bhrc_pheno_male_pheno_bt_forregenie_v04182025.txt"
male_inph_qt="${pheno_dir}/bhrc_pheno_male_pheno_qt_forregenie_v04182025.txt"
male_inco="${pheno_dir}/bhrc_pheno_male_covar_forregenie_v04182025.txt"
# Step 1 output prefixes
out_step1_qt_female="${step1_dir}/out_step1_qt_female_04182025"
out_step1_bt_female="${step1_dir}/out_step1_bt_female_04182025"
out_step1_qt_male="${step1_dir}/out_step1_qt_male_04182025"
out_step1_bt_male="${step1_dir}/out_step1_bt_male_04182025"
# Step 2 output prefixes (defined for reference; not used by this script)
out_step2_qt_female="${step2_dir}/out_step2_qt_female_04182025"
out_step2_bt_female="${step2_dir}/out_step2_bt_female_04182025"
out_step2_qt_male="${step2_dir}/out_step2_qt_male_04182025"
out_step2_bt_male="${step2_dir}/out_step2_bt_male_04182025"
## If regenie complains about the FID column, a column of zeros can be prepended in bash:
# awk 'BEGIN {OFS="\t"} NR==1 {print "FID", $0; next} {print "0", $0}' "${female_inph_qt}" > tmpfile && mv tmpfile "${female_inph_qt}"

## !!! Warning: Step 1 is restricted to HapMap SNPs !!!
# Random 250K subset of the HapMap SNP list
hapmap="${root}/11references/w_hm3_hg38_random250K.snplist"

# Launch one regenie step 1 run.
#   $1 covariate file, $2 phenotype file, $3 output prefix,
#   remaining arguments: trait-type flags, placed right after --bsize
run_step1() {
    local covar="$1" pheno="$2" out="$3"
    shift 3
    regenie \
        --step 1 \
        --bed "${inge}" \
        --covarFile "${covar}" \
        --phenoFile "${pheno}" \
        --bsize 400 \
        "$@" \
        --extract "${hapmap}" \
        --force-step1 \
        --out "${out}"
}

## Step 1 runs
# Females, quantitative traits
run_step1 "${female_inco}" "${female_inph_qt}" "${out_step1_qt_female}" --qt
# Females, binary traits
run_step1 "${female_inco}" "${female_inph_bt}" "${out_step1_bt_female}" --iid-only --bt
# Males, quantitative traits
run_step1 "${male_inco}" "${male_inph_qt}" "${out_step1_qt_male}" --iid-only --qt
# Males, binary traits
run_step1 "${male_inco}" "${male_inph_bt}" "${out_step1_bt_male}" --iid-only --bt

## Step 2

In [None]:
#!/bin/bash
#SBATCH --job-name=step2_bhrc
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1 --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Script for GWAS of smoking traits in BHRC (regenie step 2)
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Load modules to cluster
# Load Regenie for GWAS analysis
module load miniconda;conda activate regenie_env_v01202024

## Set parameters
# Project and cohort roots; every path below is derived from them
root="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort="${root}/02bhrc"
pheno_dir="${cohort}/00databases/00phenotype"
step1_dir="${cohort}/01results/00step1"
step2_dir="${cohort}/01results/01step2"
# Set genotype data (prefix of the .bgen / .sample pair)
inge="${cohort}/00databases/01genotypes/01forgwas/bhrc_merged"
# Set phenotype and covariates files for females
female_inph_bt="${pheno_dir}/bhrc_pheno_female_pheno_bt_forregenie_v04182025.txt"
female_inph_qt="${pheno_dir}/bhrc_pheno_female_pheno_qt_forregenie_v04182025.txt"
female_inco="${pheno_dir}/bhrc_pheno_female_covar_forregenie_v04182025.txt"
# Set phenotype and covariates files for males
male_inph_bt="${pheno_dir}/bhrc_pheno_male_pheno_bt_forregenie_v04182025.txt"
male_inph_qt="${pheno_dir}/bhrc_pheno_male_pheno_qt_forregenie_v04182025.txt"
male_inco="${pheno_dir}/bhrc_pheno_male_covar_forregenie_v04182025.txt"
# Set output for step1 (the <prefix>_pred.list files are read below)
out_step1_qt_female="${step1_dir}/out_step1_qt_female_04182025"
out_step1_bt_female="${step1_dir}/out_step1_bt_female_04182025"
out_step1_qt_male="${step1_dir}/out_step1_qt_male_04182025"
out_step1_bt_male="${step1_dir}/out_step1_bt_male_04182025"
# Set output for step2
out_step2_qt_female="${step2_dir}/out_step2_qt_female_04182025"
out_step2_bt_female="${step2_dir}/out_step2_bt_female_04182025"
out_step2_qt_male="${step2_dir}/out_step2_qt_male_04182025"
out_step2_bt_male="${step2_dir}/out_step2_bt_male_04182025"

# Run regenie step 2 for one sex/trait combination.
#   $1 phenotype file, $2 covariate file, $3 step 1 prediction list,
#   $4 output prefix, remaining arguments: trait-type flags, placed right
#   after --covarFile
run_step2() {
    local pheno="$1" covar="$2" pred="$3" out="$4"
    shift 4
    regenie \
        --step 2 \
        --bgen "${inge}.bgen" \
        --ref-first \
        --sample "${inge}.sample" \
        --phenoFile "${pheno}" \
        --covarFile "${covar}" \
        "$@" \
        --pred "${pred}" \
        --bsize 400 \
        --out "${out}"
}

# Each run pairs a phenotype file with the step 1 predictions fitted on that
# same phenotype file and writes to its own step 2 prefix: the binary runs use
# the *_bt_* prediction list and output prefix, never the *_qt_* ones.

# Running GWAS for females
run_step2 "${female_inph_qt}" "${female_inco}" "${out_step1_qt_female}_pred.list" "${out_step2_qt_female}" \
    --qt
run_step2 "${female_inph_bt}" "${female_inco}" "${out_step1_bt_female}_pred.list" "${out_step2_bt_female}" \
    --bt --firth --approx --pThresh 0.01

# Running GWAS for males
run_step2 "${male_inph_qt}" "${male_inco}" "${out_step1_qt_male}_pred.list" "${out_step2_qt_male}" \
    --qt
run_step2 "${male_inph_bt}" "${male_inco}" "${out_step1_bt_male}_pred.list" "${out_step2_bt_male}" \
    --bt --firth --approx --pThresh 0.01

# Code for MXGDARfz1

In [None]:
#!/bin/bash
#SBATCH --job-name=step1_mxgdar1
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Regenie step 1 for the smoking-trait GWAS in MxGDAR-fz1
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Prepare the environment
# Activate the conda environment that provides regenie
module load miniconda
conda activate regenie_env_v01202024

## Paths
# Project and cohort roots; everything else is derived from them
root="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort="${root}/00mxgdarfz1"
pheno_dir="${cohort}/00databases/00phenotype"
step1_dir="${cohort}/01results/00step1"
step2_dir="${cohort}/01results/01step2"
# Genotypes (bed/bim/fam prefix)
inge="${cohort}/00databases/01genotypes/01forgwas/mxgdar1_merged"
# Females: binary phenotypes, quantitative phenotypes, covariates
female_inph_bt="${pheno_dir}/mxgdarfz1_pheno_female_pheno_bt_forregenie_v04202025.txt"
female_inph_qt="${pheno_dir}/mxgdarfz1_pheno_female_pheno_qt_forregenie_v04202025.txt"
female_inco="${pheno_dir}/mxgdarfz1_pheno_female_covar_forregenie_v04202025.txt"
# Males: binary phenotypes, quantitative phenotypes, covariates
male_inph_bt="${pheno_dir}/mxgdarfz1_pheno_male_pheno_bt_forregenie_v04202025.txt"
male_inph_qt="${pheno_dir}/mxgdarfz1_pheno_male_pheno_qt_forregenie_v04202025.txt"
male_inco="${pheno_dir}/mxgdarfz1_pheno_male_covar_forregenie_v04202025.txt"
# Step 1 output prefixes
out_step1_qt_female="${step1_dir}/out_step1_qt_female_04202025"
out_step1_bt_female="${step1_dir}/out_step1_bt_female_04202025"
out_step1_qt_male="${step1_dir}/out_step1_qt_male_04202025"
out_step1_bt_male="${step1_dir}/out_step1_bt_male_04202025"
# Step 2 output prefixes (defined for reference; not used by this script)
out_step2_qt_female="${step2_dir}/out_step2_qt_female_04202025"
out_step2_bt_female="${step2_dir}/out_step2_bt_female_04202025"
out_step2_qt_male="${step2_dir}/out_step2_qt_male_04202025"
out_step2_bt_male="${step2_dir}/out_step2_bt_male_04202025"
## If regenie complains about the FID column, a column of zeros can be prepended in bash:
# awk 'BEGIN {OFS="\t"} NR==1 {print "FID", $0; next} {print "0", $0}' "${female_inph_qt}" > tmpfile && mv tmpfile "${female_inph_qt}"

## !!! Warning: Step 1 is restricted to HapMap SNPs !!!
# Random 250K subset of the HapMap SNP list
hapmap="${root}/11references/w_hm3_hg38_random250K.snplist"

# Launch one regenie step 1 run.
#   $1 covariate file, $2 phenotype file, $3 output prefix,
#   remaining arguments: trait-type flags, placed right after --bsize
run_step1() {
    local covar="$1" pheno="$2" out="$3"
    shift 3
    regenie \
        --step 1 \
        --bed "${inge}" \
        --covarFile "${covar}" \
        --phenoFile "${pheno}" \
        --bsize 400 \
        "$@" \
        --extract "${hapmap}" \
        --force-step1 \
        --out "${out}"
}

## Step 1 runs
# Females, quantitative traits
run_step1 "${female_inco}" "${female_inph_qt}" "${out_step1_qt_female}" --qt
# Females, binary traits
run_step1 "${female_inco}" "${female_inph_bt}" "${out_step1_bt_female}" --iid-only --bt
# Males, quantitative traits
run_step1 "${male_inco}" "${male_inph_qt}" "${out_step1_qt_male}" --iid-only --qt
# Males, binary traits
run_step1 "${male_inco}" "${male_inph_bt}" "${out_step1_bt_male}" --iid-only --bt

In [None]:
#!/bin/bash
#SBATCH --job-name=step2_mxgdar1
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Regenie step 2 for the smoking-trait GWAS in MxGDAR-fz1
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
####################################################################################
## Prepare the environment
# Activate the conda environment that provides regenie
module load miniconda
conda activate regenie_env_v01202024

## Paths
# Project and cohort roots; everything else is derived from them
root="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort="${root}/00mxgdarfz1"
pheno_dir="${cohort}/00databases/00phenotype"
step1_dir="${cohort}/01results/00step1"
step2_dir="${cohort}/01results/01step2"
# Genotypes (prefix of the .bgen / .sample pair)
inge="${cohort}/00databases/01genotypes/01forgwas/mxgdar1_merged"
# Females: binary phenotypes, quantitative phenotypes, covariates
female_inph_bt="${pheno_dir}/mxgdarfz1_pheno_female_pheno_bt_forregenie_v04202025.txt"
female_inph_qt="${pheno_dir}/mxgdarfz1_pheno_female_pheno_qt_forregenie_v04202025.txt"
female_inco="${pheno_dir}/mxgdarfz1_pheno_female_covar_forregenie_v04202025.txt"
# Males: binary phenotypes, quantitative phenotypes, covariates
male_inph_bt="${pheno_dir}/mxgdarfz1_pheno_male_pheno_bt_forregenie_v04202025.txt"
male_inph_qt="${pheno_dir}/mxgdarfz1_pheno_male_pheno_qt_forregenie_v04202025.txt"
male_inco="${pheno_dir}/mxgdarfz1_pheno_male_covar_forregenie_v04202025.txt"
# Step 1 output prefixes (the <prefix>_pred.list files are read below)
out_step1_qt_female="${step1_dir}/out_step1_qt_female_04202025"
out_step1_bt_female="${step1_dir}/out_step1_bt_female_04202025"
out_step1_qt_male="${step1_dir}/out_step1_qt_male_04202025"
out_step1_bt_male="${step1_dir}/out_step1_bt_male_04202025"
# Step 2 output prefixes
out_step2_qt_female="${step2_dir}/out_step2_qt_female_04202025"
out_step2_bt_female="${step2_dir}/out_step2_bt_female_04202025"
out_step2_qt_male="${step2_dir}/out_step2_qt_male_04202025"
out_step2_bt_male="${step2_dir}/out_step2_bt_male_04202025"

# Launch one regenie step 2 run.
#   $1 phenotype file, $2 covariate file, $3 step 1 prediction list,
#   $4 output prefix, remaining arguments: trait-type flags, placed right
#   after --covarFile
run_step2() {
    local pheno="$1" covar="$2" pred="$3" out="$4"
    shift 4
    regenie \
        --step 2 \
        --bgen "${inge}.bgen" \
        --ref-first \
        --sample "${inge}.sample" \
        --phenoFile "${pheno}" \
        --covarFile "${covar}" \
        "$@" \
        --pred "${pred}" \
        --bsize 400 \
        --out "${out}"
}

## GWAS for females: quantitative traits, then binary traits
run_step2 "${female_inph_qt}" "${female_inco}" "${out_step1_qt_female}_pred.list" "${out_step2_qt_female}" \
    --qt
run_step2 "${female_inph_bt}" "${female_inco}" "${out_step1_bt_female}_pred.list" "${out_step2_bt_female}" \
    --bt --firth --approx --pThresh 0.01

## GWAS for males: quantitative traits, then binary traits
run_step2 "${male_inph_qt}" "${male_inco}" "${out_step1_qt_male}_pred.list" "${out_step2_qt_male}" \
    --qt
run_step2 "${male_inph_bt}" "${male_inco}" "${out_step1_bt_male}_pred.list" "${out_step2_bt_male}" \
    --bt --firth --approx --pThresh 0.01

# MxGDARfz2

In [None]:
#!/bin/bash
#SBATCH --job-name=step1_mxgdar2
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1 --ntasks=1
#SBATCH --mem-per-cpu=128G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Script for GWAS of smoking traits in MxGDAR-fz2
# stage: regenie step 1 (every regenie call in this script passes --step 1)
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
# usage: Slurm batch script (see the #SBATCH directives above); it requests one
#        task on one node in the "week" partition with 128G per CPU and e-mail
#        notification for all job events
# NOTE(review): the file names used below carry the stamp 04202025 while this
#               header is dated 18 April 2025 - confirm which date is current
####################################################################################
## Load modules to cluster
# Load Regenie for GWAS analysis
# (regenie is expected to be provided by the conda environment activated here)
module load miniconda;conda activate regenie_env_v01202024

## Set parameters
# Base locations. Every path below is derived from these, so pointing the script
# at another cohort or another release of the phenotype files only requires
# editing this short list instead of every absolute path.
project_dir="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort_dir="${project_dir}/01mxgdarfz2"
pheno_dir="${cohort_dir}/00databases/00phenotype"
results_dir="${cohort_dir}/01results"
# Prefix of the phenotype/covariate file names and the date stamp that is part
# of both the input and the output file names
pheno_prefix="mxgdarfz2_pheno"
run_date="04202025"

# Set genotype data (path prefix without file extension)
inge="${cohort_dir}/00databases/01genotypes/01forgwas/mxgdar2_merged"
# Set phenotype and covariates files for females
# (bt = binary traits, qt = quantitative traits)
female_inph_bt="${pheno_dir}/${pheno_prefix}_female_pheno_bt_forregenie_v${run_date}.txt"
female_inph_qt="${pheno_dir}/${pheno_prefix}_female_pheno_qt_forregenie_v${run_date}.txt"
female_inco="${pheno_dir}/${pheno_prefix}_female_covar_forregenie_v${run_date}.txt"
# Set phenotype and covariates files for males
male_inph_bt="${pheno_dir}/${pheno_prefix}_male_pheno_bt_forregenie_v${run_date}.txt"
male_inph_qt="${pheno_dir}/${pheno_prefix}_male_pheno_qt_forregenie_v${run_date}.txt"
male_inco="${pheno_dir}/${pheno_prefix}_male_covar_forregenie_v${run_date}.txt"
# Set output for step1 (output prefixes, one per sex and trait type)
out_step1_qt_female="${results_dir}/00step1/out_step1_qt_female_${run_date}"
out_step1_bt_female="${results_dir}/00step1/out_step1_bt_female_${run_date}"
out_step1_qt_male="${results_dir}/00step1/out_step1_qt_male_${run_date}"
out_step1_bt_male="${results_dir}/00step1/out_step1_bt_male_${run_date}"
# Set output for step2 (not used by the step 1 runs; kept so that the step 1
# and step 2 scripts share the same parameter section)
out_step2_qt_female="${results_dir}/01step2/out_step2_qt_female_${run_date}"
out_step2_bt_female="${results_dir}/01step2/out_step2_bt_female_${run_date}"
out_step2_qt_male="${results_dir}/01step2/out_step2_qt_male_${run_date}"
out_step2_bt_male="${results_dir}/01step2/out_step2_bt_male_${run_date}"
## If regenie reports errors about the FID column, add an FID column of zeros to
## the phenotype/covariate file. The following bash code does it in place
## (shown here for the female quantitative phenotype file):
# awk 'BEGIN {OFS="\t"} NR==1 {print "FID", $0; next} {print "0", $0}' "${female_inph_qt}" > tmpfile && mv tmpfile "${female_inph_qt}"

## !!! Warning: only the SNPs in the HapMap list below are used for Step 1 !!!
# Hapmap SNPs path (passed to regenie with --extract)
hapmap="${project_dir}/11references/w_hm3_hg38_random250K.snplist"

## Running Step1
# run_step1 COVAR PHENO OUT FLAG...
# Runs one regenie step 1 fit on the PLINK bed set at ${inge}, restricted to the
# variants listed in ${hapmap}.
#   COVAR    covariate file (--covarFile)
#   PHENO    phenotype file (--phenoFile)
#   OUT      output prefix  (--out)
#   FLAG...  trait-type flag (--qt or --bt) plus any other per-run flags; they
#            are passed through in the position they had in the original commands
# Returns regenie's exit status. All expansions are quoted so that a path
# containing spaces or glob characters is still passed as a single argument.
run_step1() {
    local covar="$1" pheno="$2" out="$3"
    shift 3
    regenie \
        --step 1 \
        --bed "${inge}" \
        --covarFile "${covar}" \
        --phenoFile "${pheno}" \
        --bsize 400 \
        "$@" \
        --extract "${hapmap}" \
        --force-step1 \
        --out "${out}"
}

# The four runs are independent, so each one is attempted even if an earlier one
# fails. Failures are recorded so that the job does not end with status 0 just
# because the last run happened to succeed.
step1_failed=0

# Running for females for quantitative traits
# NOTE(review): this is the only run without --iid-only; that is kept exactly as
# in the original commands. The regenie documentation describes --iid-only in
# connection with --tpheno-file - confirm whether the flag is needed at all and
# make the four runs consistent.
run_step1 "${female_inco}" "${female_inph_qt}" "${out_step1_qt_female}" \
    --qt ||
    { echo "regenie step 1 failed: females, quantitative traits" >&2; step1_failed=1; }

# Running for females for binary traits
run_step1 "${female_inco}" "${female_inph_bt}" "${out_step1_bt_female}" \
    --iid-only --bt ||
    { echo "regenie step 1 failed: females, binary traits" >&2; step1_failed=1; }

# Running for males for quantitative traits
run_step1 "${male_inco}" "${male_inph_qt}" "${out_step1_qt_male}" \
    --iid-only --qt ||
    { echo "regenie step 1 failed: males, quantitative traits" >&2; step1_failed=1; }

# Running for males for binary traits
run_step1 "${male_inco}" "${male_inph_bt}" "${out_step1_bt_male}" \
    --iid-only --bt ||
    { echo "regenie step 1 failed: males, binary traits" >&2; step1_failed=1; }

# Report the overall result to Slurm: non-zero if any of the four runs failed
exit "${step1_failed}"

In [None]:
#!/bin/bash
#SBATCH --job-name=step2_mxgdar2
#SBATCH --out="slurm-%j.out"
#SBATCH --time=6-23:00:00
#SBATCH --nodes=1 --ntasks=1
#SBATCH --mem-per-cpu=32G
#SBATCH --mail-type=ALL
#SBATCH --partition=week
####################################################################################
# Script for GWAS of smoking traits in MxGDAR-fz2
# stage: regenie step 2 (every regenie call in this script passes --step 2 and
#        reads the prediction lists written under the step 1 output prefixes)
# day: 18 April 2025
# analyzer: Jose Jaime Martinez-Magana
# cluster: Grace - HPC Yale
# usage: Slurm batch script (see the #SBATCH directives above); it requests one
#        task on one node in the "week" partition with 32G per CPU and e-mail
#        notification for all job events
# NOTE(review): the file names used below carry the stamp 04202025 while this
#               header is dated 18 April 2025 - confirm which date is current
####################################################################################
## Load modules to cluster
# Load Regenie for GWAS analysis
# (regenie is expected to be provided by the conda environment activated here)
module load miniconda;conda activate regenie_env_v01202024

## Set parameters
# Base locations. Every path below is derived from these, so pointing the script
# at another cohort or another release of the phenotype files only requires
# editing this short list instead of every absolute path.
project_dir="/vast/palmer/scratch/montalvo-ortiz/jjm262/02lagc_smoking_gwas"
cohort_dir="${project_dir}/01mxgdarfz2"
pheno_dir="${cohort_dir}/00databases/00phenotype"
results_dir="${cohort_dir}/01results"
# Prefix of the phenotype/covariate file names and the date stamp that is part
# of both the input and the output file names
pheno_prefix="mxgdarfz2_pheno"
run_date="04202025"

# Set genotype data (path prefix; the commands below append .bgen and .sample)
inge="${cohort_dir}/00databases/01genotypes/01forgwas/mxgdar2_merged"
# Set phenotype and covariates files for females
# (bt = binary traits, qt = quantitative traits)
female_inph_bt="${pheno_dir}/${pheno_prefix}_female_pheno_bt_forregenie_v${run_date}.txt"
female_inph_qt="${pheno_dir}/${pheno_prefix}_female_pheno_qt_forregenie_v${run_date}.txt"
female_inco="${pheno_dir}/${pheno_prefix}_female_covar_forregenie_v${run_date}.txt"
# Set phenotype and covariates files for males
male_inph_bt="${pheno_dir}/${pheno_prefix}_male_pheno_bt_forregenie_v${run_date}.txt"
male_inph_qt="${pheno_dir}/${pheno_prefix}_male_pheno_qt_forregenie_v${run_date}.txt"
male_inco="${pheno_dir}/${pheno_prefix}_male_covar_forregenie_v${run_date}.txt"
# Set output for step1 (the commands below read <prefix>_pred.list from these)
out_step1_qt_female="${results_dir}/00step1/out_step1_qt_female_${run_date}"
out_step1_bt_female="${results_dir}/00step1/out_step1_bt_female_${run_date}"
out_step1_qt_male="${results_dir}/00step1/out_step1_qt_male_${run_date}"
out_step1_bt_male="${results_dir}/00step1/out_step1_bt_male_${run_date}"
# Set output for step2 (output prefixes, one per sex and trait type)
out_step2_qt_female="${results_dir}/01step2/out_step2_qt_female_${run_date}"
out_step2_bt_female="${results_dir}/01step2/out_step2_bt_female_${run_date}"
out_step2_qt_male="${results_dir}/01step2/out_step2_qt_male_${run_date}"
out_step2_bt_male="${results_dir}/01step2/out_step2_bt_male_${run_date}"

## Running Step2
# run_step2 PHENO COVAR PRED OUT FLAG...
# Runs one regenie step 2 association test on ${inge}.bgen, with sample IDs
# taken from ${inge}.sample.
#   PHENO    phenotype file                  (--phenoFile)
#   COVAR    covariate file                  (--covarFile)
#   PRED     step 1 prediction list          (--pred)
#   OUT      output prefix                   (--out)
#   FLAG...  trait-type flag (--qt or --bt) plus any test options; they are
#            passed through in the position they had in the original commands
# Returns regenie's exit status. All expansions are quoted so that a path
# containing spaces or glob characters is still passed as a single argument.
run_step2() {
    local pheno="$1" covar="$2" pred="$3" out="$4"
    shift 4
    regenie \
        --step 2 \
        --bgen "${inge}.bgen" \
        --ref-first \
        --sample "${inge}.sample" \
        --phenoFile "${pheno}" \
        --covarFile "${covar}" \
        "$@" \
        --pred "${pred}" \
        --bsize 400 \
        --out "${out}"
}

# The four runs are independent, so each one is attempted even if an earlier one
# fails. Failures are recorded so that the job does not end with status 0 just
# because the last run happened to succeed.
step2_failed=0

# Running GWAS for females
# Quantitative traits
run_step2 "${female_inph_qt}" "${female_inco}" \
    "${out_step1_qt_female}_pred.list" "${out_step2_qt_female}" \
    --qt ||
    { echo "regenie step 2 failed: females, quantitative traits" >&2; step2_failed=1; }

# Binary traits; --firth --approx --pThresh 0.01 are the same test options the
# original command used (per the regenie documentation: approximate Firth
# correction as fallback for variants with p-value below 0.01)
run_step2 "${female_inph_bt}" "${female_inco}" \
    "${out_step1_bt_female}_pred.list" "${out_step2_bt_female}" \
    --bt --firth --approx --pThresh 0.01 ||
    { echo "regenie step 2 failed: females, binary traits" >&2; step2_failed=1; }

# Running GWAS for males
# Quantitative traits
run_step2 "${male_inph_qt}" "${male_inco}" \
    "${out_step1_qt_male}_pred.list" "${out_step2_qt_male}" \
    --qt ||
    { echo "regenie step 2 failed: males, quantitative traits" >&2; step2_failed=1; }

# Binary traits (same test options as for females)
run_step2 "${male_inph_bt}" "${male_inco}" \
    "${out_step1_bt_male}_pred.list" "${out_step2_bt_male}" \
    --bt --firth --approx --pThresh 0.01 ||
    { echo "regenie step 2 failed: males, binary traits" >&2; step2_failed=1; }

# Report the overall result to Slurm: non-zero if any of the four runs failed
exit "${step2_failed}"