In [None]:
import os
import numpy as np
import pandas as pd
import pandas_profiling
import plotnine
from plotnine import *  # Provides a ggplot-like interface to matplotlib.
from IPython.display import display


In [None]:
# Value of the WORKSPACE_BUCKET environment variable (None when it is unset);
# the bare expression below displays it as the cell output for a quick check.
bucket = os.environ.get("WORKSPACE_BUCKET")
bucket

# Run PRS scoring with PRS-CS weights for the AoU populations

In [None]:
%%writefile /home/jupyter/panukbb/scripts/run_prs_forPRS-CS_aou.sh
#!/bin/bash
# Score one (population, phenotype, chromosome) task with PLINK 2.
#
# Environment variables read by this script:
#   PLINK_FILES  path ending in ".<suffix>" (for example a ".*" wildcard); the
#                suffix is stripped to obtain the --bfile prefix
#   PLINK2_SOFT  zip archive unpacked into the working directory; it must
#                provide ./plink2
#   SCOREFILE    score file handed to --score
#   OUT_DIR, POP, PHENO, CHROM
#                combined into the --out prefix
#                ${OUT_DIR}/${POP}_${PHENO}_chr${CHROM}

# Stop on the first failing command and on any unset variable.
set -o errexit
set -o nounset

# Drop the final ".<suffix>" so PLINK receives the fileset prefix.
PLINK_BFILE="${PLINK_FILES%.*}"
echo "${PLINK_BFILE}"

# -o: overwrite without prompting, because nothing can answer a prompt in a
# batch job.
unzip -o "${PLINK2_SOFT}"

# "2 4 6" are the --score column indexes (variant ID, allele, weight per the
# PLINK 2 documentation); cols=+scoresums adds the score-sum columns to the
# output.
./plink2 --bfile "${PLINK_BFILE}" \
    --score "${SCOREFILE}" 2 4 6 cols=+scoresums \
    --out "${OUT_DIR}/${POP}_${PHENO}_chr${CHROM}"
    


In [None]:
%%bash
# Build the dsub task table with one row per (population, chromosome, phenotype).

# Abort on errors and unset variables so a failed `cd` or a missing bucket
# cannot silently produce a bogus task table.
set -o errexit
set -o nounset
set -o pipefail

: "${WORKSPACE_BUCKET:?WORKSPACE_BUCKET must be set}"

results_dir=/home/jupyter/panukbb/v2/prscs_aou/results_231201
tasks_tsv=/home/jupyter/panukbb/scripts/tasks_prscs_prs_aou.tsv
pheno_list="${results_dir}/pheno.list"

cd "${results_dir}"

# Phenotypes are the sub-directories of the results folder. Globbing "*/"
# (instead of `ls -d *`) skips plain files, so pheno.list itself is not picked
# up as a phenotype when this cell is run again.
shopt -s nullglob
pheno_dirs=(*/)
shopt -u nullglob
if (( ${#pheno_dirs[@]} == 0 )); then
    echo "No phenotype directories found in ${results_dir}" >&2
    exit 1
fi
printf '%s\n' "${pheno_dirs[@]%/}" > "${pheno_list}"

pops=("AoU_afr" "AoU_amr" "AoU_eur" "AoU_meta" "AoU_UKB_eur")

# Header plus every combination, written in a single redirection so the file
# is replaced as a whole.
{
    printf '%s\t%s\t%s\t%s\t%s\n' \
        "--env POP" "--env CHROM" "--env PHENO" "--input SCOREFILE" "--input PLINK_FILES"

    for pop in "${pops[@]}"; do
        for chrom in {1..22}; do
            while IFS= read -r pheno; do
                printf '%s\t%s\t%s\t%s\t%s\n' \
                    "${pop}" \
                    "${chrom}" \
                    "${pheno}" \
                    "${WORKSPACE_BUCKET}/panukbb/prs_cs/aou/results_231201/${pheno}/${pop}/tmp_pst_eff_a1_b0.5_phiauto_chr${chrom}.txt" \
                    "${WORKSPACE_BUCKET}/panukbb/data/aou_v7_testPops_chr${chrom}.*"
            done < "${pheno_list}"
        done
    done
} > "${tasks_tsv}"

In [None]:
%%bash --out TEST_JOB_ID
# Submit the PRS scoring tasks through the aou_dsub helper.
# `--out TEST_JOB_ID` stores this cell's stdout in the Python variable
# TEST_JOB_ID (presumably the submitted job id — confirm against aou_dsub output).

# Defines aou_dsub; NOTE(review): looks like a wrapper around dsub — confirm in ~/aou_dsub.bash.
source ~/aou_dsub.bash


# --tasks supplies the per-row POP/CHROM/PHENO/SCOREFILE/PLINK_FILES values,
# while PLINK2_SOFT and OUT_DIR below are shared by every task.
# --script is the scoring script each task runs; it reads those variables.
# NOTE(review): --min-ram / --disk-size / --boot-disk-size are presumably in GB
# (dsub convention) — confirm for the dsub version in use.
aou_dsub \
  --image gcr.io/ukbb-diversepops-neale/yw-prs-r:test \
  --input PLINK2_SOFT="${WORKSPACE_BUCKET}/software/plink2.zip" \
  --output-recursive OUT_DIR="${WORKSPACE_BUCKET}/panukbb/profiles/prs_cs/aou/chrs/" \
  --min-cores 1 \
  --min-ram 10 \
  --disk-size 10 \
  --boot-disk-size 10 \
  --name "prs_prscs_1214" \
  --label "batch=prs_prscs_1214_v3" \
  --tasks  '/home/jupyter/panukbb/scripts/tasks_prscs_prs_aou.tsv' \
  --script '/home/jupyter/panukbb/scripts/run_prs_forPRS-CS_aou.sh'

In [None]:
%%writefile /home/jupyter/panukbb/scripts/merge_scoreFiles_PRSCS_aou.R
#!/usr/bin/env Rscript

# Combine the per-chromosome PLINK 2 score files of one phenotype into a single
# genome-wide score file per population.
#
# Environment variables:
#   PRS_DIRs  path whose parent directory (dirname) holds the
#             <pop>_<pheno>_chr<N>.sscore files
#             (presumably a wildcard/file path inside that directory — confirm
#             against the job submission)
#   PHENO     phenotype label used in the input and output file names
#   OUT_DIR   directory that receives <pop>_<pheno>_PRS-CS.sscore

library(data.table)

setDTthreads(1)

score_cols <- c("IID", "SCORE1_SUM")
pops <- c("AoU_afr", "AoU_amr", "AoU_eur", "AoU_meta", "AoU_UKB_eur")
chroms <- 1:22

# Return the value of a required environment variable, failing early with a
# clear message instead of a confusing file-path error further down.
get_required_env <- function(name) {
    value <- Sys.getenv(name)
    if (!nzchar(value)) {
        stop("Environment variable '", name, "' is not set", call. = FALSE)
    }
    value
}

prsdir2 <- dirname(get_required_env("PRS_DIRs"))
phe <- get_required_env("PHENO")
outdir <- get_required_env("OUT_DIR")

# Load one chromosome's score file and return it as (IID, chr<N>).
read_chrom_scores <- function(pop, chrom) {
    path <- paste0(prsdir2, "/", pop, "_", phe, "_chr", chrom, ".sscore")
    if (!file.exists(path)) {
        stop("Missing score file: ", path, call. = FALSE)
    }
    scores <- fread(path)
    # PLINK 2 prefixes the first header field with "#"; when the output has no
    # FID column that field is "#IID", so normalise it to "IID".
    if ("#IID" %in% names(scores)) {
        setnames(scores, "#IID", "IID")
    }
    scores <- scores[, score_cols, with = FALSE]
    setnames(scores, "SCORE1_SUM", paste0("chr", chrom))
    scores
}

for (pop in pops) {
    # Fresh list per population so nothing can leak over from the previous one.
    per_chrom <- lapply(chroms, function(chrom) read_chrom_scores(pop, chrom))

    # Inner join on IID: only samples present on every chromosome are kept.
    merged <- Reduce(function(x, y) merge(x, y, by = "IID"), per_chrom)
    if (nrow(merged) < nrow(per_chrom[[1]])) {
        warning(pop, ": ", nrow(per_chrom[[1]]) - nrow(merged),
                " sample(s) from chr", chroms[1],
                " are missing on another chromosome and were dropped")
    }

    # Genome-wide score = sum of the per-chromosome score sums.
    merged[, SCORE1_SUM := rowSums(as.matrix(.SD)), .SDcols = paste0("chr", chroms)]
    prs <- merged[, c("IID", "SCORE1_SUM"), with = FALSE]

    fwrite(prs,
           file = paste0(outdir, "/", pop, "_", phe, "_PRS-CS.sscore"),
           col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
}