In [None]:
import os
import numpy as np
import pandas as pd
import pandas_profiling
import plotnine
from plotnine import *  # Provides a ggplot-like interface to matplotlib.
from IPython.display import display



In [None]:
# Read the WORKSPACE_BUCKET environment variable (None when it is unset)
# and echo it as the cell output so the value is visible in the notebook.
bucket = os.environ.get("WORKSPACE_BUCKET")
bucket

# PRS-CSx for AoU GWAS

In [None]:
%%writefile /home/jupyter/panukbb/scripts/run_prs_forPRS-CSx_aou.sh
#!/bin/bash
# Score one phenotype on one chromosome with plink2.
#
# Environment variables read by this script (set by the job submission):
#   PLINK_FILES  path to the PLINK fileset, ending in an extension/wildcard
#                (e.g. "<prefix>.*"); the final ".<ext>" is stripped to obtain
#                the --bfile prefix
#   PLINK2_SOFT  path to a zip archive that unpacks a `plink2` binary into the
#                current directory
#   SCOREFILE    per-variant weights file passed to `plink2 --score`
#   OUT_DIR      directory that receives the plink2 outputs
#   PHENO, CHROM used only to name the output: ${PHENO}_chr${CHROM}.*

set -o errexit
set -o nounset
set -o pipefail

# Drop the trailing ".<ext>" (or ".*") so plink2 gets the fileset prefix.
PLINK_BFILE="${PLINK_FILES%.*}"
echo "${PLINK_BFILE}"

# -o: overwrite without prompting, so the job can never block on user input.
unzip -o "${PLINK2_SOFT}"

if [[ ! -x ./plink2 ]]; then
    echo "ERROR: ./plink2 not found or not executable after unpacking ${PLINK2_SOFT}" >&2
    exit 1
fi

# Columns 2, 4 and 6 of SCOREFILE are handed to plink2 as the variant-ID,
# allele and score columns; cols=+scoresums adds the score-sum column that
# the downstream merge step reads.
./plink2 --bfile "${PLINK_BFILE}" \
    --score "${SCOREFILE}" 2 4 6 cols=+scoresums \
    --out "${OUT_DIR}/${PHENO}_chr${CHROM}"

In [None]:
%%bash
# Build the dsub task table for per-chromosome PRS scoring.
#
# Outputs:
#   pheno.list   weight files for chromosome 22 found in WEIGHTS_DIR
#   pheno.list2  the same names with the PRS-CSx META suffix removed
#   TASKS_TSV    one row per (chromosome, phenotype) with the columns
#                CHROM, PHENO, SCOREFILE and PLINK_FILES
#
# Fail fast: without errexit a failed `cd` would leave a stale pheno.list2 in
# place, and without the explicit check an unset WORKSPACE_BUCKET would
# silently produce paths starting with "/panukbb/...".
set -o errexit
set -o nounset
set -o pipefail

: "${WORKSPACE_BUCKET:?WORKSPACE_BUCKET must be set}"

WEIGHTS_DIR=/home/jupyter/panukbb/v2/prscsx/prscsx_aou_meta
TASKS_TSV=/home/jupyter/panukbb/scripts/tasks_prscsx_prs_aou.tsv

cd "${WEIGHTS_DIR}"

# Chromosome 22 is used as the representative file to enumerate phenotypes.
ls -- *chr22.txt > pheno.list
# Anchor the suffix and escape the dots so only the literal trailing suffix is removed.
sed 's/_META_pst_eff_a1_b0\.5_phiauto_chr22\.txt$//' pheno.list > pheno.list2

{
    # Header
    printf '%s\t%s\t%s\t%s\n' \
        '--env CHROM' '--env PHENO' '--input SCOREFILE' '--input PLINK_FILES'

    # One row per chromosome x phenotype combination
    for chrom in {1..22}; do
        while read -r pheno; do
            # Ignore blank lines so no malformed task rows are emitted.
            [[ -n "${pheno}" ]] || continue
            printf '%s\t%s\t%s\t%s\n' \
                "${chrom}" \
                "${pheno}" \
                "${WORKSPACE_BUCKET}/panukbb/prs_csx/aou/${pheno}_META_pst_eff_a1_b0.5_phiauto_chr${chrom}.txt" \
                "${WORKSPACE_BUCKET}/panukbb/data/aou_v7_testPops_chr${chrom}.*"
        done < "${WEIGHTS_DIR}/pheno.list2"
    done
} > "${TASKS_TSV}"   # opened once; the whole table is written in one pass

In [None]:
%%bash --out TEST_JOB_ID
# Submit the per-chromosome scoring jobs: one task per row of the task table,
# each running run_prs_forPRS-CSx_aou.sh. The cell's stdout is captured into
# the notebook variable TEST_JOB_ID by the --out option above.

# Provides the aou_dsub command used below.
source ~/aou_dsub.bash

# Arguments are collected in an array so each group can carry a comment.
submit_args=(
  --image gcr.io/ukbb-diversepops-neale/yw-prs-r:test

  # Inputs/outputs shared by every task (per-task inputs come from --tasks).
  --input "PLINK2_SOFT=${WORKSPACE_BUCKET}/software/plink2.zip"
  --output-recursive "OUT_DIR=${WORKSPACE_BUCKET}/panukbb/profiles/prs_csx/aou/chrs/"

  # Resources requested per task.
  --min-cores 1
  --min-ram 10
  --disk-size 10
  --boot-disk-size 10

  # Job identification.
  --name "prs_prscsx_1217"
  --label "batch=prs_prscsx_1217"

  # Task table and the script executed for each task.
  --tasks '/home/jupyter/panukbb/scripts/tasks_prscsx_prs_aou.tsv'
  --script '/home/jupyter/panukbb/scripts/run_prs_forPRS-CSx_aou.sh'
)

aou_dsub "${submit_args[@]}"

In [None]:
%%writefile /home/jupyter/panukbb/scripts/merge_scoreFiles_PRSCSx_aou.R
#!/usr/bin/env Rscript
# Combine the per-chromosome plink2 score files of one phenotype into a single
# genome-wide score.
#
# Environment variables:
#   PRS_DIRs  path (may contain a wildcard) to the per-chromosome .sscore
#             files; only its directory part is used
#   PHENO     phenotype name; files are read as <dir>/<PHENO>_chr<N>.sscore
#   OUT_DIR   directory that receives <PHENO>_PRS-CSx.sscore
#
# Output: a tab-separated file with columns IID and SCORE1_SUM, where
# SCORE1_SUM is the sum of the per-chromosome SCORE1_SUM values for the
# individuals present in all 22 chromosome files.

library(data.table)

setDTthreads(1)

score_cols <- c("IID", "SCORE1_SUM")
chromosomes <- 1:22

# Return the value of a required environment variable, stopping with a clear
# message when it is unset or empty.
require_env <- function(name) {
    value <- Sys.getenv(name, unset = "")
    if (!nzchar(value)) {
        stop("Required environment variable is not set: ", name, call. = FALSE)
    }
    value
}

prs_dir <- dirname(require_env("PRS_DIRs"))
phe <- require_env("PHENO")
outdir <- require_env("OUT_DIR")

# Read the IID and SCORE1_SUM columns for one chromosome and rename the score
# column to "chr<N>" so the per-chromosome tables can be merged side by side.
read_chrom_scores <- function(chrom) {
    path <- file.path(prs_dir, paste0(phe, "_chr", chrom, ".sscore"))
    if (!file.exists(path)) {
        stop("Missing per-chromosome score file: ", path, call. = FALSE)
    }

    scores <- fread(path, stringsAsFactors = FALSE)

    # plink2 names the ID column "#IID" when no FID column is written.
    if (!("IID" %in% names(scores)) && "#IID" %in% names(scores)) {
        setnames(scores, "#IID", "IID")
    }

    absent <- setdiff(score_cols, names(scores))
    if (length(absent) > 0) {
        stop("Column(s) ", paste(absent, collapse = ", "), " not found in ", path,
             call. = FALSE)
    }

    scores <- scores[, score_cols, with = FALSE]
    setnames(scores, "SCORE1_SUM", paste0("chr", chrom))
    scores
}

per_chrom <- lapply(chromosomes, read_chrom_scores)

# Inner join on IID: only individuals scored on every chromosome are kept.
merged <- Reduce(function(left, right) merge(left, right, by = "IID"), per_chrom)

chrom_cols <- setdiff(names(merged), "IID")
merged[, SCORE1_SUM := rowSums(.SD), .SDcols = chrom_cols]
prs <- merged[, c("IID", "SCORE1_SUM")]

fwrite(prs,
       file = file.path(outdir, paste0(phe, "_PRS-CSx.sscore")),
       col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")


In [None]:
%%bash --out TEST3_JOB_ID
# Submit one merge job per phenotype listed in pheno.list2. Each job runs
# merge_scoreFiles_PRSCSx_aou.R on that phenotype's per-chromosome .sscore
# files. The cell's stdout is captured into the notebook variable
# TEST3_JOB_ID by the --out option above.

# Provides the aou_dsub command used below.
source ~/aou_dsub.bash

PHENO_LIST=/home/jupyter/panukbb/v2/prscsx/prscsx_aou_meta/pheno.list2

# NOTE(review): the earlier commented-out "skip if the output already exists"
# check was removed because it pointed at an unrelated path
# (panukbb/profiles/combined/<pheno>_S10_PT_EUR.sscore). Re-add it against
# panukbb/profiles/prs_csx/aou/combined/ if resubmission should be incremental.

while read -r pheno; do
  # Ignore blank lines so no job is submitted with an empty PHENO.
  [[ -n "${pheno}" ]] || continue

  # The input wildcard is limited to "<pheno>_chr*.sscore" so files of other
  # phenotypes that merely share the prefix are not staged into the job.
  # stdin is redirected from /dev/null so the submission command can never
  # consume the remaining lines of the phenotype list feeding this loop.
  aou_dsub \
    --image gcr.io/ukbb-diversepops-neale/yw-prs-r:test \
    --min-cores 1 \
    --min-ram 10 \
    --disk-size 10 \
    --boot-disk-size 10 \
    --name "prscs_merge" \
    --preemptible \
    --input "PRS_DIRs=${WORKSPACE_BUCKET}/panukbb/profiles/prs_csx/aou/chrs/${pheno}_chr*.sscore" \
    --output-recursive "OUT_DIR=${WORKSPACE_BUCKET}/panukbb/profiles/prs_csx/aou/combined/" \
    --env "PHENO=${pheno}" \
    --label "batch=prscsx_merge_1217" \
    --script '/home/jupyter/panukbb/scripts/merge_scoreFiles_PRSCSx_aou.R' \
    < /dev/null
done < "${PHENO_LIST}"