In [None]:
import os
import numpy as np
import pandas as pd
#import pandas_profiling
#import plotnine
#from plotnine import *  # Provides a ggplot-like interface to matplotlib.
#from IPython.display import display


In [None]:
# Install or upgrade the dsub command-line tool that the job-submission cells call from the shell.
# NOTE(review): the version is not pinned, so a re-run may pick up a newer dsub — consider pinning it.
!pip install --upgrade dsub


In [None]:
# Workspace bucket location, taken from the environment.
# Fail fast when it is missing: a None value would otherwise be formatted into
# the paths built later in this notebook as the literal text "None/...".
bucket = os.environ.get("WORKSPACE_BUCKET")
if not bucket:
    raise EnvironmentError(
        "WORKSPACE_BUCKET is not set; run this notebook inside the workspace environment."
    )
bucket

In [None]:
%%writefile ~/aou_dsub.bash
#!/bin/bash
#
# Defines aou_dsub, a thin wrapper around `dsub` that pre-fills the provider,
# project, network, service-account, region, boot-disk and logging flags.
# Source this file, then call `aou_dsub` with any further dsub flags; they are
# appended after the defaults via "$@".
# Reads OWNER_EMAIL, GOOGLE_PROJECT and WORKSPACE_BUCKET from the environment.

function aou_dsub () {

  # Get a shorter username (the part of the e-mail before the first "@") to
  # leave more characters for the job name.
  local DSUB_USER_NAME="${OWNER_EMAIL%%@*}"

  # For AoU RWB projects network name is "network".
  local AOU_NETWORK=network
  local AOU_SUBNETWORK=subnetwork

  # The default log path groups logs by job name, user and submission
  # timestamp, with one file per job/task/attempt.
  dsub \
      --provider google-cls-v2 \
      --user-project "${GOOGLE_PROJECT}" \
      --project "${GOOGLE_PROJECT}" \
      --network "${AOU_NETWORK}" \
      --subnetwork "${AOU_SUBNETWORK}" \
      --service-account "$(gcloud config get-value account)" \
      --user "${DSUB_USER_NAME}" \
      --regions us-central1 \
      --boot-disk-size 55 \
      --logging "${WORKSPACE_BUCKET}/dsub/logs/{job-name}/{user-id}/$(date +'%Y%m%d/%H%M%S')/{job-id}-{task-id}-{task-attempt}.log" \
      "$@"
}

In [None]:
%%bash
# Make the helper script executable.
chmod +x ~/aou_dsub.bash

# Register the helper in ~/.bashrc only once, so that re-running this cell does
# not keep appending duplicate `source` lines. The line is spelled with the
# expanded home directory, which is what the unquoted `~` produced before.
SOURCE_LINE="source ${HOME}/aou_dsub.bash"
grep -qxF "${SOURCE_LINE}" ~/.bashrc 2>/dev/null || echo "${SOURCE_LINE}" >> ~/.bashrc

In [None]:
USER_NAME = os.getenv('OWNER_EMAIL').split('@')[0].replace('.','-')

# Save this Python variable as an environment variable so that its easier to use within %%bash cells.
%env USER_NAME={USER_NAME}

# Run PRS scoring for PRS-CS on the AoU populations

In [None]:
# Copy the PRS runner script into the workspace bucket.
# NOTE(review): the script is read from the current working directory, while the %%writefile cell
# further down creates it at /home/jupyter/run_prs_forPRS-CS_aou.sh. On a fresh kernel this cell only
# works if the file already exists in the working directory — confirm the intended cell order.
!gsutil cp run_prs_forPRS-CS_aou.sh {bucket}/panukbb/individualPRS
        
        

In [None]:
# Recursively upload the local PRS-CS results directory to the workspace bucket (-m parallelises the copy).
# NOTE(review): the local source path is hard-coded to /home/jupyter/panukbb/v2/prscs_aou/results_231201/.
!gsutil -m cp -r /home/jupyter/panukbb/v2/prscs_aou/results_231201/ ${WORKSPACE_BUCKET}/panukbb/prs_cs/aou/results_231201/

In [None]:
%%writefile /home/jupyter/run_prs_forPRS-CS_aou.sh

# Per-task script passed to dsub via --script: scores one PLINK fileset with
# one column of a score file using plink2.
#
# Variables read from the environment (unset ones abort the run, see nounset):
#   PLINK_FILES  path of the PLINK fileset, ending in an extension or wildcard
#   PLINK2_SOFT  zip archive, expected to provide ./plink2 once unpacked
#   SCOREFILE    score file handed to --score
#   POSTERIOR    third positional argument of --score (the score column)
#   OUT_DIR      directory that receives the plink2 output
#   PHENO, CHROM used only to build the output file name

set -o errexit
set -o nounset

# Strip everything from the last '.' onward to obtain the --bfile prefix.
PLINK_BFILE="${PLINK_FILES%.*}"
echo "${PLINK_BFILE}"

unzip "${PLINK2_SOFT}"

# The positional arguments 2, 4 and POSTERIOR select columns of the score file.
# NOTE(review): plink2 documents 'center' as a modifier of --score; confirm
# that the separate '--center' flag is accepted by the bundled plink2 build.
./plink2 --bfile "${PLINK_BFILE}" \
    --score "${SCOREFILE}" 2 4 "${POSTERIOR}" cols=+scoresums --center \
    --out "${OUT_DIR}/${PHENO}_chr${CHROM}_posterior${POSTERIOR}"
    


In [None]:
files = {'--env CHROM':[],'--env PHENO':[],'--input SCOREFILE':[],'--input PLINK_FILES':[],'--env POSTERIOR':[]}

phenos = []
with open('phenos.list','rt') as inpu:
    for i in inpu:
        if i =="\n":
            continue
        phenos.append(i.replace('\n',''))
            

for pheno in phenos:
    for c in range(1,23):
        for pos in range(100):
            files['--env CHROM'].append(c)
            files['--env PHENO'].append(pheno)
            files['--input SCOREFILE'].append(f"{bucket}/panukbb/individualPRS/PRS/chr{c}/AoU_meta/{pheno}_chr{c}.txt")
            files['--input PLINK_FILES'].append(f'{bucket}/panukbb/data/aou_v7_testPops_chr{c}.*')
            files['--env POSTERIOR'].append(pos+6)

files = pd.DataFrame(files)

PARAMETER_FILENAME = 'prs_posteriors.tsv'
TEST_FILENAME = 'prs_posteriors_test.tsv'
# Save this Python variable value an environment variable so that its easier to use within %%bash cells.
%env PARAMETER_FILENAME={PARAMETER_FILENAME}
%env TEST_FILENAME={TEST_FILENAME}

files.to_csv(PARAMETER_FILENAME, sep='\t', index=False)
!head -n 1 {PARAMETER_FILENAME} > {TEST_FILENAME}
!tail -n 1 {PARAMETER_FILENAME} >> {TEST_FILENAME}



In [None]:
# Print the test task table.
!cat {TEST_FILENAME}

In [None]:
# Use hyphens, not whitespace since it will become part of the bucket path.
job = TEST_FILENAME.replace('.tsv','')

# Save this Python variable as an environment variable so that its easier to use within %%bash cells.
%env JOB_NAME={job}

In [None]:
# Print one PRS log file from Cloud Storage.
# NOTE(review): the bucket and object name are hard-coded; presumably this is the workspace bucket —
# confirm, and consider building the path from `bucket` instead.
!gsutil cat gs://fc-secure-06f42177-4b29-4956-88a8-88ede84cb2ab/panukbb/profiles/prs_cs/aou/chrs/continuous-30000-both_sexes-irnt_chr22_posterior105.log
    
    

In [None]:
%%bash --out prs_posterior

# Submit the PRS scoring tasks listed in TEST_FILENAME through the aou_dsub wrapper.
# The cell's stdout is captured into the Python variable `prs_posterior`.
# NOTE(review): OUT_DIR points at "<bucket>/individualPRS/indiv_result", while --logging below
# uses "<bucket>/panukbb/individualPRS/indiv_result/logging" — confirm that the missing
# "panukbb/" in OUT_DIR is intentional.
# NOTE(review): aou_dsub already passes a --logging flag and this call passes a second one —
# confirm which of the two dsub honours.
source ~/aou_dsub.bash

aou_dsub \
  --image "gcr.io/ukbb-diversepops-neale/yw-prs-r:test" \
  --input PLINK2_SOFT="${WORKSPACE_BUCKET}/software/plink2.zip" \
  --output-recursive OUT_DIR="${WORKSPACE_BUCKET}/individualPRS/indiv_result" \
  --name "${JOB_NAME}" \
  --disk-size 100 \
  --tasks "${TEST_FILENAME}" \
  --min-cores 4 \
  --min-ram 100 \
  --preemptible \
  --logging "${WORKSPACE_BUCKET}/panukbb/individualPRS/indiv_result/logging" \
  --script "/home/jupyter/run_prs_forPRS-CS_aou.sh"

In [None]:
# Query the status of a dsub job, matching any status ('*').
# NOTE(review): the project, job id and user are hard-coded to one specific submission;
# update them before re-running.
!dstat --provider google-cls-v2 --project terra-vpc-sc-da94e041 --location us-central1 --jobs 'prs-poster--zhuozshi--240508-013347-30' --users 'zhuozshi' --status '*'



