In [None]:
import pandas as pd
import os

In [None]:
# Release of interest
rel = 10

# Create folders to mount
! mkdir release working

# Read Access only
! gcsfuse --dir-mode 555 --file-mode 444 --implicit-dirs gp2_release{rel} release

# Read/Write Access
! gcsfuse --dir-mode 777 --file-mode 777 --implicit-dirs gp2_working_eu working

In [None]:
# Root directory for every path built in this notebook. The other cells read
# from f'{wd}/release/...' and read/write under f'{wd}/working/...', so this
# must be the directory that contains the `release` and `working` folders.
# Replace the placeholder before running anything below.
wd = '/YOUR/WORKING/DIR'

### Download Imputation File

In [None]:
# Fill in post-imputation from email sent by TOPMed
label = 'AJ'
user = 'nicole'
job_id = 'job-########-######-###'
job_password = 'password'
sex = 'male'

if sex == 'male':
    region = 'non_PAR'
    job_name = f'x_chrom_{label}_{sex}_{region}'
    imputed_out_dir = f'{wd}/working/{user}/x_chrom_new/imputation/imputation_out_dir/{label}/{sex}/{region}'
elif sex == 'female':
    job_name = f'x_chrom_{label}_{sex}'
    region = ''
    imputed_out_dir = f'{wd}/working/{user}/x_chrom_new/imputation/imputation_out_dir/{label}/{sex}'

# Change directory to where you want to save files
os.chdir(f'{imputed_out_dir}')
! pwd

In [None]:
# This will download and unzip the files from TOPMed -- it may take a while!
# NOTE(review): the download is presumably saved under the current working
# directory (set by the os.chdir in the previous cell) -- confirm.
# The job password is interpolated into the command line, so it will be
# visible in this cell's rendered command; clear outputs before sharing.
! imputationbot download {job_id} --password {job_password}

### Convert Imputation VCF to PLINK

In [None]:
# Create sex info file for post-imputation IDs using release-wide sex info file.
# The release IDs are kept as `old_IID`; the new `#IID` column is the same ID
# prefixed with '0_' (appended as the last column).
sex_check = (
    pd.read_csv(f'{wd}/release/r{rel}_final/imputation_out_dir/{label}/{label}_release{rel}.sex', sep = '\t')
    .rename(columns = {'#IID': 'old_IID'})
    .assign(**{'#IID': lambda df: '0_' + df['old_IID'].astype(str)})
)

# The VCF -> PLINK conversion step passes this exact path to
# `plink2 --update-sex`, so the file has to exist on a fresh run.
# It is only written when missing, so a previously generated copy is never
# overwritten.
sex_file = f'{wd}/working/{user}/x_chrom_new/imputation/{label}/{label}_release{rel}.sex'
if not os.path.exists(sex_file):
    os.makedirs(os.path.dirname(sex_file), exist_ok = True)
    sex_check[['#IID', 'SEX']].to_csv(sex_file, sep = '\t', index = False)

In [None]:
# Create shell script to execute -- all ancestries except EUR (handled on HPC)
os.chdir(wd)

# Establish paths
# Male jobs live in a per-region sub-folder; female jobs sit directly under the sex folder.
imputation_base = f'{wd}/working/{user}/x_chrom_new/imputation/imputation_out_dir/{label}/{sex}'
if sex == 'male':
    out_dir = f'{imputation_base}/{region}'
elif sex == 'female':
    out_dir = imputation_base
else:
    # Fail with a clear message instead of a NameError on release_path below.
    raise ValueError(f"sex must be 'male' or 'female', got {sex!r}")

# Imputed chrX dosage VCF inside the downloaded job folder
release_path = f'{out_dir}/{job_id}-{job_name}/local/chrX.dose.vcf.gz'
# Sex info file keyed by the post-imputation IDs
sex_values = f'{wd}/working/{user}/x_chrom_new/imputation/{label}/{label}_release{rel}.sex'
# Despite the name this is a plink2 output *prefix* under /dev/shm, not a directory
intermed_dir = f'/dev/shm/chrX_{label}_rel{rel}_{sex}'

# Only the PAR region is imported with the extra 'dosage=HDS' argument
dosage_arg = " 'dosage=HDS'" if region == 'PAR' else ''
subset_cmd = f"plink2 --vcf {release_path}{dosage_arg} --update-sex {sex_values} --split-par b38 --make-pgen --out {intermed_dir}"
# Copy every file produced with the intermediate prefix to the output folder
cp_cmd = f"gsutil cp {intermed_dir}.* {out_dir}"

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f'{wd}/convert_vcf_to_pfile.sh', 'w') as f:
    f.write('#!/bin/bash\n')
    # Abort on the first failing command so the copy never runs after a
    # failed plink2 conversion.
    f.write('set -euo pipefail\n\n')
    f.write(f'{subset_cmd}\n')
    f.write(f'{cp_cmd}\n')

print(f"Preparing bash script for converting {label} ancestry!")

In [None]:
# Make the generated conversion script executable, then run it.
# NOTE(review): the original note said to launch in separate shell windows;
# as written, `!` runs the script from this cell, so presumably the cell is
# occupied until the script finishes -- run the script from a separate
# terminal instead if that is the intent.
! chmod +x {wd}/convert_vcf_to_pfile.sh
! {wd}/convert_vcf_to_pfile.sh