# EGG BW + PW GWAS in Roskilde Child Cohort
by Sara Stinson March 18th, 2020 Early Growth and Genetics Consortium Birth Weight and Placental weight GWAS

In [1]:
workdir = "/emc/cbmr/users/mjl259/project_EGG"

In [2]:
pathgeno = "/emc/cbmr/data/imputed/IFG-Roskilde-rerun/michigan/hrc"

In [3]:
pathpheno = "/emc/cbmr/users/mjl259/project_EGG/phenotype/Roskilde_CHILD_rerun"

## 1. Prepare sample files

### 1.1 Autosomes

In [12]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"
. /home/mjl259/.bashrc

# Remove header
sed 1d $3/Roskilde_BW_PW_sex_GA_PC1-10_13.02.2020.txt > \
$3/Roskilde_BW_PW_sex_GA_PC1-10_wo_header.tmp

# Extract the order of individuals from VCF file
bcftools query -l $2/chr22.dose.vcf.gz > \
$3/Roskilde_CHILD_ID_list_from_vcf.txt

# Sort phenotype file according to vcf order
awk 'FNR==NR {x2[$1] = $0; next} $1 in x2 {print x2[$1]}' \
$3/Roskilde_BW_PW_sex_GA_PC1-10_wo_header.tmp \
$3/Roskilde_CHILD_ID_list_from_vcf.txt > $3/Roskilde_BW_PW_sex_GA_PC1-10_ordered.tmp

# Specify variable types
sed '1 s/^/0 0 0 D C C C C C C C C C C C P P\n/' \
$3/Roskilde_BW_PW_sex_GA_PC1-10_ordered.tmp > \
$3/Roskilde_BW_PW_sex_GA_PC1-10_ordered_line2.tmp

# Add header
{ echo "ID_1 ID_2 missing sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 BW PW"; \
cat $3/Roskilde_BW_PW_sex_GA_PC1-10_ordered_line2.tmp; } > \
$3/Roskilde_BW_PW_sex_GA_PC1-10_ordered_header.sample

# remove intermediate files
rm $3/*.tmp

### 1.2 X chromosome

In [4]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"
. /home/mjl259/.bashrc

# Remove header
sed 1d $3/Roskilde_chrX_sex_GA_PC1-10_11.03.2020.txt > \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_wo_header.tmp

# Sort phenotype file according to vcf order
awk 'FNR==NR {x2[$1] = $0; next} $1 in x2 {print x2[$1]}' \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_wo_header.tmp \
$1/genotype/Roskilde_CHILD_rerun/chrX.recoded.sample > \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered.tmp

# Specify variable types
sed '1 s/^/0 0 0 D C C C C C C C C C C C P P\n/' \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered.tmp > \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered_line2.tmp

# Add header
{ echo "ID_1 ID_2 missing sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 BW PW"; \
cat $3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered_line2.tmp; } > \
$3/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered_header.sample

# remove intermediate files:
rm $3/*.tmp

### 1.3 X chromosome (for binary trait for method newml)

In [51]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"
. /home/mjl259/.bashrc

# Remove header
sed 1d $3/Roskilde_Binary_sex_GA_PC1-10_10.03.2020.txt > \
$3/Roskilde_Binary_sex_GA_PC1-10_wo_header.tmp

# Extract the order of individuals from VCF file
bcftools query -l $2/chrX.dose.vcf.gz > \
$3/Roskilde_CHILD_ID_list_from_chrX_vcf.txt

# Sort phenotype file according to vcf order
awk 'FNR==NR {x2[$1] = $0; next} $1 in x2 {print x2[$1]}' \
$3/Roskilde_Binary_sex_GA_PC1-10_wo_header.tmp \
$3/Roskilde_CHILD_ID_list_from_chrX_vcf.txt > \
$3/Roskilde_Binary_sex_GA_PC1-10_ordered.tmp

# Specify variable types
sed '1 s/^/0 0 0 D C C C C C C C C C C C B\n/' \
$3/Roskilde_Binary_sex_GA_PC1-10_ordered.tmp > \
$3/Roskilde_Binary_sex_GA_PC1-10_ordered_line2.tmp

# Add header
{ echo "ID_1 ID_2 missing sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 Binary"; \
cat $3/Roskilde_Binary_sex_GA_PC1-10_ordered_line2.tmp; } > \
$3/Roskilde_Binary_sex_GA_PC1-10_ordered_header.sample

# remove intermediate files:
rm $3/*.tmp

## 2. Run BW & PW SNPTEST

### 2.1 Autosomes (adjusted for sex & gest_age +10 PC)

In [16]:
%%writefile $workdir/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_GA_adj.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 2
#$ -N SNPTest

pheno=${p}
chr=${c}
pathgeno=${pg}
workdir=${w}

/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 -data ${pathgeno}/chr${chr}.dose.vcf.gz ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_BW_PW_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/SNPTest_Roskilde_CHILD_chr${chr}_${pheno}_sex_GA_adj.out \
    -genotype_field GP \
    -frequentist 1 \
    -method expected \
    -pheno ${pheno} \
    -cov_names sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_GA_adj.sh


In [17]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_GA_adj.sh"
declare -a phenos=("BW" "PW")
for pheno in "${phenos[@]}"; do # loop through each pheno
for chr in {1..22}; do # loop through each chr
qsub -q long.q -v p=${pheno},c=${chr},pg=$2,w=$1 -e $1/log/SNPTest_Roskild_CHILD_rerun_${pheno}_chr${chr}_sex_GA_adj_13.02.2020.e -o $1/log/SNPTest_Roskild_CHILD_rerun_${pheno}_chr${chr}_sex_GA_adj_13.02.2020.o "$script";
done;
done

Your job 6817803 ("SNPTest") has been submitted
Your job 6817804 ("SNPTest") has been submitted
Your job 6817805 ("SNPTest") has been submitted
Your job 6817806 ("SNPTest") has been submitted
Your job 6817807 ("SNPTest") has been submitted
Your job 6817808 ("SNPTest") has been submitted
Your job 6817809 ("SNPTest") has been submitted
Your job 6817810 ("SNPTest") has been submitted
Your job 6817811 ("SNPTest") has been submitted
Your job 6817812 ("SNPTest") has been submitted
Your job 6817813 ("SNPTest") has been submitted
Your job 6817814 ("SNPTest") has been submitted
Your job 6817815 ("SNPTest") has been submitted
Your job 6817816 ("SNPTest") has been submitted
Your job 6817817 ("SNPTest") has been submitted
Your job 6817818 ("SNPTest") has been submitted
Your job 6817819 ("SNPTest") has been submitted
Your job 6817820 ("SNPTest") has been submitted
Your job 6817821 ("SNPTest") has been submitted
Your job 6817822 ("SNPTest") has been submitted
Your job 6817823 ("SNPTest") has been su

### 2.2 Autosomes (adjusted for sex +10 PC)

In [21]:
%%writefile $workdir/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_adj.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 2
#$ -N SNPTest

pheno=${p}
chr=${c}
pathgeno=${pg}
workdir=${w}

/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 -data ${pathgeno}/chr${chr}.dose.vcf.gz ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_BW_PW_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/SNPTest_Roskilde_CHILD_chr${chr}_${pheno}_sex_adj.out \
    -genotype_field GP \
    -frequentist 1 \
    -method expected \
    -pheno ${pheno} \
    -cov_names sex PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_adj.sh


In [22]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_CHILD_BW_PW_SEX_adj.sh"
declare -a phenos=("BW" "PW")
for pheno in "${phenos[@]}"; do # loop through each pheno
for chr in {1..22}; do # loop through each chr
qsub -q long.q -v p=${pheno},c=${chr},pg=$2,w=$1 -e $1/log/SNPTest_Roskild_CHILD_rerun_${pheno}_chr${chr}_sex_adj.e -o $1/log/SNPTest_Roskild_CHILD_rerun_${pheno}_chr${chr}_sex_adj.o "$script";
done;
done

Your job 6818028 ("SNPTest") has been submitted
Your job 6818029 ("SNPTest") has been submitted
Your job 6818030 ("SNPTest") has been submitted
Your job 6818031 ("SNPTest") has been submitted
Your job 6818032 ("SNPTest") has been submitted
Your job 6818033 ("SNPTest") has been submitted
Your job 6818034 ("SNPTest") has been submitted
Your job 6818035 ("SNPTest") has been submitted
Your job 6818036 ("SNPTest") has been submitted
Your job 6818037 ("SNPTest") has been submitted
Your job 6818038 ("SNPTest") has been submitted
Your job 6818039 ("SNPTest") has been submitted
Your job 6818040 ("SNPTest") has been submitted
Your job 6818041 ("SNPTest") has been submitted
Your job 6818042 ("SNPTest") has been submitted
Your job 6818043 ("SNPTest") has been submitted
Your job 6818044 ("SNPTest") has been submitted
Your job 6818045 ("SNPTest") has been submitted
Your job 6818046 ("SNPTest") has been submitted
Your job 6818047 ("SNPTest") has been submitted
Your job 6818048 ("SNPTest") has been su

### 2.3 X chromosome (adjusted for sex and gest_age +10 PC)

In [5]:
%%writefile $workdir/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_GA_adj.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 8
#$ -N SNPTest_X

pheno=${p}
pathgeno=${pg}
workdir=${w}

/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 \
    -data ${workdir}/genotype/Roskilde_CHILD_rerun/joinedSexChrX.gen.gz \
          ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_sex_GA_adj.out \
    -genotype_field GP \
    -frequentist 1 \
    -method expected \
    -pheno ${pheno} \
    -cov_names sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_GA_adj.sh


In [6]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_GA_adj.sh"
declare -a phenos=("BW" "PW")
for pheno in "${phenos[@]}"; do # loop through each pheno
qsub -q long.q -v p=${pheno},pg=$2,w=$1 -e $1/log/SNPTest_Roskilde_CHILD_recoded_${pheno}_chrX_sex_GA_adj.e -o $1/log/SNPTest_Roskilde_CHILD_recoded_${pheno}_chrX_sex_GA_adj.o "$script";
done

Your job 6817909 ("SNPTest_X") has been submitted
Your job 6817910 ("SNPTest_X") has been submitted


### 2.4 X chromosome (adjusted for sex +10 PC)

In [54]:
%%writefile $workdir/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_adj.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 8
#$ -N SNPTest_X

pheno=${p}
pathgeno=${pg}
workdir=${w}

/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 \
    -data ${workdir}/genotype/Roskilde_CHILD_rerun/joinedSexChrX.gen.gz \
          ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_chrX_BW_PW_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_sex_GA_adj.out \
    -genotype_field GP \
    -frequentist 1 \
    -method expected \
    -pheno ${pheno} \
    -cov_names sex PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_adj.sh


In [55]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_chrX_recoded_CHILD_BW_PW_SEX_adj.sh"
declare -a phenos=("BW" "PW")
for pheno in "${phenos[@]}"; do # loop through each pheno
qsub -q long.q -v p=${pheno},pg=$2,w=$1 -e $1/log/SNPTest_Roskilde_CHILD_recoded_${pheno}_chrX_sex_adj.e -o $1/log/SNPTest_Roskilde_CHILD_recoded_${pheno}_chrX_sex_adj.o "$script";
done

Your job 6818226 ("SNPTest_X") has been submitted
Your job 6818227 ("SNPTest_X") has been submitted


### 2.5 X chromosome (binary trait adjusted for sex and gest_age + 10 PC)

In [10]:
%%writefile $workdir/script/SNPTEST_Roskilde_CHILD_Binary_SEX_GA_adj_XCHROM.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 2
#$ -N SNPTest_X_B

pathgeno=${pg}
workdir=${w}

# Cannot use GP for #method newml have to be GT
   
/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 \
    -data ${pathgeno}/chrX.dose.vcf.gz \ 
          ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_Binary_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj.out \
    -genotype_field GT \
    -frequentist 1 \
    -method newml \
    -pheno Binary \
    -cov_names sex GA PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_CHILD_Binary_SEX_GA_adj_XCHROM.sh


In [11]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_CHILD_Binary_SEX_GA_adj_XCHROM.sh"
qsub -q long.q -v pg=$2,w=$1 -e $1/log/SNPTEST_Roskild_CHILD_rerun_Binary_chrX_sex_GA_adj.e -o $1/log/SNPTEST_Roskild_CHILD_rerun_Binary_chrX_sex_GA_adj.o "$script"

Your job 6854697 ("SNPTest_X_B") has been submitted


In [8]:
%%writefile $workdir/script/SNPTEST_Roskilde_CHILD_Binary_SEX_adj_XCHROM.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe smp 2
#$ -N SNPTest_X_B

pathgeno=${pg}
workdir=${w}

# Cannot use GP for #method newml have to be GT
   
/oldhome/tdr438/snptest_v2.5.2/snptest_v2.5.2 \
    -data ${pathgeno}/chrX.dose.vcf.gz \
          ${workdir}/phenotype/Roskilde_CHILD_rerun/Roskilde_Binary_sex_GA_PC1-10_ordered_header.sample \
    -o ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj.out \
    -genotype_field GT \
    -frequentist 1 \
    -method newml \
    -pheno Binary \
    -cov_names sex PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 \
    -hwe

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/SNPTEST_Roskilde_CHILD_Binary_SEX_adj_XCHROM.sh


In [9]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/SNPTEST_Roskilde_CHILD_Binary_SEX_adj_XCHROM.sh"
qsub -q long.q -v pg=$2,w=$1 -e $1/log/SNPTEST_Roskild_CHILD_rerun_Binary_chrX_sex_adj.e -o $1/log/SNPTEST_Roskild_CHILD_rerun_Binary_chrX_sex_adj.o "$script"

Your job 6854696 ("SNPTest_X_B") has been submitted


## 3. Merge SNPTEST results

### 3.1 Combine chromosome 1-22

In [23]:
%%writefile $workdir/script/Merge_SNPTEST_output_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Merge
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# Merges all chromsosomes together
cat ${workdir}/results/Roskilde_CHILD_rerun/SNPTest_Roskilde_CHILD_chr*_${pheno}.out > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}.tmp

# Adds header from SNPTEST at the top
cat ${workdir}/results/Roskilde_CHILD_rerun/merged/header \
<(grep -v -f ${workdir}/results/Roskilde_CHILD_rerun/merged/header \
  ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}.tmp | grep -v '==>' ) > \
  ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}; \

rm ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}.tmp

# Removes text with hashtag
grep -v '^#' ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno} > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}.tmp ; \

mv  ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}.tmp  \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_SNPTEST_output_Roskilde_CHILD.sh


In [24]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Merge_SNPTEST_output_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q long.q -v pe=${pheno},wd=$1 -e $1/log/Merged_Roskilde_CHILD_${pheno}.e -o $1/log/Merged_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6865988 ("Merge") has been submitted
Your job 6865989 ("Merge") has been submitted
Your job 6865990 ("Merge") has been submitted
Your job 6865991 ("Merge") has been submitted


## 4. Remove SNPS without associations / monomorphic

### 4.1 Autosomes

In [35]:
%%writefile $workdir/script/Remove_NA_monomorphic_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Remove_NA
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# column 22 = PVAL
gawk -F ' ' '$22 != "NA"{ print}' ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTest_Roskilde_CHILD_merged_${pheno}  > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_noNA.tmp

# column 19 = MAF_all
gawk -F ' ' '$19 != "0"{ print}' ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_noNA.tmp  > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_noNA_MAF.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Remove_NA_monomorphic_Roskilde_CHILD.sh


In [36]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Remove_NA_monomorphic_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q long.q -v pe=${pheno},wd=$1 -e $1/log/Remove_NA_monomorphic_Roskilde_${pheno}.e -o $1/log/Remove_NA_monomorphic_Roskilde_${pheno}.o "$script";
done

Your job 6818191 ("Remove_NA") has been submitted
Your job 6818192 ("Remove_NA") has been submitted
Your job 6818193 ("Remove_NA") has been submitted
Your job 6818194 ("Remove_NA") has been submitted


### 4.2 X chromosome

In [56]:
%%writefile $workdir/script/Remove_NA_monomorphic_chrX_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Remove_NA
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# Adds header from SNPTEST at the top
cat ${workdir}/results/Roskilde_CHILD_rerun/merged/header \
<(grep -v -f ${workdir}/results/Roskilde_CHILD_rerun/merged/header \
  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}.out | grep -v '==>' ) > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}

# Removes text with hashtag
grep -v '^#'  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno} >  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}.tmp ; \
mv ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}.tmp  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}

# column 22 = PVAL
gawk -F ' ' '$22 != "NA"{ print}' ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno} > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_noNA

# column 19 = MAF_all
gawk -F ' ' '$19 != "0"{ print}' ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_noNA > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_noNA_MAF.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Remove_NA_monomorphic_chrX_Roskilde_CHILD.sh


In [57]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Remove_NA_monomorphic_chrX_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q long.q -v pe=${pheno},wd=$1 -e $1/log/Remove_NA_monomorphic_chrX_Roskilde_${pheno}.e -o $1/log/Remove_NA_monomorphic_chrX_Roskilde_${pheno}.o "$script";
done

Your job 6818230 ("Remove_NA") has been submitted
Your job 6818231 ("Remove_NA") has been submitted
Your job 6818232 ("Remove_NA") has been submitted
Your job 6818233 ("Remove_NA") has been submitted


### 4.3 Format X chromosome binary trait results

In [6]:
%%writefile $workdir/script/Format_binary_chrX_sex_adj_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Org_X
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# Adds header from SNPTEST at the top
cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/header_binary.txt \
<(grep -v -f ${workdir}/results/Roskilde_CHILD_rerun/ChrX/header_binary.txt \
  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj.out | grep -v '==>' ) > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj

# Removes text with hashtag
grep -v '^#'  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj >  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj.tmp ; \
mv ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj.tmp  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_adj.txt

Writing /emc/cbmr/users/mjl259/project_EGG/script/Format_binary_chrX_sex_adj_Roskilde_CHILD.sh


In [7]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Format_binary_chrX_sex_adj_Roskilde_CHILD.sh"
qsub -q long.q -v wd=$1 -e $1/log/Format_chrX_Roskilde_Binary_sex_adj.e -o $1/log/Format_chrX_Roskilde_Binary_sex_adj.o "$script"

Your job 6854769 ("Org_X") has been submitted


In [14]:
%%writefile $workdir/script/Format_binary_chrX_sex_adj_Roskilde_CHILD_stratify_by_sex.sh
#$ -S /bin/bash
#$ -N Org_X
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# Adds header from SNPTEST at the top
cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/header_binary.txt \
<(grep -v -f ${workdir}/results/Roskilde_CHILD_rerun/ChrX/header_binary.txt \
  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex.out | grep -v '==>' ) > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex

# Removes text with hashtag
grep -v '^#'  ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex >  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex.tmp ; \
mv ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex.tmp  \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_sex_GA_adj_stratify_sex.txt

Writing /emc/cbmr/users/mjl259/project_EGG/script/Format_binary_chrX_sex_adj_Roskilde_CHILD_stratify_by_sex.sh


In [15]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Format_binary_chrX_sex_adj_Roskilde_CHILD_stratify_by_sex.sh"
qsub -q long.q -v wd=$1 -e $1/log/Format_chrX_Roskilde_Binary_sex_adj_stratify.e \
-o $1/log/Format_chrX_Roskilde_Binary_sex_adj_stratify.o "$script"

Your job 6854779 ("Org_X") has been submitted


## 5. Organize SNPTEST results

### 5.1 Autosomes

In [4]:
%%writefile $workdir/script/organize_upload_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Org_upload
#$ -cwd
#$ -pe smp 4

workdir=${wd}
pheno=${pe}

# Remove header
sed 1d ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_noNA_MAF.txt > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_wo_header.tmp

# Round to 4 decimal places
awk -v OFS='\t' '{printf("%s %s %s %s %s %s %s %s %.0f %.0f %.0f %.4f %.4g %.4f %.4f %.4g\n", $1, "+", "37", $3, $4, $6, $5, $18, $14, $15, $16, (($16*2 + $15)/($18*2)), $21, $24, $25, $22) }' \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_wo_header.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_order.tmp

#Print tab-delimited
awk -v OFS='\t' '{print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16}' \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_order.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_tab.tmp

# Add header
{ echo -e "SNPID\tSTRAND\tBUILD\tCHR\tPOS\tEFFECT_ALLELE\tNON_EFFECT_ALLELE\tN\tN0\tN1\tN2\tEAF\tHWE_P\tBETA\tSE\tPVAL"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_tab.tmp; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/organize_upload_Roskilde_CHILD.sh


In [5]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/organize_upload_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/organize_upload_Roskilde_CHILD_${pheno}.e -o $1/log/organize_upload_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918362 ("Org_upload") has been submitted
Your job 6918363 ("Org_upload") has been submitted
Your job 6918364 ("Org_upload") has been submitted
Your job 6918365 ("Org_upload") has been submitted


### 5.2 X chromosome (method expected)

In [8]:
%%writefile $workdir/script/organize_upload_chrX_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Org_upload
#$ -cwd
#$ -pe smp 4

workdir=${wd}
pheno=${pe}

# Remove header
sed 1d ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_noNA_MAF.txt > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_wo_header.tmp

# Round to 4 decimal places
awk -v OFS='\t' '{printf("%s %s %s %s %s %s %s %s %.0f %.0f %.0f %.4g %.4f %.4f %.4g\n", $1, "+", "37", $3, $4, $6, $5, $18, $14, $15, $16, $21, $24, $25, $22) }' \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_wo_header.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_order.tmp

# Add header
{ echo -e "SNPID\tSTRAND\tBUILD\tCHR\tPOS\tEFFECT_ALLELE\tNON_EFFECT_ALLELE\tN\tN0\tN1\tN2\tHWE_P\tBETA\tSE\tPVAL"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_order.tmp; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/organize_upload_chrX_Roskilde_CHILD.sh


In [9]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/organize_upload_chrX_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno}, wd=$1 -e $1/log/organize_upload_Roskilde_chrX_CHILD_${pheno}.e -o $1/log/organize_upload_Roskilde_chrX_CHILD_${pheno}.o "$script";
done

Your job 6918370 ("Org_upload") has been submitted
Your job 6918371 ("Org_upload") has been submitted
Your job 6918372 ("Org_upload") has been submitted
Your job 6918373 ("Org_upload") has been submitted


### 5.3 Calculate EAF from X chromosome (method newml)

In [33]:
%%writefile $workdir/script/Calc_EAF_chrX_Binary_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Calc_EAF
#$ -cwd
#$ -pe smp 2

workdir=${wd}

# Remove first header
sed 1d ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary.txt > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_wo_header.tmp

# EAF=(all_B+all_AB+2*all_BB)/(all_A+all_B+2*(all_AA+all_AB+all_BB)
# Round to 4 decimal places & calc EAF
awk -v OFS='\t' '{printf("%s %.4f\n", $1, (($9+$11+2*$12)/($8+$9+2*($10+$11+$12))))}' \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_wo_header.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_EAF.tmp

# Add header
{ echo -e "SNPID\tEAF"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_EAF.tmp; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_EAF_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Calc_EAF_chrX_Binary_Roskilde_CHILD.sh


In [34]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Calc_EAF_chrX_Binary_Roskilde_CHILD.sh"
qsub -q long.q -v wd=$1 -e $1/log/calc_EAF_Roskilde_child_chrX.e -o $1/log/calc_EAF_Roskilde_child_chrX.o "$script"

Your job 6866025 ("Calc_EAF") has been submitted


### 5.4 Merge EAF (method newml) with x chromosome (method expected)

In [10]:
%%writefile $workdir/script/Merge_EAF_Roskilde_CHILD_chrX.sh
#$ -S /bin/bash
#$ -N Org_upload
#$ -cwd
#$ -pe smp 2

workdir=${wd}
pheno=${pe}

# Merge two INFO FILES
awk -v OFS='\t' 'FNR==NR{a[$1]=$2;next}{ print $0, a[$1]}' \
<(tail +2 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_Binary_EAF_header.txt) \
<(tail +2 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_header.txt) > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF.tmp

# Round to 4 decimal places
awk -v OFS='\t' '{print ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $16, $12, $13, $14, $15) }' \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF_order.tmp

# Add header
{ echo -e "SNPID\tSTRAND\tBUILD\tCHR\tPOS\tEFFECT_ALLELE\tNON_EFFECT_ALLELE\tN\tN0\tN1\tN2\tEAF\tHWE_P\tBETA\tSE\tPVAL"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF_order.tmp; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF_order_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_EAF_Roskilde_CHILD_chrX.sh


In [11]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Merge_EAF_Roskilde_CHILD_chrX.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/organize_upload_Roskilde_CHILD_${pheno}.e -o $1/log/organize_upload_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918374 ("Org_upload") has been submitted
Your job 6918375 ("Org_upload") has been submitted
Your job 6918376 ("Org_upload") has been submitted
Your job 6918377 ("Org_upload") has been submitted


## 6. Extract info file 1

### 6.1 Autosomes info file 1

In [4]:
%%writefile $workdir/script/extract_info_1_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Extract_1
#$ -cwd
#$ -pe smp 8

chr=${c}
pathgeno=${pg}
workdir=${w}

# Extract CALL_RATE and IMPUTED from chr.info.gz files
zcat ${pathgeno}/chr${chr}.info.gz | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[5]."\t".($F[7] eq "Imputed" ? 1 : 0)."\t".($F[7] eq "Imputed" ? "impute_info" : ".")' > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/chr${chr}_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.tmp

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/extract_info_1_Roskilde_vcf.sh


In [5]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/extract_info_1_Roskilde_vcf.sh"
for chr in {1..22}; do # loop through each chr
qsub -q long.q -v c=${chr},pg=$2,w=$1 -e $1/log/extract_info_1_chr${chr}.e -o $1/log/extract_info_1_chr${chr}.o "$script";
done

Your job 6885605 ("Extract_1") has been submitted
Your job 6885606 ("Extract_1") has been submitted
Your job 6885607 ("Extract_1") has been submitted
Your job 6885608 ("Extract_1") has been submitted
Your job 6885609 ("Extract_1") has been submitted
Your job 6885610 ("Extract_1") has been submitted
Your job 6885611 ("Extract_1") has been submitted
Your job 6885612 ("Extract_1") has been submitted
Your job 6885613 ("Extract_1") has been submitted
Your job 6885614 ("Extract_1") has been submitted
Your job 6885615 ("Extract_1") has been submitted
Your job 6885616 ("Extract_1") has been submitted
Your job 6885617 ("Extract_1") has been submitted
Your job 6885618 ("Extract_1") has been submitted
Your job 6885619 ("Extract_1") has been submitted
Your job 6885620 ("Extract_1") has been submitted
Your job 6885621 ("Extract_1") has been submitted
Your job 6885622 ("Extract_1") has been submitted
Your job 6885623 ("Extract_1") has been submitted
Your job 6885624 ("Extract_1") has been submitted


In [13]:
%%writefile $workdir/script/Merge_info_1_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Merge_1
#$ -cwd
#$ -pe smp 8

chr=${c}
pathgeno=${pg}
workdir=${w}

# Merge each chr file
cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/chr*_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.tmp > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_info_1_Roskilde_vcf.sh


In [14]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/Merge_info_1_Roskilde_vcf.sh"
for chr in {1..22}; do # loop through each chr
qsub -q long.q -v c=${chr},pg=$2,w=$1 -e $1/log/Merge_info_1_chr${chr}.e -o $1/log/Merge_info_1_chr${chr}.o "$script";
done

Your job 6885651 ("Merge_1") has been submitted
Your job 6885652 ("Merge_1") has been submitted
Your job 6885653 ("Merge_1") has been submitted
Your job 6885654 ("Merge_1") has been submitted
Your job 6885655 ("Merge_1") has been submitted
Your job 6885656 ("Merge_1") has been submitted
Your job 6885657 ("Merge_1") has been submitted
Your job 6885658 ("Merge_1") has been submitted
Your job 6885659 ("Merge_1") has been submitted
Your job 6885660 ("Merge_1") has been submitted
Your job 6885661 ("Merge_1") has been submitted
Your job 6885662 ("Merge_1") has been submitted
Your job 6885663 ("Merge_1") has been submitted
Your job 6885664 ("Merge_1") has been submitted
Your job 6885665 ("Merge_1") has been submitted
Your job 6885666 ("Merge_1") has been submitted
Your job 6885667 ("Merge_1") has been submitted
Your job 6885668 ("Merge_1") has been submitted
Your job 6885669 ("Merge_1") has been submitted
Your job 6885670 ("Merge_1") has been submitted
Your job 6885671 ("Merge_1") has been su

### 6.2 chromosome X info file 1

In [6]:
%%writefile $workdir/script/extract_info_1_chrX_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Extract_1
#$ -cwd
#$ -pe smp 8

pathgeno=${pg}
workdir=${w}

# Extract CALL_RATE and IMPUTED from chr.info.gz files
zcat ${pathgeno}/chrX.info.gz | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[5]."\t".($F[7] eq "Imputed" ? 1 : 0)."\t".($F[7] eq "Imputed" ? "impute_info" : ".")' > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.tmp

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/extract_info_1_chrX_Roskilde_vcf.sh


In [7]:
%%bash -s "$workdir" "$pathgeno" 
. /home/mjl259/.bashrc

script="$1/script/extract_info_1_chrX_Roskilde_vcf.sh"
qsub -q long.q -v pg=$2,w=$1 -e $1/log/extract_info_1_chrX.e -o $1/log/extract_info_1_chrX.o "$script"

Your job 6885627 ("Extract_1") has been submitted


## 7. Extract info file 2

### 7.1 Autosomes info file 2

In [8]:
%%writefile $workdir/script/extract_info_2_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Extract_2
#$ -cwd
#$ -pe smp 8

chr=${c}
pathgeno=${pg}
workdir=${w}

# Extract INFO from dosage vcf files
bcftools query -f '%ID\t%INFO/R2\n' ${pathgeno}/chr${chr}.dose.vcf.gz > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/chr${chr}_SNPID_INFO.tmp

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/extract_info_2_Roskilde_vcf.sh


In [9]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/extract_info_2_Roskilde_vcf.sh"
for chr in {1..22}; do
qsub -q long.q -v c=${chr},pg=$2,w=$1 -e $1/log/extract_info_2_chr${chr}.e -o $1/log/extract_info_2_chr${chr}.o "$script";
done

Your job 6885628 ("Extract_2") has been submitted
Your job 6885629 ("Extract_2") has been submitted
Your job 6885630 ("Extract_2") has been submitted
Your job 6885631 ("Extract_2") has been submitted
Your job 6885632 ("Extract_2") has been submitted
Your job 6885633 ("Extract_2") has been submitted
Your job 6885634 ("Extract_2") has been submitted
Your job 6885635 ("Extract_2") has been submitted
Your job 6885636 ("Extract_2") has been submitted
Your job 6885637 ("Extract_2") has been submitted
Your job 6885638 ("Extract_2") has been submitted
Your job 6885639 ("Extract_2") has been submitted
Your job 6885640 ("Extract_2") has been submitted
Your job 6885641 ("Extract_2") has been submitted
Your job 6885642 ("Extract_2") has been submitted
Your job 6885643 ("Extract_2") has been submitted
Your job 6885644 ("Extract_2") has been submitted
Your job 6885645 ("Extract_2") has been submitted
Your job 6885646 ("Extract_2") has been submitted
Your job 6885647 ("Extract_2") has been submitted


In [15]:
%%writefile $workdir/script/Merge_info_2_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Merge_2
#$ -cwd
#$ -pe smp 8

chr=${c}
pathgeno=${pg}
workdir=${w}

# Merge chromsome files
cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/chr*_SNPID_INFO.tmp > \
    ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_info_2_Roskilde_vcf.sh


In [16]:
%%bash -s "$workdir" "$pathgeno" "$pathpheno"  
. /home/mjl259/.bashrc

script="$1/script/Merge_info_2_Roskilde_vcf.sh"
for chr in {1..22}; do
qsub -q long.q -v c=${chr},pg=$2,w=$1 -e $1/log/extract_info_2_chr${chr}.e -o $1/log/extract_info_2_chr${chr}.o "$script";
done

Your job 6885673 ("Merge_2") has been submitted
Your job 6885674 ("Merge_2") has been submitted
Your job 6885675 ("Merge_2") has been submitted
Your job 6885676 ("Merge_2") has been submitted
Your job 6885677 ("Merge_2") has been submitted
Your job 6885678 ("Merge_2") has been submitted
Your job 6885679 ("Merge_2") has been submitted
Your job 6885680 ("Merge_2") has been submitted
Your job 6885681 ("Merge_2") has been submitted
Your job 6885682 ("Merge_2") has been submitted
Your job 6885683 ("Merge_2") has been submitted
Your job 6885684 ("Merge_2") has been submitted
Your job 6885685 ("Merge_2") has been submitted
Your job 6885686 ("Merge_2") has been submitted
Your job 6885687 ("Merge_2") has been submitted
Your job 6885688 ("Merge_2") has been submitted
Your job 6885689 ("Merge_2") has been submitted
Your job 6885690 ("Merge_2") has been submitted
Your job 6885691 ("Merge_2") has been submitted
Your job 6885692 ("Merge_2") has been submitted
Your job 6885693 ("Merge_2") has been su

### 7.2 Chromosome X info file 2

In [10]:
%%writefile $workdir/script/extract_info_2_chrX_Roskilde_vcf.sh
#$ -S /bin/bash
#$ -N Extract_2
#$ -cwd
#$ -pe smp 8

pathgeno=${pg}
workdir=${w}

# Extract INFO from dosage vcf files
bcftools query -f '%ID\t%INFO/R2\n' ${pathgeno}/chrX.dose.vcf.gz > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO.tmp

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/extract_info_2_chrX_Roskilde_vcf.sh


In [11]:
%%bash -s "$workdir" "$pathgeno"  
. /home/mjl259/.bashrc

script="$1/script/extract_info_2_chrX_Roskilde_vcf.sh"
qsub -q long.q -v pg=$2,w=$1 -e $1/log/extract_info_2_chrX.e -o $1/log/extract_info_2_chrX.o "$script"

Your job 6885650 ("Extract_2") has been submitted


## 8. Organize two info files

### 8.1 Autosomes info files

In [4]:
%%writefile $workdir/script/Organize_info_files_Roskilde_child.sh
#$ -S /bin/bash
#$ -N Org_Info
#$ -cwd
#$ -pe smp 8

workdir=${w}

# FILE 1: Set CALL_RATE to 4 decimal places
awk -v OFS='\t' '{ printf("%s %.4f %s %s\n", $1, $2, $3, $4)}' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.txt > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_dec.tmp

# FILE 1: Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_dec.tmp;} > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_header.txt

# FILE 2: Set INFO (RSQ values) to 4 decimal places
awk -v OFS='\t' '{ printf("%s %.4f\n", $1, $2) }' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO.txt > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO_dec.tmp

# FILE 2: Add header
{ echo -e "SNPID\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO_dec.tmp; } > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Organize_info_files_Roskilde_child.sh


In [5]:
%%bash -s "$workdir"
. /home/mjl259/.bashrc

script="$1/script/Organize_info_files_Roskilde_child.sh"

qsub -q long.q -v w=$1 -e $1/log/org_info_files_Roskilde_child.e -o $1/log/org_info_files_Roskilde_child.o "$script";

Your job 6885830 ("Org_Info") has been submitted


### 8.2 X chromosome info files

In [6]:
%%writefile $workdir/script/Organize_info_files_ChrX_Roskilde_child.sh
#$ -S /bin/bash
#$ -N Org_Info
#$ -cwd
#$ -pe smp 8

workdir=${w}

# FILE 1: Set CALL_RATE to 4 decimal places
awk -v OFS='\t' '{ printf("%s %.4f %s %s\n", $1, $2, $3, $4)}' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE.tmp > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_dec.tmp

# FILE 1: Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_dec.tmp;} > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_header.txt

# FILE 2: Set INFO (RSQ values) to 4 decimal places
awk -v OFS='\t' '{ printf("%s %.4f\n", $1, $2) }' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO.tmp > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO_dec.tmp

# FILE 2: Add header
{ echo -e "SNPID\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO_dec.tmp; } > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Organize_info_files_ChrX_Roskilde_child.sh


In [7]:
%%bash -s "$workdir"
. /home/mjl259/.bashrc

script="$1/script/Organize_info_files_ChrX_Roskilde_child.sh"

qsub -q long.q -v w=$1 -e $1/log/org_info_files_chrX_Roskilde_child.e -o $1/log/org_info_files_chrX_Roskilde_child.o "$script";

Your job 6885831 ("Org_Info") has been submitted


## 9. Merge two info files

### 9.1 Autosome merge

In [5]:
%%writefile $workdir/script/Merge_info_files_Roskilde_CHILD_rerun.sh
#$ -S /bin/bash
#$ -N Merge_R
#$ -cwd
#$ -pe smp 8

workdir=${w}

# Merge two INFO FILES
awk -v OFS='\t' 'FNR==NR{a[$1]=$2;next}{ print $0, a[$1]}' \
<(tail +2 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_INFO_header.txt) \
<(tail +2 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_header.txt) > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp

# Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp ; } > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp2

# Set INFO (RSQ values) to missing if genotyped
cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp2 | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[1]."\t".$F[2]."\t".$F[3]."\t".($F[2] eq "1" ? $F[4] : ".")' > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_NA.tmp

# Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_NA.tmp ; } > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_info_files_Roskilde_CHILD_rerun.sh


In [6]:
%%bash -s "$workdir"  
. /home/mjl259/.bashrc

script="$1/script/Merge_info_files_Roskilde_CHILD_rerun.sh"

qsub -q all.q -v w=$1 -e $1/log/Merge_info_Roskilde_CHILD_rerun.e -o $1/log/Merge_info_Roskilde_CHILD_rerun.o "$script";

Your job 6918654 ("Merge_R") has been submitted


### 9.2 Chromosome X merge

In [7]:
%%writefile $workdir/script/Merge_info_files_chrX_Roskilde_CHILD_rerun.sh
#$ -S /bin/bash
#$ -N Merge_R
#$ -cwd
#$ -pe smp 8

workdir=${w}

# Merge two INFO FILES
awk -v OFS='\t' 'FNR==NR{a[$1]=$2;next}{ print $0, a[$1]}' \
<(tail +2 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_INFO_header.txt) \
<(tail +2 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_header.txt) > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp

# Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp; } > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp2

# Set INFO (RSQ values) to missing if genotyped
cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO.tmp2 | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[1]."\t".$F[2]."\t".$F[3]."\t".($F[2] eq "1" ? $F[4] : ".")' > \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_NA.tmp

# Add header
{ echo -e "SNPID\tCALL_RATE\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_NA.tmp; } > \
 ${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_info_files_chrX_Roskilde_CHILD_rerun.sh


In [8]:
%%bash -s "$workdir"  
. /home/mjl259/.bashrc

script="$1/script/Merge_info_files_chrX_Roskilde_CHILD_rerun.sh"

qsub -q all.q -v w=$1 -e $1/log/Merge_info_chrX_Roskilde_CHILD_rerun.e -o $1/log/Merge_info_chrX_Roskilde_CHILD_rerun.o "$script";

Your job 6918655 ("Merge_R") has been submitted


## 10. Merge SNPTEST results with merged info file

### 10.1 Autosomes

In [9]:
%%writefile $workdir/script/Merge_SNPTEST_info_files_Roskilde_CHILD_rerun.sh
#$ -S /bin/bash
#$ -N Merge
#$ -cwd
#$ -pe smp 8

workdir=${wd}
pheno=${pe}

# Merging SNPTEST output with info files
awk -v OFS='\t' -v FS='\t' 'FNR==NR{a[$1]=$2 FS $3 FS $4 FS $5;next}{ print $0, a[$1]}' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/Merged_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_header.txt \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_merged_${pheno}_header.txt > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge.tmp

# Re-order CALL_RATE position
awk -v OFS='\t' '{print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $17, $14, $15, $16, $18, $19, $20}' \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge_order.tmp

# Set HWE_P to missing for imputed SNPs
cat ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge_order.tmp | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[1]."\t".$F[2]."\t".$F[3]."\t".$F[4]."\t".$F[5]."\t".$F[6]."\t".$F[7]."\t".$F[8]."\t".$F[9]."\t".$F[10]."\t".$F[11]."\t".($F[17] eq 1 ? "." : $F[12])."\t".$F[13]."\t".$F[14]."\t".$F[15]."\t".$F[16]."\t".$F[17]."\t".$F[18]."\t".$F[19]' > \
${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge_hwe_na.tmp

# Add header
{ echo -e "SNPID\tSTRAND\tBUILD\tCHR\tPOS\tEFFECT_ALLELE\tNON_EFFECT_ALLELE\tN\tN0\tN1\tN2\tEAF\tHWE_P\tCALL_RATE\tBETA\tSE\tPVAL\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_info_merge_hwe_na.tmp ; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_upload_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_SNPTEST_info_files_Roskilde_CHILD_rerun.sh


In [10]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Merge_SNPTEST_info_files_Roskilde_CHILD_rerun.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/Merge_snptest_info_files_Roskilde_CHILD_${pheno}.e -o $1/log/Merge_snptest_info_files_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918680 ("Merge") has been submitted
Your job 6918681 ("Merge") has been submitted
Your job 6918682 ("Merge") has been submitted
Your job 6918683 ("Merge") has been submitted


### 10.2 X chromosome

In [11]:
%%writefile $workdir/script/Merge_SNPTEST_chrX_info_files_Roskilde_CHILD_rerun.sh
#$ -S /bin/bash
#$ -N Merge
#$ -cwd
#$ -pe smp 8

workdir=${wd}
pheno=${pe}

# Merging SNPTEST output with info files
awk -v OFS='\t' -v FS='\t' 'FNR==NR{a[$1]=$2 FS $3 FS $4 FS $5;next}{ print $0, a[$1]}' \
${workdir}/genotype/Roskilde_CHILD_rerun/Info_query/ChrX/chrX_SNPID_CALL-RATE_IMPUTED_INFO-TYPE_INFO_header.txt \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_EAF_order_header.txt > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge.tmp

# Re-order CALL_RATE position
awk -v OFS='\t' '{print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $17, $14, $15, $16, $18, $19, $20}' \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge.tmp > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge_order.tmp

# Set HWE_P to missing for imputed SNPs
cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge_order.tmp | \
perl -lane 'BEGIN{$_ = <>} print $F[0]."\t".$F[1]."\t".$F[2]."\t".$F[3]."\t".$F[4]."\t".$F[5]."\t".$F[6]."\t".$F[7]."\t".$F[8]."\t".$F[9]."\t".$F[10]."\t".$F[11]."\t".($F[17] eq 1 ? "." : $F[12])."\t".$F[13]."\t".$F[14]."\t".$F[15]."\t".$F[16]."\t".$F[17]."\t".$F[18]."\t".$F[19]' > \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge_hwe_na.tmp

# Add header
{ echo -e "SNPID\tSTRAND\tBUILD\tCHR\tPOS\tEFFECT_ALLELE\tNON_EFFECT_ALLELE\tN\tN0\tN1\tN2\tEAF\tHWE_P\tCALL_RATE\tBETA\tSE\tPVAL\tIMPUTED\tINFO_TYPE\tINFO"; \
 cat ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge_hwe_na.tmp ; } > \
 ${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_upload_header.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_SNPTEST_chrX_info_files_Roskilde_CHILD_rerun.sh


In [12]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Merge_SNPTEST_chrX_info_files_Roskilde_CHILD_rerun.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/Merge_snptest_info_files_chrX_Roskilde_CHILD_${pheno}.e -o $1/log/Merge_snptest_info_files_chrX_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918684 ("Merge") has been submitted
Your job 6918685 ("Merge") has been submitted
Your job 6918686 ("Merge") has been submitted
Your job 6918687 ("Merge") has been submitted


## 11. Merge autosomes results with X chromosome results

In [13]:
%%writefile $workdir/script/Merge_final_results_Roskilde_CHILD_rerun.sh
#$ -S /bin/bash
#$ -N Merge
#$ -cwd
#$ -pe smp 8

workdir=${wd}
pheno=${pe}

cat ${workdir}/results/Roskilde_CHILD_rerun/merged/SNPTEST_Roskilde_CHILD_${pheno}_upload_header.txt \
${workdir}/results/Roskilde_CHILD_rerun/ChrX/SNPTest_Roskilde_CHILD_chrX_recoded_${pheno}_info_merge_hwe_na.tmp > \
${workdir}/upload/Roskilde_CHILD_rerun/SNPTEST_Roskilde_CHILD_${pheno}_merged_chr.txt

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/Merge_final_results_Roskilde_CHILD_rerun.sh


In [14]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/Merge_final_results_Roskilde_CHILD_rerun.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/Merge_final_results_Roskilde_CHILD_${pheno}.e -o $1/log/Merge_final_results_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918688 ("Merge") has been submitted
Your job 6918689 ("Merge") has been submitted
Your job 6918690 ("Merge") has been submitted
Your job 6918691 ("Merge") has been submitted


## 12. Rename and zip the final files

In [15]:
%%writefile $workdir/script/gzip_file_upload_Roskilde_CHILD.sh
#$ -S /bin/bash
#$ -N Upload
#$ -cwd
#$ -pe smp 4

workdir=${wd}
pheno=${pe}
    
workdir=${wd}
pheno=${pe}

cp ${workdir}/upload/Roskilde_CHILD_rerun/SNPTEST_Roskilde_CHILD_${pheno}_merged_chr.txt \
   ${workdir}/upload/Roskilde_CHILD_rerun/final

gzip ${workdir}/upload/Roskilde_CHILD_rerun/final/SNPTEST_Roskilde_CHILD_${pheno}_merged_chr.txt

mv ${workdir}/upload/Roskilde_CHILD_rerun/final/SNPTEST_Roskilde_CHILD_BW_sex_adj_merged_chr.txt.gz \
    ${workdir}/upload/Roskilde_CHILD_rerun/final/EGG_HRC_BW6.BW.child.sex.Roskilde.european.SES.20200527.txt.gz

mv ${workdir}/upload/Roskilde_CHILD_rerun/final/SNPTEST_Roskilde_CHILD_BW_sex_GA_adj_merged_chr.txt.gz \
     ${workdir}/upload/Roskilde_CHILD_rerun/final/EGG_HRC_BW6.BW.child.sex_gest.Roskilde.european.SES.20200527.txt.gz

mv ${workdir}/upload/Roskilde_CHILD_rerun/final/SNPTEST_Roskilde_CHILD_PW_sex_adj_merged_chr.txt.gz \
    ${workdir}/upload/Roskilde_CHILD_rerun/final/EGG_HRC_BW6.PW.child.sex.Roskilde.european.SES.20200527.txt.gz

mv ${workdir}/upload/Roskilde_CHILD_rerun/final/SNPTEST_Roskilde_CHILD_PW_sex_GA_adj_merged_chr.txt.gz  \
       ${workdir}/upload/Roskilde_CHILD_rerun/final/EGG_HRC_BW6.PW.child.sex_gest.Roskilde.european.SES.20200527.txt.gz

Overwriting /emc/cbmr/users/mjl259/project_EGG/script/gzip_file_upload_Roskilde_CHILD.sh


In [16]:
%%bash -s "$workdir" 
. /home/mjl259/.bashrc

script="$1/script/gzip_file_upload_Roskilde_CHILD.sh"
declare -a phenos=("BW_sex_GA_adj" "BW_sex_adj" "PW_sex_GA_adj" "PW_sex_adj")
for pheno in "${phenos[@]}"; do
qsub -q all.q -v pe=${pheno},wd=$1 -e $1/log/e/gzip_final_results_Roskilde_CHILD_${pheno}.e \
-o $1/log/o/gzip_final_results_Roskilde_CHILD_${pheno}.o "$script";
done

Your job 6918713 ("Upload") has been submitted
Your job 6918714 ("Upload") has been submitted
Your job 6918715 ("Upload") has been submitted
Your job 6918716 ("Upload") has been submitted
