# Early onset hearing impairment annotation

## Aim

- Retrieve the variants located in a set of specific genes from the exome and imputed-data results for the hearing impairment traits, keeping only those variants with a p-value < 0.05.
- Annotate the variants with a p-value < 0.005 in the summary statistics obtained after running the LMM analysis for the hearing impairment traits, so that the results can be compared for early onset hearing impairment.

## Description

The following cells contain the scripts used for these annotations, as well as the bash code used to select the variants in the specific genes requested by Thashi.

## Note

The hg38 reference was used for the exome data.

The hg19 reference was used for the imputed data.

## Bash code for selecting variants in specific genes

In [None]:
##Coordinates for hg38 exome data
#Chr2	178815308	179060137
#Chr17	36485636	36545338
#Chr7	21533039	21911839
#Chr17	72111020	72136416
#Chr2	113205997	113288921
#Chr2	130449455	130519707
#Chr9	135692185	135805502
#Chr6	168282830	168329777
#Chr2	231443531	231474484
#Chr4	142013160	142856535

# eohi_filter_hg38: read a tab-separated summary-statistics stream on stdin and
# write to stdout the first (header) line plus every row for which
#   * column 1 equals the chromosome of one of the hg38 regions listed above,
#   * column 2 lies strictly between that region's start and end (the two
#     boundary positions themselves are excluded), and
#   * column 8 is < 0.05 (presumably the p-value column -- confirm against the
#     header of the .snp_stats.gz files).
# The regions live in one table instead of ten copies of the same test, and the
# test is evaluated once per region rather than once per field of the row.
eohi_filter_hg38() {
  awk -F'\t' '
    function add(c, s, e) { n++; chr[n] = c; lo[n] = s; hi[n] = e }
    BEGIN {
      add(2,  178815308, 179060137)
      add(17, 36485636,  36545338)
      add(7,  21533039,  21911839)
      add(17, 72111020,  72136416)
      add(2,  113205997, 113288921)
      add(2,  130449455, 130519707)
      add(9,  135692185, 135805502)
      add(6,  168282830, 168329777)
      add(2,  231443531, 231474484)
      add(4,  142013160, 142856535)
    }
    NR == 1 { print; next }
    $8 < 0.05 {
      for (r = 1; r <= n; r++)
        if ($1 == chr[r] && $2 > lo[r] && $2 < hi[r]) { print; next }
    }
  '
}

cwd=/gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/
# Only process files when the results directory is reachable; the cd happens in
# the current shell so the working directory stays changed afterwards.
if cd "$cwd"; then
  for file in */*.snp_stats.gz; do
    # An unmatched glob stays literal; skip it instead of feeding it to zcat.
    [[ -e "$file" ]] || continue
    # ${file%%.*} drops everything from the first "." on, keeping "<dir>/<prefix>".
    eohi_out="${cwd}/${file%%.*}.EOHI"
    zcat "$file" | eohi_filter_hg38 > "$eohi_out"
    # Keep the plain-text .EOHI file and write a gzip copy next to it.
    gzip -c "$eohi_out" > "${eohi_out}.gz"
  done
else
  echo "cannot cd to ${cwd}; no files were filtered" >&2
fi

In [27]:
## Coordinates for hg19 imputed data
#Chr2	179684484	179924864
#Chr17	34841480	34900695
#Chr7	21572657	21951457
#Chr17	70107161	70132557
#Chr2	113963574	114046498
#Chr2	131207028	131277280
#Chr9	138584031	138697348
#Chr6	168683510	168730457
#Chr2	232308242	232339195
#Chr4	142934313	143777688

# eohi_filter_hg19: read a tab-separated summary-statistics stream on stdin and
# write to stdout the first (header) line plus every row for which
#   * column 1 equals the chromosome of one of the hg19 regions listed above,
#   * column 2 lies strictly between that region's start and end (the two
#     boundary positions themselves are excluded), and
#   * column 8 is < 0.05 (presumably the p-value column -- confirm against the
#     header of the .snp_stats.gz files).
# The regions live in one table instead of ten copies of the same test, and the
# test is evaluated once per region rather than once per field of the row.
eohi_filter_hg19() {
  awk -F'\t' '
    function add(c, s, e) { n++; chr[n] = c; lo[n] = s; hi[n] = e }
    BEGIN {
      add(2,  179684484, 179924864)
      add(17, 34841480,  34900695)
      add(7,  21572657,  21951457)
      add(17, 70107161,  70132557)
      add(2,  113963574, 114046498)
      add(2,  131207028, 131277280)
      add(9,  138584031, 138697348)
      add(6,  168683510, 168730457)
      add(2,  232308242, 232339195)
      add(4,  142934313, 143777688)
    }
    NR == 1 { print; next }
    $8 < 0.05 {
      for (r = 1; r <= n; r++)
        if ($1 == chr[r] && $2 > lo[r] && $2 < hi[r]) { print; next }
    }
  '
}

cwd=/gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment
# Only process files when the results directory is reachable; the cd happens in
# the current shell so the working directory stays changed afterwards.
if cd "$cwd"; then
  for file in */*.snp_stats.gz; do
    # An unmatched glob stays literal; skip it instead of feeding it to zcat.
    [[ -e "$file" ]] || continue
    # ${file%%.*} drops everything from the first "." on, keeping "<dir>/<prefix>".
    eohi_out="${cwd}/${file%%.*}.EOHI"
    zcat "$file" | eohi_filter_hg19 > "$eohi_out"
    # Keep the plain-text .EOHI file and write a gzip copy next to it.
    gzip -c "$eohi_out" > "${eohi_out}.gz"
  done
else
  echo "cannot cd to ${cwd}; no files were filtered" >&2
fi




# Exome data

## 1. f.3393: new phenotype with controls_na and UKBB exome non-qc data

In [54]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of f.3393 (hearing aid).

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs. The glob in sumstatsFile is not expanded by the assignment; it is
# expanded when the variable is used unquoted on the command line below.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes200K_noqc_ctrl_na/*.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na
hg=38
p_filter=0.005
job_size=1

# sumstatsFile and bimfiles are deliberately left unquoted: the first must be
# glob-expanded and the second split into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile ${sumstatsFile} \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_na_104402ind_f3393_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_na_104402ind_f3393_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_

## 2. f.2247: new phenotype with controls_na and UKBB exome non-qc data

In [55]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of f.2247 (hearing difficulty).

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs. The glob in sumstatsFile is not expanded by the assignment; it is
# expanded when the variable is used unquoted on the command line below.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/*.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na
hg=38
p_filter=0.005
job_size=1

# sumstatsFile and bimfiles are deliberately left unquoted: the first must be
# glob-expanded and the second split into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile ${sumstatsFile} \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficulty_f2247_expandedwhite_z974included_ctrl_na_144952ind_f2247_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficulty_f2247_expandedwhite_z974included_ctrl_na_144952ind_f2247_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficu

## 3. f.2257: new phenotype with controls_na and UKBB exome non-qc data

In [56]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of f.2257 (hearing with background noise).

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs. The glob in sumstatsFile is not expanded by the assignment; it is
# expanded when the variable is used unquoted on the command line below.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes200K_noqc_ctrl_na/*.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na
hg=38
p_filter=0.005
job_size=1

# sumstatsFile and bimfiles are deliberately left unquoted: the first must be
# glob-expanded and the second split into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile ${sumstatsFile} \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background_noise_f2257_expandedwhite_z974included_ctrl_na_166199ind_f2257_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background_noise_f2257_expandedwhite_z974included_ctrl_na_166199ind_f2257_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background

## 4. f.2247 & f.2257: new phenotype with controls_na and UKBB exome non-qc data

In [57]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of the combined f.2247 & f.2257 phenotype.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs. The glob in sumstatsFile is not expanded by the assignment; it is
# expanded when the variable is used unquoted on the command line below.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/f2247_f2257_exomes200K_noqc_ctrl_na/*.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na
hg=38
p_filter=0.005
job_size=1

# sumstatsFile and bimfiles are deliberately left unquoted: the first must be
# glob-expanded and the second split into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile ${sumstatsFile} \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257

## 5. Tinnitus_p1

In [58]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of the tinnitus (plan 1) analysis.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs: a single filtered summary-statistics file and the exome bim files.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
hg=38
p_filter=0.005
job_size=1

# bimfiles is deliberately left unquoted so it splits into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile "${sumstatsFile}" \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=w64517985db2971a4) is executed successfully with 3 completed steps.



## 6. Tinnitus_p2

In [59]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of the tinnitus (plan 2) analysis.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs: a single filtered summary-statistics file and the exome bim files.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
hg=38
p_filter=0.005
job_size=1

# bimfiles is deliberately left unquoted so it splits into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile "${sumstatsFile}" \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=wde925a43ff7c8e79) is executed successfully with 3 completed steps.



## 7. Tinnitus_p3

In [60]:
# Run the `annovar` workflow of snptogene.ipynb on the EOHI-filtered exome
# summary statistics of the tinnitus (plan 3) analysis.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs: a single filtered summary-statistics file and the exome bim files.
sumstatsFile=${UKBB_PATH}/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.EOHI.gz
# Space-separated list of the 22 per-chromosome exome bim files.
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
hg=38
p_filter=0.005
job_size=1

# bimfiles is deliberately left unquoted so it splits into one argument per file.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile "${sumstatsFile}" \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=w501a7236685f5238) is executed successfully with 3 completed steps.



# Imputed data

In [63]:
# Move to the imputed-data results directory and list the per-trait
# *.EOHI.gz files in long format, sorted by modification time in reverse order.
cd /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment
ls -l -r -t */*.EOHI.gz

-rw-r--r-- 1 dc2325 dewan  9920 Jul 15 09:14 f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 10509 Jul 15 09:18 f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15725 Jul 15 09:22 f2247_f2257_combined/200828_UKBB_f2247_f2257_f2247_f2257.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15906 Jul 15 09:27 f2247_hearing_difficulty/200828_UKBB_Hearing_difficulty_f2247_hearing_diff_new.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15920 Jul 15 09:31 f2247_hearing_difficulty_expandedwhite/120120_UKBB_Hearing_difficulty_f2247_expandedwhite_hearing_diff_new.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 12950 Jul 15 09:35 f2257_hearing_background_noise/200828_UKBB_Hearing_background_noise_f2257_hearing_noise_cat.EOHI.gz
-rw-r--r-- 1 dc2325 dewan   110 Jul 15 09:41 f3393_hearing_aid/200828_UKBB_Hearing_aid_f3393_hearing_aid_cat.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 11182 Jul 15 09:46 srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.EOHI.gz
-rw-r--r-- 1 

## Get the variants in bim file format from the bgen format

In [6]:
# Generate an sbatch script (via the `farnam` workflow of Get_Job_Script.ipynb)
# that runs the bgen_to_bim.ipynb workflow on the imputed bgen files.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Job-script generation: template, workflow notebook and dated output script
tpl_file=${USER_PATH}/UKBB_GWAS_dev/farnam.yml
bgen_bim_sos=${USER_PATH}/UKBB_GWAS_dev/workflow/bgen_to_bim.ipynb
bgen_bim_sbatch=${USER_PATH}/UKBB_GWAS_dev/output/$(date +"%Y-%m-%d")_bgen_bim.sbatch

# Workflow inputs and settings
cwd=~/scratch60/bgen_to_bim
# Space-separated list of the 22 per-chromosome imputed bgen files.
genoFile=$(echo ${UKBB_PATH}/genotype_files/ukb39554_imputeddataset/ukb_imp_chr{1..22}_v3.bgen)
sampleFile=${UKBB_PATH}/genotype_files/ukb39554_imputeddataset/ukb32285_imputedindiv.sample
container_lmm=${UKBB_PATH}/lmm.sif
numThreads=20
job_size=1

# Arguments forwarded to the workflow as one multi-line string.
bgen_args="
    --cwd ${cwd}
    --genoFile ${genoFile}
    --sampleFile ${sampleFile}
    --numThreads ${numThreads} 
    --job_size ${job_size}
    --container_lmm ${container_lmm}
"

sos run ~/project/bioworkflows/admin/Get_Job_Script.ipynb farnam \
    --template-file "${tpl_file}" \
    --workflow-file "${bgen_bim_sos}" \
    --to-script "${bgen_bim_sbatch}" \
    --args "${bgen_args}"

INFO: Running [32mfarnam[0m: Configuration for Yale `farnam` cluster
INFO: [32mfarnam[0m is [32mcompleted[0m.
INFO: [32mfarnam[0m output:   [32m/home/dc2325/project/UKBB_GWAS_dev/output/2021-07-15_bgen_bim.sbatch[0m
INFO: Workflow farnam (ID=w8e3c38dfe4d47887) is executed successfully with 1 completed step.



## 1. f20019_srt_int_left

Requires format changes because these are bgen files.

In [None]:
# Run the `annovar` workflow of snptogene.ipynb on the imputed-data summary
# statistics of f.20019 (SRT int left).
# NOTE(review): sumstatsFile points at the full .fastGWA.snp_stats.gz file, not
# at the region-filtered *.EOHI.gz output -- confirm this is intended.

# Base locations
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project

# Annotation resources
humandb=/gpfs/ysm/datasets/db/annovar/humandb
container_annovar=${UKBB_PATH}/annovar.sif

# Inputs
sumstatsFile=${UKBB_PATH}/results/FastGWA_results/results_imputed_data/hearing_impairment/f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.fastGWA.snp_stats.gz
# NOTE(review): bimfiles is empty, so `--bimfiles` below receives no value.
bimfiles=
# NOTE(review): this is the merged exome bim path although hg=19 imputed data
# is being annotated -- confirm.
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim

# Output directory and workflow settings
cwd=/home/dc2325/scratch60/EOHI_thashi/imputed_data/f20019_srt_int_left
hg=19
p_filter=0.005
job_size=1

# bimfiles stays unquoted so that the empty value expands to no argument at all.
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "${cwd}" \
    --sumstatsFile "${sumstatsFile}" \
    --bim_name "${bim_name}" \
    --bimfiles ${bimfiles} \
    --p_filter "${p_filter}" \
    --hg "${hg}" \
    --job_size "${job_size}" \
    --humandb "${humandb}" \
    --ukbb "${UKBB_PATH}" \
    --container_annovar "${container_annovar}"