# Early onset hearing impairment annotation

## Aim

- Retrieve the variants in these specific genes from the results of exome and imputed data of hearing impairment traits. Only filter those variants with p-val < 0.05
- To annotate the variants with a p-val < 0.005 from the summary statistics after running the LMM analysis for hearing impairment traits in order to compare results for Early onset hearing impairment

## Description

In the following cells you can find the scripts used for these particular annotations as well as some bash code used to select the variants in specific genes requested by Thashi. 

## Note

For exome data hg38 reference was used

For imputed data hg19 reference was used

## Bash code for selecting variants in specific genes

In [None]:
##Coordinates for hg38 exome data
#Chr2	178815308	179060137
#Chr17	36485636	36545338
#Chr7	21533039	21911839
#Chr17	72111020	72136416
#Chr2	113205997	113288921
#Chr2	130449455	130519707
#Chr9	135692185	135805502
#Chr6	168282830	168329777
#Chr2	231443531	231474484
#Chr4	142013160	142856535

cd /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/
cwd=/gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/
for file in */*.snp_stats.gz; do
    zcat $file | awk  -F'\t' 'NR == 1 {print; next} {
      flag=0; 
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 178815308 && $2 < 179060137 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 17 && $2 > 36485636 && $2 < 36545338 && $8<0.05)  flag=1; 
      for (i=1;i<=NF;i++) if ($1 == 7 && $2 > 21533039 && $2 < 21911839 && $8<0.05)  flag=1; 
      for (i=1;i<=NF;i++) if ($1 == 17 && $2 > 72111020 && $2 < 72136416 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 113205997 && $2 < 113288921 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 130449455 && $2 < 130519707 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 9 && $2 > 135692185 && $2 < 135805502 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 6 && $2 > 168282830 && $2 < 168329777 && $8<0.05) flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 231443531 && $2 < 231474484 && $8<0.05) flag=1;
      for (i=1;i<=NF;i++) if ($1 == 4 && $2 > 142013160 && $2 < 142856535 && $8<0.05) flag=1;
      if (flag) print
    }'  > ${cwd}/${file%%.*}.EOHI
    gzip -c ${cwd}/${file%%.*}.EOHI > ${cwd}/${file%%.*}.EOHI.gz ;
    done 

In [27]:
## Coordinates for hg19 imputed data
#Chr2	179684484	179924864
#Chr17	34841480	34900695
#Chr7	21572657	21951457
#Chr17	70107161	70132557
#Chr2	113963574	114046498
#Chr2	131207028	131277280
#Chr9	138584031	138697348
#Chr6	168683510	168730457
#Chr2	232308242	232339195
#Chr4	142934313	143777688
cd /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment
cwd=/gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment
for file in */*.snp_stats.gz; do
    zcat $file | awk  -F'\t' 'NR == 1 {print; next} {
      flag=0; 
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 179684484 && $2 < 179924864 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 17 && $2 > 34841480 && $2 < 34900695 && $8<0.05)  flag=1; 
      for (i=1;i<=NF;i++) if ($1 == 7 && $2 >21572657 && $2 < 21951457 && $8<0.05)  flag=1; 
      for (i=1;i<=NF;i++) if ($1 == 17 && $2 > 70107161 && $2 < 70132557 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 113963574 && $2 < 114046498 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 131207028 && $2 < 131277280 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 9 && $2 > 138584031 && $2 < 138697348 && $8<0.05)  flag=1;
      for (i=1;i<=NF;i++) if ($1 == 6 && $2 > 168683510 && $2 < 168730457 && $8<0.05) flag=1;
      for (i=1;i<=NF;i++) if ($1 == 2 && $2 > 232308242 && $2 < 232339195 && $8<0.05) flag=1;
      for (i=1;i<=NF;i++) if ($1 == 4 && $2 > 142934313 && $2 < 143777688 && $8<0.05) flag=1;
      if (flag) print
    }' > ${cwd}/${file%%.*}.EOHI
    gzip -c ${cwd}/${file%%.*}.EOHI > ${cwd}/${file%%.*}.EOHI.gz ;
    done




# Exome data

## 1. f.3393: new phenotype with controls_na and UKBB exome non-qc data

In [54]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes200K_noqc_ctrl_na/*.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_na_104402ind_f3393_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_na_104402ind_f3393_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f3393_hearing_aid_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_aid_f3393_expandedwhite_z974included_ctrl_

## 2. f.2247: new phenotype with controls_na and UKBB exome non-qc data

In [55]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/*.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficulty_f2247_expandedwhite_z974included_ctrl_na_144952ind_f2247_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficulty_f2247_expandedwhite_z974included_ctrl_na_144952ind_f2247_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_hearing_difficulty_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_difficu

## 3. f.2257: new phenotype with controls_na and UKBB exome non-qc data

In [56]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes200K_noqc_ctrl_na/*.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background_noise_f2257_expandedwhite_z974included_ctrl_na_166199ind_f2257_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background_noise_f2257_expandedwhite_z974included_ctrl_na_166199ind_f2257_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2257_hearing_noise_exomes200K_noqc_ctrl_na/062421_UKBB_Hearing_background

## 4. f.2247 & f.2257: new phenotype with controls_na and UKBB exome non-qc data

In [57]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/f2247_f2257_exomes200K_noqc_ctrl_na/*.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257_ctrl_na.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257_ctrl_na.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/f2247_f2257_exomes200K_noqc_ctrl_na/062421_UKBB_f2247_f2257_expandedwhite_z974included_ctrl_na_137245ind_f2247_f2257

## 5. Tinnitus_p1

In [58]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=w64517985db2971a4) is executed successfully with 3 completed steps.



## 6. Tinnitus_p2

In [59]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p2.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=wde925a43ff7c8e79) is executed successfully with 3 completed steps.



## 7. Tinnitus_p3

In [60]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.EOHI.gz
hg=38
job_size=1
container_annovar=$UKBB_PATH/annovar.sif
bimfiles=`echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim`
bim_name=/home/dc2325/project/results/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --ukbb $UKBB_PATH \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dc2325/scratch60/EOHI_thashi/tinnitus_plan1_2_3/010421_UKBB_Tinnitus_plan1_2_3_f4803_exomes_tinnitus_p3.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=w501a7236685f5238) is executed successfully with 3 completed steps.



## 8. Mendelian-like data

In [4]:
module load Singularity
UKBB_PATH=/mnt/mfs/statgen/UKBiobank/
USER_PATH=/home/dmc2245/project/
cwd=/home/dmc2245/scratch60/EOHI_thashi/exome_data/mendelian_like
sumstatsFile=$UKBB_PATH/results/REGENIE_results/results_exome_data/mendelian_like_exomes200K_ctrl_na/full_mendilian-like_pheno_file_mendilian-like.regenie.snp_stats.gz
hg=38
job_size=1
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif
bim_name=/mnt/mfs/statgen/UKBiobank/data/exome_bim_merge/ukb23155_chr1_chr22.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
p_filter=0.005
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --bimfiles $bimfiles \
    --p_filter $p_filter \
    --hg $hg \
    --job_size $job_size \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m (index=0) is [32mignored[0m due to saved signature
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/exome_data/mendelian_like/full_mendilian-like_pheno_file_mendilian-like.regenie.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m (index=0) is [32mignored[0m due to saved signature
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/exome_data/mendelian_like/full_mendilian-like_pheno_file_mendilian-like.regenie.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/exome_data/mendelian_like/full_mendilian-like_pheno_file_mendilian-like.regenie.hg38_multianno.csv[0m
INFO: Workflow annovar (ID=w06a2533da717

# Imputed data

In [63]:
cd /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment
ls -lrt */*.EOHI.gz

-rw-r--r-- 1 dc2325 dewan  9920 Jul 15 09:14 f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 10509 Jul 15 09:18 f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15725 Jul 15 09:22 f2247_f2257_combined/200828_UKBB_f2247_f2257_f2247_f2257.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15906 Jul 15 09:27 f2247_hearing_difficulty/200828_UKBB_Hearing_difficulty_f2247_hearing_diff_new.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 15920 Jul 15 09:31 f2247_hearing_difficulty_expandedwhite/120120_UKBB_Hearing_difficulty_f2247_expandedwhite_hearing_diff_new.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 12950 Jul 15 09:35 f2257_hearing_background_noise/200828_UKBB_Hearing_background_noise_f2257_hearing_noise_cat.EOHI.gz
-rw-r--r-- 1 dc2325 dewan   110 Jul 15 09:41 f3393_hearing_aid/200828_UKBB_Hearing_aid_f3393_hearing_aid_cat.EOHI.gz
-rw-r--r-- 1 dc2325 dewan 11182 Jul 15 09:46 srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.EOHI.gz
-rw-r--r-- 1 

## Get the variants in bim file format from the bgen format

In [30]:
UKBB_PATH=/gpfs/gibbs/pi/dewan/data/UKBiobank
USER_PATH=/home/dc2325/project
cwd=/home/dc2325/project/results/bgen_bim_merge/
genoFile=`echo $UKBB_PATH/genotype_files/ukb39554_imputeddataset/ukb_imp_chr{1..22}_v3.bgen`
job_size=1
container_lmm=$UKBB_PATH/lmm.sif
bim_name='ukb39554_imputed_variants'

sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb bim_from_bgen\
    --cwd $cwd\
    --genoFile $genoFile\
    --job_size $job_size \
    --bim_name $bim_name\
    --container_lmm $container_lmm

INFO: Running [32mbim_from_bgen[0m: Create a merged *.bim file from *.bgen files
INFO: [32mbim_from_bgen[0m (index=0) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=1) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=2) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=3) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=4) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=5) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=6) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=7) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=8) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=9) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=10) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=11) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=12) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=13) is [32mcompleted[0m.
INFO: [32mbim_from_bgen[0m (index=14) is [

## 1. f20019_srt_int_left

#### Columbia's cluster 
Require format changes since these are bgen files

In [2]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20019_srt_int_left
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar\
    -s build

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: Step [32mannovar_1[0m (index=0) is [32mignored[0m with signature constructed
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: Step [32mannovar_2[0m (index=0) is [32mignored[0m with signature constructed
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20019_srt_int_left/200904_UKBB_SRT_int_left_cc_srt_int_left.fastGWA.hg19_multianno.csv[0m
INFO: Workflow a

## 2. f20021_srt_int_right

In [3]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20021_srt_int_right
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/f20021_srt_int_right/200904_UKBB_SRT_int_right_cc_srt_int_right.fastGWA.hg19_multianno.csv[0m
INFO: Workflow annovar (ID=w62cf5871ca0d28cf) is executed successfully with 3 complet

## 3. srt_int_best

In [4]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_best
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/srt_int_best/200904_UKBB_SRT_int_best_cc_srt_int_best.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_best/200904_UKBB_SRT_int_best_cc_srt_int_best.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_best/200904_UKBB_SRT_int_best_cc_srt_int_best.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_best/200904_UKBB_SRT_int_best_cc_srt_int_best.fastGWA.hg19_multianno.csv[0m
INFO: Workflow annovar (ID=wbea1e3849f3fca2d) is executed successfully with 3 completed steps.



## 4. srt_int_worst

In [5]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_worst
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/srt_int_worst/200904_UKBB_SRT_int_worst_cc_srt_int_worst.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_worst/200904_UKBB_SRT_int_worst_cc_srt_int_worst.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_worst/200904_UKBB_SRT_int_worst_cc_srt_int_worst.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_int_worst/200904_UKBB_SRT_int_worst_cc_srt_int_worst.fastGWA.hg19_multianno.csv[0m
INFO: Workflow annovar (ID=w5ccac777741734f2) is executed successfully with 3 completed steps.



## 5. srt_cat

In [6]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_cat
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/srt_cat/200904_UKBB_SRT_cat_cc_srt_cat.fastGWA.hg19_multianno.csv[0m
INFO: Workflow annovar (ID=w72e33b6e2ea9982a) is executed successfully with 3 completed steps.



## 6. tinnitus_082720

In [7]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/tinnitus_p2_082720
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/tinnitus_p2_082720/200811_UKBB_Tinnitus_plan1_2_3_f4803_tinnitus_p2.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

INFO: Running [32mannovar_1[0m: Get the list of significantly associated SNPs
INFO: [32mannovar_1[0m is [32mcompleted[0m.
INFO: [32mannovar_1[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/tinnitus_p2_082720/200811_UKBB_Tinnitus_plan1_2_3_f4803_tinnitus_p2.fastGWA.snp_annotate[0m
INFO: Running [32mannovar_2[0m: Get chr, start, end, ref_allele, alt_allele format
INFO: [32mannovar_2[0m is [32mcompleted[0m.
INFO: [32mannovar_2[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/tinnitus_p2_082720/200811_UKBB_Tinnitus_plan1_2_3_f4803_tinnitus_p2.fastGWA.avinput[0m
INFO: Running [32mannovar_3[0m: Annotate variants file using ANNOVAR
INFO: [32mannovar_3[0m is [32mcompleted[0m.
INFO: [32mannovar_3[0m output:   [32m/home/dmc2245/scratch60/EOHI_thashi/imputed_data/tinnitus_p2_082720/200811_UKBB_Tinnitus_plan1_2_3_f4803_tinnitus_p2.fastGWA.hg19_multianno.csv[0m
INFO: Workflow annovar (ID=wc45b5d587d2df036) is executed successfully wi

## 7. tinnitus_p3_082720

In [None]:
UKBB_PATH=/mnt/mfs/statgen/UKBiobank
USER_PATH=/home/dmc2245/project
cwd=/home/dmc2245/scratch60/EOHI_thashi/imputed_data/tinnitus_p3_082720
sumstatsFile=$UKBB_PATH/results/FastGWA_results/results_imputed_data/hearing_impairment/tinnitus_p3_082720/200811_UKBB_Tinnitus_plan1_2_3_f4803_tinnitus_p3.fastGWA.snp_stats.gz
build='hg19'
rsid=True
p_filter=0.005
bim_name=$UKBB_PATH/data/bgen_bim_merge/ukb39554_imputed_variants.merged.bim
humandb=/mnt/mfs/statgen/isabelle/REF/humandb
xref_path=/mnt/mfs/statgen/isabelle/REF/humandb
container_annovar=/mnt/mfs/statgen/containers/gatk4-annovar.sif

module load Singularity/3.5.3
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd $cwd \
    --sumstatsFile $sumstatsFile\
    --bim_name $bim_name \
    --p_filter $p_filter \
    --rsid $rsid \
    --build $build \
    --humandb $humandb\
    --xref_path $xref_path \
    --container_annovar $container_annovar

In [1]:
library('dplyr')
setwd('/gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/hearing_impairment/f20019_srt_int_left/')
# Import the sumstats file as dataframe
data <- read.table(gzfile('200904_UKBB_SRT_int_left_cc_srt_int_left.EOHI.gz'), sep='\t', header=T)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
head(data)

Unnamed: 0_level_0,CHR,POS,REF,ALT,SNP,BETA,SE,P
Unnamed: 0_level_1,<int>,<int>,<fct>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
1,2,113982699,G,A,rs568012144,0.099069,0.0467875,0.0342238
2,2,113994450,T,C,rs557926582,-0.0900989,0.042168,0.0326254
3,2,114010048,T,C,rs11886592,-0.0255934,0.012288,0.0372693
4,2,114012047,C,G,rs73955185,-0.0259768,0.0122771,0.0343546
5,2,114012239,C,T,rs147241854,-0.070698,0.0353587,0.0455595
6,2,114013269,C,G,rs73955186,-0.0256193,0.0122831,0.0370023


In [5]:
# Filter SNPs with p-val <5e-08
# Subset data to obtain only chr, pos, snp, beta,se and p for gene mapping
sig.p <- data %>%
    filter(P < 0.005) %>%
    select(CHR, POS, REF, ALT,BETA,SE,P)
    

In [6]:
head(sig.p)

Unnamed: 0_level_0,CHR,POS,REF,ALT,BETA,SE,P
Unnamed: 0_level_1,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
1,2,179872951,A,T,0.0817565,0.0200551,4.56983e-05
2,4,142990767,G,A,0.042855,0.0152238,0.00487778
3,4,143028337,A,G,0.0465522,0.0150079,0.00192319
4,4,143047063,G,A,0.0472084,0.0150545,0.0017136
5,4,143184193,T,C,-0.153717,0.0531503,0.00382663
6,4,143238014,C,A,-0.133751,0.046752,0.00422484


In [7]:
sig.p$SNP <- paste(sig.p$CHR, sig.p$POS, sig.p$REF, sig.p$ALT, sep=":")

In [8]:
head(sig.p)

Unnamed: 0_level_0,CHR,POS,REF,ALT,BETA,SE,P,SNP
Unnamed: 0_level_1,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<chr>
1,2,179872951,A,T,0.0817565,0.0200551,4.56983e-05,2:179872951:A:T
2,4,142990767,G,A,0.042855,0.0152238,0.00487778,4:142990767:G:A
3,4,143028337,A,G,0.0465522,0.0150079,0.00192319,4:143028337:A:G
4,4,143047063,G,A,0.0472084,0.0150545,0.0017136,4:143047063:G:A
5,4,143184193,T,C,-0.153717,0.0531503,0.00382663,4:143184193:T:C
6,4,143238014,C,A,-0.133751,0.046752,0.00422484,4:143238014:C:A


In [10]:
write.table(sig.p, '${_output}', sep = " ", quote=FALSE, row.names=FALSE, col.names=FALSE) 

In [15]:
library('dplyr')
setwd('~/')
# Import the sumstats file as dataframe
data <- read.table('out.test', sep='\t', header=T)

In [11]:
cat ~/out.test | head

# bgenix: started 2021-07-15 16:10:09
alternate_ids	rsid	chromosome	position	number_of_alleles	first_allele	alternative_alleles
22:16050075_A_G	rs587697622	22	16050075	2	A	G
22:16050115_G_A	rs587755077	22	16050115	2	G	A
22:16050213_C_T	rs587654921	22	16050213	2	C	T
22:16050319_C_T	rs587712275	22	16050319	2	C	T
22:16050435_T_C	22:16050435_T_C	22	16050435	2	T	C
22:16050527_C_A	rs587769434	22	16050527	2	C	A
22:16050568_C_A	rs587638893	22	16050568	2	C	A
22:16050607_G_A	rs587720402	22	16050607	2	G	A
cat: write error: Broken pipe



In [1]:
cd /home/dc2325/scratch60/output/
awk 'NR==FNR{seen[$0]=1; next} seen[$0]' 010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.avinput 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58506159 58506159 G A
1 58506268 58506268 C A
1 58539307 58539307 C T
5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
5 73773756 73773756 A G
5 73776529 73776529 T C
5 73780632 73780632 G A
5 73780649 73780650 GT G
5 73780686 73780686 C A
5 73794340 73794340 A AGTT
5 73794436 73794436 T C
5 73795301 73795301 T A
5 73795403 73795403 C T
6 43301291 43301291 A C
6 43302413 43302413 C T
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G
6 158071628 158071628 C T
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T
22 38086345 38086387 GTGCGGGAGCGGGACTGGCCATCCCAGTACTCCGAGGGTGCTA G
22 50549676 50549676 G A



In [2]:
awk 'NR==FNR{seen[$0]=1; next} seen[$0]' 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput 010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.avinput

5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
6 43301291 43301291 A C
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G



In [4]:
awk 'NR==FNR{seen[$0]=1; next} seen[$0]' 010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.avinput 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput 

5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
6 43301291 43301291 A C
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G



In [5]:
awk 'NR==FNR{seen[$0]=1; next} seen[$0]' 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput 010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58539307 58539307 C T
6 75841299 75841299 A G
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T
10 7292138 7292138 G A
17 41827392 41827392 A C
17 44848191 44848191 C T



In [6]:
awk 'NR==FNR{seen[$0]=1; next} seen[$0]' 010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.avinput 010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58539307 58539307 C T
6 75362956 75362956 T C
6 75841299 75841299 A G
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T



In [8]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate

1	58468867	T	C	1:58468867:T:C	-0.168986	0.021412099999999996	7.055048440506522e-16
1	58468874	T	C	1:58468874:T:C	-0.168015	0.0213657	9.11381062129295e-16
1	58468970	G	T	1:58468970:G:T	-0.14103	0.0156138	6.13903345360935e-20
1	58539307	C	T	1:58539307:C:T	-0.13430799999999998	0.0143388	3.3853221281535305e-21
2	168834219	T	C	2:168834219:T:C	-0.130908	0.021813	1.029437536659268e-09
6	75362956	T	C	6:75362956:T:C	2.48143	0.369753	8.570575794832358e-09
6	75841299	A	G	6:75841299:A:G	2.9806	0.407723	1.2227847112296838e-10
8	86556416	T	G	8:86556416:T:G	-0.115771	0.017346200000000003	1.4047531026968791e-11
8	86558437	A	C	8:86558437:A:C	-0.114282	0.0193435	2.110425374828825e-09
8	86558500	C	T	8:86558500:C:T	-0.116954	0.019379	9.270219126904037e-10
10	7243670	A	G	10:7243670:A:G	-0.0980153	0.0159944	1.0312880846394044e-09
10	7276841	T	C	10:7276841:T:C	-0.133236	0.0219836	7.090877782620701e-10
10	7277326	C	A	10:7277326:C:A	-0.143172	0.0223905	7.409689339136985e-11
10	7292138	G	A	10:7292138:G:A	-0.110

### BETA, SE and P in f.3393 when using the original bfile from UKBB

In [6]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes_bfile/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate

1	33010786	C	T	1:33010786:C:T	0.170924	0.0296248	8.597060836427362e-09
1	58468867	T	C	1:58468867:T:C	-0.227117	0.0216753	3.813291452767764e-27
1	58468874	T	C	1:58468874:T:C	-0.225999	0.0216299	5.560322736938666e-27
1	58468970	G	T	1:58468970:G:T	-0.193106	0.0157452	1.1279763655881595e-35
1	58506159	G	A	1:58506159:G:A	-0.203908	0.0312325	1.7943204217168444e-11
1	58506268	C	A	1:58506268:C:A	-0.201774	0.0313195	3.312073760197969e-11
1	58530531	T	C	1:58530531:T:C	-0.271127	0.0423279	2.6927745869589524e-11
1	58539307	C	T	1:58539307:C:T	-0.18493	0.0144342	1.8264172487743598e-38
2	168824768	T	TTTTGTTTGTTTG	2:168824768:I:12	-0.247936	0.0319456	3.9012160295267637e-16
2	168824768	T	TTTTGTTTG	2:168824768:I:8	-0.194196	0.0343111	4.233016876945092e-09
2	168834219	T	C	2:168834219:T:C	-0.224051	0.0220935	1.669552191554301e-25
2	168850752	AAATTCTCTGG	A	2:168850752:D:10	-0.193516	0.0290039	7.289537928556997e-12
4	15985886	T	TTTAAG	4:15985886:I:5	0.111099	0.0160466	2.556819088689687e-12
6	75362956	T	C	6:

In [8]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes_bfile/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate

1 33010786 1:33010786:C:T C T 0.231841 ADD 0.170924 0.0296248 33.1352 8.06565
1 58468867 1:58468867:T:C T C 0.187718 ADD -0.227117 0.0216753 116.437 26.4187
1 58468874 1:58468874:T:C T C 0.18874 ADD -0.225999 0.0216299 115.689 26.2549
1 58468970 1:58468970:G:T G T 0.310273 ADD -0.193106 0.0157452 155.43 34.9477
1 58506159 1:58506159:G:A G A 0.0980239 ADD -0.203908 0.0312325 45.1833 10.7461
1 58506268 1:58506268:C:A C A 0.096814 ADD -0.201774 0.0313195 43.9831 10.4799
1 58530531 1:58530531:T:C T C 0.0579461 ADD -0.271127 0.0423279 44.3884 10.5698
1 58539307 1:58539307:C:T C T 0.351209 ADD -0.18493 0.0144342 168.203 37.7384
2 168824768 2:168824768:I:12 T TTTTGTTTGTTTG 0.0969155 ADD -0.247936 0.0319456 66.2853 15.4088
2 168824768 2:168824768:I:8 T TTTTGTTTG 0.0826374 ADD -0.194196 0.0343111 34.5135 8.37335
2 168834219 2:168834219:T:C T C 0.18284 ADD -0.224051 0.0220935 108.944 24.7774
2 168850752 2:168850752:D:10 AAATTCTCTGG A 0.116754 ADD -0.193516 0.0290039 46.948 11.1373
4 15985886 4:1

### Get the BETA, SE, P and MAF from the original column file f.3393

In [1]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate

1 58468867 1:58468867:T:C T C 0.187716 ADD -0.168986 0.0214121 65.1177 15.1515 
1 58468874 1:58468874:T:C T C 0.188739 ADD -0.168015 0.0213657 64.6132 15.0403 
1 58468970 1:58468970:G:T G T 0.310271 ADD -0.14103 0.0156138 83.5735 19.2119 
1 58539307 1:58539307:C:T C T 0.351207 ADD -0.134308 0.0143388 89.3042 20.4704 
2 168834219 2:168834219:T:C T C 0.182838 ADD -0.130908 0.021813 37.2683 8.9874 
6 75362956 6:75362956:T:C T C 0.000148152 ADD 2.48143 0.369753 33.1412 8.06699 
6 75841299 6:75841299:A:G A G 0.000113057 ADD 2.9806 0.407723 41.4282 9.91265 
8 86556416 8:86556416:T:G T G 0.25997 ADD -0.115771 0.0173462 45.6626 10.8524 
8 86558437 8:86558437:A:C A C 0.2221 ADD -0.114282 0.0193435 35.869 8.67563 
8 86558500 8:86558500:C:T C T 0.218854 ADD -0.116954 0.019379 37.4727 9.03291 
10 7243670 10:7243670:A:G A G 0.546956 ADD -0.0980153 0.0159944 37.2648 8.98662 
10 7276841 10:7276841:T:C T C 0.189831 ADD -0.133236 0.0219836 37.9955 9.1493 
10 7277326 10:7277326:C:A C A 0.186652 ADD -0.1

## f.2257 exome filtered

In [9]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_stats.gz  | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_annotate

5	272741	A	G	5:272741:A:G	0.897546	0.128497	1.2220810223250552e-12
5	272748	G	C	5:272748:G:C	0.8718739999999999	0.139898	2.4925277377693566e-10
5	272755	A	G	5:272755:A:G	0.8530200000000001	0.12476400000000001	4.0766178334794515e-12
6	43301291	A	C	6:43301291:A:C	0.04535719999999999	0.00720349	2.987376772921112e-10
6	43305866	A	G	6:43305866:A:G	0.0454121	0.00720402	2.852462870044014e-10
6	43308652	G	A	6:43308652:G:A	0.044663800000000003	0.00718951	5.126842785315215e-10
6	75841299	A	G	6:75841299:A:G	2.93015	0.673602	3.333419717618388e-10



In [3]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_stats_original_columns.gz  | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_annotate

5 272741 5:272741:A:G A G 0.000746392 ADD 0.897546 0.128497 50.4506 11.9129 
5 272748 5:272748:G:C G C 0.000626873 ADD 0.871874 0.139898 40.0366 9.60336 
5 272755 5:272755:A:G A G 0.000786226 ADD 0.85302 0.124764 48.0875 11.3897 
6 43301291 6:43301291:A:C A C 0.608417 ADD 0.0453572 0.00720349 39.6829 9.52471 
6 43305866 6:43305866:A:G A G 0.608452 ADD 0.0454121 0.00720402 39.7731 9.54478 
6 43308652 6:43308652:G:A G A 0.608468 ADD 0.0446638 0.00718951 38.6284 9.29015 
6 75841299 6:75841299:A:G A G 8.5455e-05 ADD 2.93015 0.673602 39.4688 9.47711 



## f.2257 original bfile

In [10]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes_bfile/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_stats.gz  | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_annotate

1	58468970	G	T	1:58468970:G:T	0.0388225	0.00630642	7.840071077794492e-10
1	58539307	C	T	1:58539307:C:T	0.039175	0.00581854	1.7615703831392665e-11
5	272741	A	G	5:272741:A:G	0.895364	0.128826	1.5656691705285048e-12
5	272748	G	C	5:272748:G:C	0.872977	0.140283	2.6424087573219503e-10
5	272755	A	G	5:272755:A:G	0.852598	0.125086	4.715198713364486e-12
6	75841299	A	G	6:75841299:A:G	2.92445	0.675154	4.5532343128209967e-10
8	86556416	T	G	8:86556416:T:G	0.0422805	0.00701265	1.7416865577909724e-09
8	86558437	A	C	8:86558437:A:C	0.0461656	0.00782776	3.903462396489647e-09
8	86558500	C	T	8:86558500:C:T	0.0462525	0.00783409	3.7604515711246546e-09
10	7292138	G	A	10:7292138:G:A	0.0371488	0.00634972	5.113402742113402e-09
20	62065259	A	G	20:62065259:A:G	0.0383255	0.00675216	1.4268883758820646e-08



In [9]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes_bfile/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_stats_original_columns.gz  | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_annotate

1 58468970 1:58468970:G:T G T 0.218027 ADD 0.0388225 0.00630642 37.7995 9.10568
1 58539307 1:58539307:C:T C T 0.246377 ADD 0.039175 0.00581854 45.2194 10.7541
5 272741 5:272741:A:G A G 0.000746396 ADD 0.895364 0.128826 49.9644 11.8053
5 272748 5:272748:G:C G C 0.000626877 ADD 0.872977 0.140283 39.9225 9.578
5 272755 5:272755:A:G A G 0.000786231 ADD 0.852598 0.125086 47.8019 11.3265
6 75841299 6:75841299:A:G A G 8.54555e-05 ADD 2.92445 0.675154 38.8599 9.34168
8 86556416 8:86556416:T:G T G 0.181755 ADD 0.0422805 0.00701265 36.2432 8.75903
8 86558437 8:86558437:A:C A C 0.156181 ADD 0.0461656 0.00782776 34.6713 8.40855
8 86558500 8:86558500:C:T C T 0.153212 ADD 0.0462525 0.00783409 34.7439 8.42476
10 7292138 10:7292138:G:A G A 0.227024 ADD 0.0371488 0.00634972 34.1458 8.29129
20 62065259 20:62065259:A:G A G 0.247018 ADD 0.0383255 0.00675216 32.1504 7.84561



## f2247 exome filtered

In [11]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_annotate

1	58468867	T	C	1:58468867:T:C	0.0717153	0.00923927	1.1587773561551261e-14
1	58468874	T	C	1:58468874:T:C	0.0715995	0.00922922	1.1937132101928058e-14
1	58468970	G	T	1:58468970:G:T	0.0593697	0.00684749	5.851941823780803e-18
1	58506159	G	A	1:58506159:G:A	0.0802067	0.013556499999999999	4.0267994442394125e-09
1	58506268	C	A	1:58506268:C:A	0.08213530000000001	0.013631799999999998	2.092088248507645e-09
1	58539307	C	T	1:58539307:C:T	0.0580006	0.00632463	6.44465984741689e-20
5	272741	A	G	5:272741:A:G	1.19275	0.125574	4.1152857843551e-21
5	272748	G	C	5:272748:G:C	1.20544	0.13772	3.4268887043278663e-18
5	272755	A	G	5:272755:A:G	1.17113	0.122546	2.5409727055493115e-21
5	73773756	A	G	5:73773756:A:G	0.0452337	0.00785184	8.259999099997013e-09
5	73776529	T	C	5:73776529:T:C	0.04761169999999999	0.0078087	1.0857754055343505e-09
5	73780632	G	A	5:73780632:G:A	0.0471588	0.00780284	1.5153396952829279e-09
5	73780649	GT	G	5:73780649:D:1	0.04820119999999999	0.00790071	1.0603274386034904e-09
5	73780686	C	A	5:7378

In [4]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_annotate

1 58468867 1:58468867:T:C T C 0.13066 ADD 0.0717153 0.00923927 59.6061 13.936 
1 58468874 1:58468874:T:C T C 0.131478 ADD 0.0715995 0.00922922 59.5475 13.9231 
1 58468970 1:58468970:G:T G T 0.216891 ADD 0.0593697 0.00684749 74.5703 17.2327 
1 58506159 1:58506159:G:A G A 0.0685232 ADD 0.0802067 0.0135565 34.6107 8.39504 
1 58506268 1:58506268:C:A C A 0.0673356 ADD 0.0821353 0.0136318 35.886 8.67942 
1 58539307 1:58539307:C:T C T 0.245146 ADD 0.0580006 0.00632463 83.4776 19.1908 
5 272741 5:272741:A:G A G 0.000779296 ADD 1.19275 0.125574 88.9178 20.3856 
5 272748 5:272748:G:C G C 0.000648584 ADD 1.20544 0.13772 75.6268 17.4651 
5 272755 5:272755:A:G A G 0.000817041 ADD 1.17113 0.122546 89.8718 20.595 
5 73773756 5:73773756:A:G A G 0.548953 ADD 0.0452337 0.00785184 33.2129 8.08302 
5 73776529 5:73776529:T:C T C 0.467749 ADD 0.0476117 0.0078087 37.1644 8.96426 
5 73780632 5:73780632:G:A G A 0.465742 ADD 0.0471588 0.00780284 36.5145 8.81949 
5 73780649 5:73780649:D:1 GT G 0.472223 ADD 0.048

## f.2247 original bfile

In [12]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes_bfile/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_annotate

1	33010786	C	T	1:33010786:C:T	-0.0877293	0.0125406	2.670545612191901e-12
1	58468867	T	C	1:58468867:T:C	0.100429	0.00929324	7.573558772744394e-27
1	58468874	T	C	1:58468874:T:C	0.100516	0.0092829	5.9937689832663774e-27
1	58468970	G	T	1:58468970:G:T	0.0846861	0.00689	2.375198990470038e-34
1	58480867	C	CT	1:58480867:I:1	0.134377	0.0236204	1.6801188330219523e-08
1	58481189	T	G	1:58481189:T:G	0.122711	0.017333	2.178211266293439e-12
1	58506159	G	A	1:58506159:G:A	0.112135	0.01364	3.4190070790206896e-16
1	58506268	C	A	1:58506268:C:A	0.114523	0.0137153	1.1885022274370165e-16
1	58530531	T	C	1:58530531:T:C	0.121481	0.0181414	3.091718893891698e-11
1	58539307	C	T	1:58539307:C:T	0.082978	0.00636577	1.883215413189167e-38
2	168824768	T	TTTTGTTTGTTTG	2:168824768:I:12	0.103888	0.0131674	5.3703179637025325e-15
2	168824768	T	TTTTGTTTG	2:168824768:I:8	0.112013	0.0146801	4.128573303779981e-14
2	168834219	T	C	2:168834219:T:C	0.0962217	0.00951813	1.0207044307665863e-23
2	168850752	AAATTCTCTGG	A	2:168850752:D:1

In [14]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes_bfile/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_annotate

1 33010786 1:33010786:C:T C T 0.260661 ADD -0.0877293 0.0125406 48.9169 11.5734
1 58468867 1:58468867:T:C T C 0.130661 ADD 0.100429 0.00929324 115.076 26.1207
1 58468874 1:58468874:T:C T C 0.131479 ADD 0.100516 0.0092829 115.54 26.2223
1 58468970 1:58468970:G:T G T 0.216892 ADD 0.0846861 0.00689 149.374 33.6243
1 58480867 1:58480867:I:1 C CT 0.0257702 ADD 0.134377 0.0236204 31.833 7.77466
1 58481189 1:58481189:T:G T G 0.0438016 ADD 0.122711 0.017333 49.3164 11.6619
1 58506159 1:58506159:G:A G A 0.0685236 ADD 0.112135 0.01364 66.5456 15.4661
1 58506268 1:58506268:C:A C A 0.067336 ADD 0.114523 0.0137153 68.6289 15.925
1 58530531 1:58530531:T:C T C 0.0404119 ADD 0.121481 0.0181414 44.118 10.5098
1 58539307 1:58539307:C:T C T 0.245147 ADD 0.082978 0.00636577 168.143 37.7251
2 168824768 2:168824768:I:12 T TTTTGTTTGTTTG 0.0632524 ADD 0.103888 0.0131674 61.1199 14.27
2 168824768 2:168824768:I:8 T TTTTGTTTG 0.0535825 ADD 0.112013 0.0146801 57.1064 13.3842
2 168834219 2:168834219:T:C T C 0.1275

## f2247 & f2257 exome filtered data

In [13]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_f2257_combined_exomes/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_annotate

1	46132597	C	A	1:46132597:C:A	-0.048425199999999995	0.00873988	2.9676316895317354e-08
1	58468867	T	C	1:58468867:T:C	0.0679535	0.010248700000000001	4.1133910615245514e-11
1	58468874	T	C	1:58468874:T:C	0.0689153	0.0102364	2.0682355559317648e-11
1	58468970	G	T	1:58468970:G:T	0.0606005	0.00760586	2.052579560952533e-15
1	58506159	G	A	1:58506159:G:A	0.0846227	0.0150871	2.4171299521044176e-08
1	58506268	C	A	1:58506268:C:A	0.0862086	0.0151635	1.5624999063995115e-08
1	58539307	C	T	1:58539307:C:T	0.0607587	0.00701922	6.366489102003704e-18
5	272741	A	G	5:272741:A:G	1.3423	0.14832599999999999	6.737518954705801e-20
5	272748	G	C	5:272748:G:C	1.33381	0.162199	1.0801896208818909e-16
5	272755	A	G	5:272755:A:G	1.30036	0.143215	6.297961479295158e-20
5	73773756	A	G	5:73773756:A:G	0.052482299999999996	0.00871943	1.727547510306292e-09
5	73776529	T	C	5:73776529:T:C	0.0514956	0.00867282	2.9077696549191336e-09
5	73780632	G	A	5:73780632:G:A	0.0511833	0.00866646	3.527361740282659e-09
5	73780649	GT	G	5:73780649:D

In [5]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_f2257_combined_exomes/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_annotate

1 46132597 1:46132597:C:A C A 0.436179 ADD -0.0484252 0.00873988 30.7286 7.52759 
1 58468867 1:58468867:T:C T C 0.131799 ADD 0.0679535 0.0102487 43.559 10.3858 
1 58468874 1:58468874:T:C T C 0.132606 ADD 0.0689153 0.0102364 44.9052 10.6844 
1 58468970 1:58468970:G:T G T 0.218038 ADD 0.0606005 0.00760586 63.0138 14.6877 
1 58506159 1:58506159:G:A G A 0.0686777 ADD 0.0846227 0.0150871 31.1268 7.6167 
1 58506268 1:58506268:C:A C A 0.0675475 ADD 0.0862086 0.0151635 31.974 7.80618 
1 58539307 1:58539307:C:T C T 0.2466 ADD 0.0607587 0.00701922 74.4039 17.1961 
5 272741 5:272741:A:G A G 0.000730754 ADD 1.3423 0.148326 83.3895 19.1715 
5 272748 5:272748:G:C G C 0.000610304 ADD 1.33381 0.162199 68.8173 15.9665 
5 272755 5:272755:A:G A G 0.00077819 ADD 1.30036 0.143215 83.5229 19.2008 
5 73773756 5:73773756:A:G A G 0.548568 ADD 0.0524823 0.00871943 36.2591 8.76257 
5 73776529 5:73776529:T:C T C 0.467546 ADD 0.0514956 0.00867282 35.2447 8.53644 
5 73780632 5:73780632:G:A G A 0.465594 ADD 0.051183

In [15]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_f2257_combined_exomes_bfile/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_stats.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_annotate

1	33010786	C	T	1:33010786:C:T	-0.0857345	0.0138235	5.617072119932656e-10
1	46132597	C	A	1:46132597:C:A	-0.0488328	0.00873003	2.189424633100619e-08
1	58468867	T	C	1:58468867:T:C	0.101218	0.0102299	8.695608932545938e-23
1	58468874	T	C	1:58468874:T:C	0.102319	0.0102172	2.649720098878972e-23
1	58468970	G	T	1:58468970:G:T	0.0899816	0.00759536	4.891026321245926e-32
1	58480867	C	CT	1:58480867:I:1	0.16287	0.0260437	5.739446341647975e-10
1	58481189	T	G	1:58481189:T:G	0.133909	0.0191164	3.694026443582841e-12
1	58506159	G	A	1:58506159:G:A	0.120971	0.0150604	1.5721717924629402e-15
1	58506268	C	A	1:58506268:C:A	0.122957	0.0151366	7.608516731700462e-16
1	58530531	T	C	1:58530531:T:C	0.136704	0.0198926	9.444957325077047e-12
1	58539307	C	T	1:58539307:C:T	0.0893096	0.0070114	8.529037030705686e-37
2	168824768	T	TTTTGTTTGTTTG	2:168824768:I:12	0.114035	0.0144865	6.2230028516916016e-15
2	168824768	T	TTTTGTTTG	2:168824768:I:8	0.115116	0.0162252	2.0658557819227176e-12
2	168834219	T	C	2:168834219:T:C	0.104722	

In [16]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_f2257_combined_exomes_bfile/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_stats_original_columns.gz | grep -w -f /home/dc2325/scratch60/output/bfile_annovar/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_annotate

1 33010786 1:33010786:C:T C T 0.260525 ADD -0.0857345 0.0138235 38.4501 9.25049
1 46132597 1:46132597:C:A C A 0.436179 ADD -0.0488328 0.00873003 31.3189 7.65967
1 58468867 1:58468867:T:C T C 0.131799 ADD 0.101218 0.0102299 96.5514 22.0607
1 58468874 1:58468874:T:C T C 0.132606 ADD 0.102319 0.0102172 98.9045 22.5768
1 58468970 1:58468970:G:T G T 0.218038 ADD 0.0899816 0.00759536 138.792 31.3106
1 58480867 1:58480867:I:1 C CT 0.0258005 ADD 0.16287 0.0260437 38.4081 9.24113
1 58481189 1:58481189:T:G T G 0.0439355 ADD 0.133909 0.0191164 48.2804 11.4325
1 58506159 1:58506159:G:A G A 0.0686777 ADD 0.120971 0.0150604 63.5392 14.8035
1 58506268 1:58506268:C:A C A 0.0675475 ADD 0.122957 0.0151366 64.9689 15.1187
1 58530531 1:58530531:T:C T C 0.0408165 ADD 0.136704 0.0198926 46.4402 11.0248
1 58539307 1:58539307:C:T C T 0.2466 ADD 0.0893096 0.0070114 160.562 36.0691
2 168824768 2:168824768:I:12 T TTTTGTTTGTTTG 0.0637661 ADD 0.114035 0.0144865 60.8299 14.206
2 168824768 2:168824768:I:8 T TTTTGTTT

In [24]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2247_f2257_combined/200828_UKBB_f2247_f2257_f2247_f2257.fastGWA.snp_stats.gz | awk '{print $5}' | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.hg38_multianno.csv.hg38_multianno.snplist

rs1707304
rs697591
rs822062
rs338240
rs3087585
rs11207245
rs4704097
rs7714670
rs11949860
rs6453022
rs10683146
rs7716253
rs2973549
rs2973548
rs1574430
rs2270860
rs2242416
rs2254303
rs146694394
rs2304787
rs1062850
rs1062851
rs36062310



In [20]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2247_hearing_difficulty/200828_UKBB_Hearing_difficulty_f2247_hearing_diff_new.fastGWA.snp_stats.gz | awk '{print $5}' | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.snplist 

zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2257_hearing_background_noise/200828_UKBB_Hearing_background_noise_f2257_hearing_noise_cat.fastGWA.snp_stats.gz | awk '{print $5}' | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.snplist

In [25]:
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f3393_hearing_aid/200828_UKBB_Hearing_aid_f3393_hearing_aid_cat.fastGWA.snp_stats.gz | awk '{print $5}' | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.snplist




### To get the snps from the csv file

In [None]:
cat 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.csv | awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' | sed 's/"//g' > 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.snplist


In [None]:
cat 010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.csv | awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' | sed 's/"//g' > 010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.snplist


In [None]:
cat 010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.csv | awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' | sed 's/"//g' > 010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.snplist

In [None]:
cat 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.csv | awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' | sed 's/"//g' > 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.snplist
