# Post-GWAS analyses

# Aim

The aim of this notebook is to carry out post-GWAS analyses, such as annotating SNPs to genes, gene expression and pathway analyses, among others.

Here [snpGeneSets v1.12](https://www.umc.edu/SoPH/Departments-and-Faculty/Data-Science/Research/Services/Software.html) is used; FUMA is a web-based resource that is also useful for this purpose.


## Usage



In [1]:
[global]
# Output directory for generated files
parameter: cwd = path
# Path to the (gzipped) summary statistics file
parameter: sumstatsFile = path
# Genome assembly as an integer, 37 or 38 (passed as GRCh to getSNPMap in snp_to_gene)
parameter: hg = int
# Job size (used as trunk_size by the task statements)
parameter: job_size = 1
# Number of cores requested per task; the task statements of bim_merge,
# annovar_2 and annovar_3 reference it as `cores = numThreads`
parameter: numThreads = 1
# Task template that loads the ANNOVAR module from the cluster before running
# the command; used by annovar_3 when annotate_variation.pl is not found
parameter: annovar_module = '''
module load ANNOVAR/2020Jun08-foss-2018b-Perl-5.28.0
echo "Module annovar loaded"
{cmd}
'''
# Software container option
parameter: container_annovar = 'gaow/gatk4-annovar'

In [2]:
# Annotate snps to gene
[snp_to_gene]
# Column name for BP
parameter: bp = 'POS'
# Column name for p-value
parameter: pval = 'P'
# Column name for SNP
parameter: snp = 'SNP'
input: sumstatsFile
output: f'{_input:nn}.gene_ann'
task: trunk_workers = 1, trunk_size = job_size, walltime = '3h', mem = '10G', tags = f'{step_name}_{_output:bn}'
R: expand='${ }', stderr = f'{_output:n}.stderr', stdout = f'{_output:n}.stdout'
    library('snpGeneSets')
    library('dplyr')
    # Import the sumstats file as dataframe
    data <- read.table(gzfile('${_input}'), header = TRUE)
    head(data)
    # Keep SNPs with p-value < 5e-8 (the p-value column is given by the pval parameter)
    # Subset data to obtain only chr, pos and snp for gene mapping
    sig.p <- data %>%
      filter(${pval} < 5e-8) %>%
      mutate(chr = CHR,
             pos = ${bp},
             snp = as.character(${snp})) %>%
      select(chr, pos, snp)
    head(sig.p)
    # Get the annotation of SNPs for the requested genome assembly
    snpMapAnn <- getSNPMap(sig.p$snp, GRCh = ${hg})
    # Mapping SNPs to genes (define gene boundary ‘up’ for the upstream region and ‘down’ for the downstream region with default value of 2,000 bp for both)
    # NOTE(review): snp2Gene and getGeneMap are called without the genome assembly,
    # so they presumably fall back to the package default - confirm this matches hg
    snpGeneMapAnn <- snp2Gene(snpMapAnn$rsid_map$snp)
    # length() takes a single argument: the newline belongs to cat(), not length()
    cat("The unique number of genes is", length(unique(snpGeneMapAnn$map$gene_id)), "\n")
    cat("The number of variants that could not be mapped to a gene is:", length(snpGeneMapAnn$other), "\n")
    # Get the gene-name and gene-id for the mapped variants
    gene_mapped <- getGeneMap(snpGeneMapAnn$map$gene_id)$gene_map
    # Merge the datasets: SNP positions -> gene ids -> gene names
    snp_gene <- merge(x = snpMapAnn$rsid_map, y = snpGeneMapAnn$map[, c("snp", "gene_id")], by = "snp", all.x = TRUE)
    snp_gene_2 <- merge(x = snp_gene, y = gene_mapped[, c("gene_id", "gene_name")], by = "gene_id", all.x = TRUE)
    # Rename the key to the SNP column name of the sumstats so both tables can be joined on it
    names(snp_gene_2)[names(snp_gene_2) == 'snp'] <- '${snp}'
    snp_gene_3 <- merge(x = snp_gene_2, y = data[, c("A1", "A2", "N", "AF1", "${pval}", "BETA", "SE", "INFO", "${snp}")], by = "${snp}", all.x = TRUE)
    # Get the final table ordered by p-value
    final_gene_set <- snp_gene_3 %>%
      select(chr, ${snp}, pos, A1, A2, N, AF1, BETA, SE, ${pval}, INFO, gene_id, gene_name) %>%
      arrange(${pval})
    names(final_gene_set)[names(final_gene_set) == 'chr'] <- 'CHR'
    names(final_gene_set)[names(final_gene_set) == 'pos'] <- 'POS'
    # Write results to a table
    write.table(final_gene_set, '${_output}', sep = "\t", quote = FALSE, row.names = FALSE)

In [None]:
# Merge all the bimfiles into a single file to use later with awk
# Only need to run this cell once
[bim_merge]
# Paths to the original bim files (one or more; they are concatenated in the order given)
parameter: bimfiles = paths
# Name for the merged bim files
parameter: bim_name = path
input: bimfiles
output: bim_name
task: trunk_workers = 1, walltime = '10h', mem = '10G', cores = numThreads, tags = f'{step_name}_{_output:bn}'
bash: expand= "${ }", stderr = f'{_output:n}.stderr', stdout = f'{_output:n}.stdout' 
      # Concatenate every input bim file into the single merged file
      cat ${_input} > ${_output}

In [None]:
# Get the list of significantly associated SNPs
[annovar_1]
# Column name for BP (not referenced by this step's script)
parameter: bp = 'POS'
# Column name for p-value
parameter: pval = 'P'
# Column name for SNP
parameter: snp = 'SNP'
input: sumstatsFile
output: f'{cwd}/{_input:bnn}.snp_annotate'
task: trunk_workers = 1, trunk_size = job_size, walltime = '3h', mem = '10G', tags = f'{step_name}_{_output:bn}'
R: expand='${ }', stderr = f'{_output}.stderr', stdout = f'{_output}.stdout'
    library('dplyr')
    # Import the sumstats file as dataframe
    data <- read.table(gzfile('${_input}'), header = TRUE)
    # Keep SNPs with p-value < 5e-8 and retain only the SNP identifier column;
    # both column names come from the pval and snp parameters
    sig.p <- data %>%
      filter(${pval} < 5e-8) %>%
      select(${snp})
    # One SNP identifier per line, no header
    write.table(sig.p, '${_output}', sep = " ", quote = FALSE, row.names = FALSE, col.names = FALSE)

In [None]:
# Get chr, start, end, ref_allele, alt_allele format
[annovar_2]
# Bim file that is searched for the variants listed in the step input
parameter: bim_name = path
output: f'{_input:n}.avinput'
task: trunk_workers = 1, walltime = '10h', mem = '10G', cores = numThreads, tags = f'{step_name}_{_output:bn}'
bash: expand= "${ }", stderr = f'{_output}.stderr', stdout = f'{_output}.stdout' 
    # First file: remember column 1 of every line; second file (the bim):
    # keep the rows whose column 2 was remembered
    awk -F" " 'FNR==NR {wanted[$1]; next} ($2 in wanted)' ${_input} ${bim_name} > ${_output:n}.tmp
    # Emit columns 1, 4, end, 6, 5. The end equals column 4 unless column 2
    # contains a "D", in which case the length difference between columns 6 and 5 is added
    awk '{
        last = $4
        if ($2 ~ /D/) last = $4 + (length($6) - length($5))
        print $1, $4, last, $6, $5
    }' ${_output:n}.tmp > ${_output}
    # remove temporary files
    rm -f ${_output:n}.tmp

In [None]:
# Annotate variants file using ANNOVAR
[annovar_3]
# Path to the ANNOVAR humandb directory (also mounted into the container at the same absolute path)
parameter: humandb = path
# Human genome build passed to -buildver; it is also part of the output file name
parameter: build = 'hg38'
# Add xreffile to option without -exonicsplicing
# mart_export_2019_LOFtools3.txt is the latest xreffile option -> Phenotype description,HGNC symbol,MIM morbid description,CGD_CONDITION,CGD_inh,CGD_man,CGD_comm,LOF_tools
#parameter: x_ref = path(f"{humandb}/mart_export_2019_LOFtools3.txt")
# Annovar protocol: the 18 annotation databases, joined with commas for -protocol
parameter: protocol = ['refGene', 'refGeneWithVer', 'knownGene', 'ensGene', 'phastConsElements30way', 'encRegTfbsClustered', 'gwasCatalog', 'gnomad211_genome', 'gnomad211_exome', 'gme', 'kaviar_20150923', 'abraom', 'avsnp150', 'dbnsfp41a', 'dbscsnv11', 'regsnpintron', 'clinvar_20200316', 'gene4denovo201907']
# Annovar operation: one entry per protocol, in the same order (18 entries), joined with commas for -operation
parameter: operation = ['g', 'g', 'g', 'g', 'r', 'r', 'r', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f']
# Annovar args: one (possibly empty) entry per protocol, in the same order (18 entries), joined with commas for -arg
parameter: arg = ['"-splicing 12 -exonicsplicing"', '"-splicing 30"', '"-splicing 12 -exonicsplicing"', '"-splicing 12"', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
output: f'{cwd}/{_input:bn}.{build}_multianno.csv'
# The task runs the command as is when annotate_variation.pl is found, otherwise through the annovar_module template
task: trunk_workers = 1, walltime = '10h', mem = '30G', cores = numThreads, tags = f'{step_name}_{_output:bn}', template = '{cmd}' if executable('annotate_variation.pl').target_exists() else annovar_module
bash: container=container_annovar, volumes=[f'{humandb:a}:{humandb:a}'], expand="${ }", stderr=f'{_output}.stderr', stdout=f'{_output}.stdout'
    # Do not add -intronhgvs as an option -> it writes cDNA variants as HGVS but creates issues (+2 splice site reported only)
    # -nastring . can only be . for VCF files
    # regsnpintron might cause shifted lines (be careful when using it)
    # -out receives the output path with its last two extensions removed
    table_annovar.pl \
        ${_input} \
        ${humandb} \
        -buildver ${build} \
        -out ${_output:nn}\
        -remove \
        -polish \
        -nastring . \
        -protocol ${",".join(protocol)} \
        -operation ${",".join(operation)} \
        -arg ${",".join(arg)} \
        -csvout 

## To run this notebook

In [6]:
# Inputs, outputs and settings for the workflow run
cwd=/home/dc2325/scratch60/output/
sumstatsFile=/gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats.gz
hg=38
job_size=1
container_annovar=/home/dc2325/scratch60/annovar.sif
# Space-separated list of the bim files for chromosomes 1 to 22
bimfiles=$(echo /gpfs/gibbs/pi/dewan/data/UKBiobank/genotype_files/ukb28374_exomedata/exome_data_OCT2020/ukb23155_c{1..22}_b0_v1.bim)
bim_name=/home/dc2325/scratch60/output/ukb23155_chr1_chr22.bim
humandb=/gpfs/ysm/datasets/db/annovar/humandb

# Run the annovar workflow (steps annovar_1 to annovar_3) of the notebook
sos run ~/project/UKBB_GWAS_dev/workflow/snptogene.ipynb annovar \
    --cwd "$cwd" \
    --sumstatsFile "$sumstatsFile" \
    --bim_name "$bim_name" \
    --hg "$hg" \
    --job_size "$job_size" \
    --humandb "$humandb" \
    --container_annovar "$container_annovar" \
    -s build

INFO: Running annovar_1: Get the list of significantly associated SNPs
INFO: annovar_1 is completed.
INFO: annovar_1 output:   /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate
INFO: Running annovar_2: Get chr, start, end, ref_allele, alt_allele format
INFO: annovar_2 is completed.
INFO: annovar_2 output:   /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.avinput
INFO: Running annovar_3: Annotate variants file using ANNOVAR
INFO: annovar_3 is completed.
INFO: annovar_3 output:   /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.csv
INFO: Workflow annovar (ID=w85450d9626e3d7a0) is executed successfully with 3 completed steps.


In [1]:
# Lines of the hearing-difficulty .avinput that also occur verbatim in the
# combined f2247_f2257 .avinput
cd /home/dc2325/scratch60/output/
awk 'NR == FNR { in_first[$0] = 1; next } in_first[$0]' \
    010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.avinput \
    010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58506159 58506159 G A
1 58506268 58506268 C A
1 58539307 58539307 C T
5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
5 73773756 73773756 A G
5 73776529 73776529 T C
5 73780632 73780632 G A
5 73780649 73780650 GT G
5 73780686 73780686 C A
5 73794340 73794340 A AGTT
5 73794436 73794436 T C
5 73795301 73795301 T A
5 73795403 73795403 C T
6 43301291 43301291 A C
6 43302413 43302413 C T
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G
6 158071628 158071628 C T
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T
22 38086345 38086387 GTGCGGGAGCGGGACTGGCCATCCCAGTACTCCGAGGGTGCTA G
22 50549676 50549676 G A



In [2]:
# Lines of the background-noise .avinput that also occur verbatim in the
# hearing-difficulty .avinput
awk 'NR == FNR { in_first[$0] = 1; next } in_first[$0]' \
    010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput \
    010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.avinput

5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
6 43301291 43301291 A C
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G



In [4]:
# Lines of the hearing-difficulty .avinput that also occur verbatim in the
# background-noise .avinput (same comparison as above, files swapped)
awk 'NR == FNR { in_first[$0] = 1; next } in_first[$0]' \
    010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.avinput \
    010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput

5 272741 272741 A G
5 272748 272748 G C
5 272755 272755 A G
6 43301291 43301291 A C
6 43305866 43305866 A G
6 43308652 43308652 G A
6 75841299 75841299 A G



In [5]:
# Lines of the hearing-aid .avinput that also occur verbatim in the
# hearing-difficulty .avinput
awk 'NR == FNR { in_first[$0] = 1; next } in_first[$0]' \
    010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.avinput \
    010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58539307 58539307 C T
6 75841299 75841299 A G
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T
10 7292138 7292138 G A
17 41827392 41827392 A C
17 44848191 44848191 C T



In [6]:
# Lines of the hearing-aid .avinput that also occur verbatim in the
# combined f2247_f2257 .avinput
awk 'NR == FNR { in_first[$0] = 1; next } in_first[$0]' \
    010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.avinput \
    010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.avinput

1 58468867 58468867 T C
1 58468874 58468874 T C
1 58468970 58468970 G T
1 58539307 58539307 C T
6 75362956 75362956 T C
6 75841299 75841299 A G
8 86556416 86556416 T G
8 86558437 86558437 A C
8 86558500 86558500 C T



In [8]:
# Print the hearing-aid summary statistics rows that contain, as a whole word,
# any of the identifiers listed in the .snp_annotate file
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f3393_hearing_aid_exomes/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_stats.gz \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.snp_annotate

1	58468867	T	C	1:58468867:T:C	-0.168986	0.021412099999999996	7.055048440506522e-16
1	58468874	T	C	1:58468874:T:C	-0.168015	0.0213657	9.11381062129295e-16
1	58468970	G	T	1:58468970:G:T	-0.14103	0.0156138	6.13903345360935e-20
1	58539307	C	T	1:58539307:C:T	-0.13430799999999998	0.0143388	3.3853221281535305e-21
2	168834219	T	C	2:168834219:T:C	-0.130908	0.021813	1.029437536659268e-09
6	75362956	T	C	6:75362956:T:C	2.48143	0.369753	8.570575794832358e-09
6	75841299	A	G	6:75841299:A:G	2.9806	0.407723	1.2227847112296838e-10
8	86556416	T	G	8:86556416:T:G	-0.115771	0.017346200000000003	1.4047531026968791e-11
8	86558437	A	C	8:86558437:A:C	-0.114282	0.0193435	2.110425374828825e-09
8	86558500	C	T	8:86558500:C:T	-0.116954	0.019379	9.270219126904037e-10
10	7243670	A	G	10:7243670:A:G	-0.0980153	0.0159944	1.0312880846394044e-09
10	7276841	T	C	10:7276841:T:C	-0.133236	0.0219836	7.090877782620701e-10
10	7277326	C	A	10:7277326:C:A	-0.143172	0.0223905	7.409689339136985e-11
10	7292138	G	A	10:7292138:G:A	-0.110

In [9]:
# Print the background-noise summary statistics rows that contain, as a whole
# word, any of the identifiers listed in the .snp_annotate file
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2257_hearing_noise_exomes/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_stats.gz \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.snp_annotate

5	272741	A	G	5:272741:A:G	0.897546	0.128497	1.2220810223250552e-12
5	272748	G	C	5:272748:G:C	0.8718739999999999	0.139898	2.4925277377693566e-10
5	272755	A	G	5:272755:A:G	0.8530200000000001	0.12476400000000001	4.0766178334794515e-12
6	43301291	A	C	6:43301291:A:C	0.04535719999999999	0.00720349	2.987376772921112e-10
6	43305866	A	G	6:43305866:A:G	0.0454121	0.00720402	2.852462870044014e-10
6	43308652	G	A	6:43308652:G:A	0.044663800000000003	0.00718951	5.126842785315215e-10
6	75841299	A	G	6:75841299:A:G	2.93015	0.673602	3.333419717618388e-10



In [11]:
# Print the hearing-difficulty summary statistics rows that contain, as a whole
# word, any of the identifiers listed in the .snp_annotate file
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_hearing_difficulty_exomes/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_stats.gz \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.snp_annotate

1	58468867	T	C	1:58468867:T:C	0.0717153	0.00923927	1.1587773561551261e-14
1	58468874	T	C	1:58468874:T:C	0.0715995	0.00922922	1.1937132101928058e-14
1	58468970	G	T	1:58468970:G:T	0.0593697	0.00684749	5.851941823780803e-18
1	58506159	G	A	1:58506159:G:A	0.0802067	0.013556499999999999	4.0267994442394125e-09
1	58506268	C	A	1:58506268:C:A	0.08213530000000001	0.013631799999999998	2.092088248507645e-09
1	58539307	C	T	1:58539307:C:T	0.0580006	0.00632463	6.44465984741689e-20
5	272741	A	G	5:272741:A:G	1.19275	0.125574	4.1152857843551e-21
5	272748	G	C	5:272748:G:C	1.20544	0.13772	3.4268887043278663e-18
5	272755	A	G	5:272755:A:G	1.17113	0.122546	2.5409727055493115e-21
5	73773756	A	G	5:73773756:A:G	0.0452337	0.00785184	8.259999099997013e-09
5	73776529	T	C	5:73776529:T:C	0.04761169999999999	0.0078087	1.0857754055343505e-09
5	73780632	G	A	5:73780632:G:A	0.0471588	0.00780284	1.5153396952829279e-09
5	73780649	GT	G	5:73780649:D:1	0.04820119999999999	0.00790071	1.0603274386034904e-09
5	73780686	C	A	5:7378

In [13]:
# Print the combined f2247_f2257 summary statistics rows that contain, as a
# whole word, any of the identifiers listed in the .snp_annotate file
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/REGENIE_results/results_exome_data/f2247_f2257_combined_exomes/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_stats.gz \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.snp_annotate

1	46132597	C	A	1:46132597:C:A	-0.048425199999999995	0.00873988	2.9676316895317354e-08
1	58468867	T	C	1:58468867:T:C	0.0679535	0.010248700000000001	4.1133910615245514e-11
1	58468874	T	C	1:58468874:T:C	0.0689153	0.0102364	2.0682355559317648e-11
1	58468970	G	T	1:58468970:G:T	0.0606005	0.00760586	2.052579560952533e-15
1	58506159	G	A	1:58506159:G:A	0.0846227	0.0150871	2.4171299521044176e-08
1	58506268	C	A	1:58506268:C:A	0.0862086	0.0151635	1.5624999063995115e-08
1	58539307	C	T	1:58539307:C:T	0.0607587	0.00701922	6.366489102003704e-18
5	272741	A	G	5:272741:A:G	1.3423	0.14832599999999999	6.737518954705801e-20
5	272748	G	C	5:272748:G:C	1.33381	0.162199	1.0801896208818909e-16
5	272755	A	G	5:272755:A:G	1.30036	0.143215	6.297961479295158e-20
5	73773756	A	G	5:73773756:A:G	0.052482299999999996	0.00871943	1.727547510306292e-09
5	73776529	T	C	5:73776529:T:C	0.0514956	0.00867282	2.9077696549191336e-09
5	73780632	G	A	5:73780632:G:A	0.0511833	0.00866646	3.527361740282659e-09
5	73780649	GT	G	5:73780649:D

In [24]:
# Take column 5 of the imputed-data (FastGWA) combined f2247_f2257 summary
# statistics and keep the values that match, as a whole word, an entry of the
# snplist derived from the exome annotation
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2247_f2257_combined/200828_UKBB_f2247_f2257_f2247_f2257.fastGWA.snp_stats.gz \
    | awk '{ print $5 }' \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_f2247_f2257_136862ind_exomes_f2247_f2257.regenie.hg38_multianno.csv.hg38_multianno.snplist

rs1707304
rs697591
rs822062
rs338240
rs3087585
rs11207245
rs4704097
rs7714670
rs11949860
rs6453022
rs10683146
rs7716253
rs2973549
rs2973548
rs1574430
rs2270860
rs2242416
rs2254303
rs146694394
rs2304787
rs1062850
rs1062851
rs36062310



In [20]:
# Take column 5 of the imputed-data (FastGWA) hearing-difficulty summary
# statistics and keep the values that match, as a whole word, an entry of the
# snplist derived from the exome annotation
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2247_hearing_difficulty/200828_UKBB_Hearing_difficulty_f2247_hearing_diff_new.fastGWA.snp_stats.gz \
    | awk '{ print $5 }' \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.snplist

rs697591
rs822062
rs338240
rs3087585
rs11207245
rs4704097
rs7714670
rs11949860
rs6453022
rs10683146
rs7716253
rs2973549
rs2973548
rs1574430
rs2270860
rs2242416
rs2254303
rs9493627
rs146694394
rs2304787
rs1062850
rs1062851
rs749165
rs112243842
rs9896766
rs12164
rs36062310



In [21]:
# Take column 5 of the imputed-data (FastGWA) background-noise summary
# statistics and keep the values that match, as a whole word, an entry of the
# snplist derived from the exome annotation
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f2257_hearing_background_noise/200828_UKBB_Hearing_background_noise_f2257_hearing_noise_cat.fastGWA.snp_stats.gz \
    | awk '{ print $5 }' \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.snplist

rs1574430
rs2242416
rs2254303



In [25]:
# Take column 5 of the imputed-data (FastGWA) hearing-aid summary statistics
# and keep the values that match, as a whole word, an entry of the snplist
# derived from the exome annotation
zcat /gpfs/gibbs/pi/dewan/data/UKBiobank/results/FastGWA_results/results_imputed_data/f3393_hearing_aid/200828_UKBB_Hearing_aid_f3393_hearing_aid_cat.fastGWA.snp_stats.gz \
    | awk '{ print $5 }' \
    | grep -w -f /home/dc2325/scratch60/output/010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.snplist




### To get the SNPs from the ANNOVAR csv file

In [None]:
# Extract field 77 of the ANNOVAR csv (FPAT keeps quoted fields that contain
# commas in one piece), strip the double quotes and save it as the snplist
# NOTE(review): field 77 is presumably the avsnp150 rsID column - confirm
awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' \
    010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.csv \
    | sed 's/"//g' \
    > 010421_UKBB_Hearing_difficulty_f2247_171970ind_exomes_hearing_diff_new.regenie.hg38_multianno.snplist


In [None]:
# Extract field 77 of the ANNOVAR csv (FPAT keeps quoted fields that contain
# commas in one piece), strip the double quotes and save it as the snplist
# NOTE(review): field 77 is presumably the avsnp150 rsID column - confirm
awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' \
    010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.csv \
    | sed 's/"//g' \
    > 010421_UKBB_Hearing_background_noise_f2257_175531ind_exomes_hearing_noise_cat.regenie.hg38_multianno.snplist


In [None]:
# Extract field 77 of the ANNOVAR csv (FPAT keeps quoted fields that contain
# commas in one piece), strip the double quotes and save it as the snplist
# NOTE(review): field 77 is presumably the avsnp150 rsID column - confirm
awk -vFPAT='([^,]*)|("[^"]+")' -vOFS=, '{print $77}' \
    010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.csv \
    | sed 's/"//g' \
    > 010421_UKBB_Hearing_aid_f3393_128254ind_exomes_hearing_aid_cat.regenie.hg38_multianno.snplist