In [11]:
import gwaslab as gl

### Check reference and download

In [12]:
# List the reference-dataset keywords GWASLab knows about; the returned mapping
# (keyword -> description / suggested_use) is displayed as the cell output.
gl.check_available_ref()

2026/01/12 11:26:37 Start to check available reference files...
2026/01/12 11:26:37  - Available keywords:  1kg_eas_hg19 1kg_eur_hg19 1kg_eas_hg38 1kg_eur_hg38 1kg_sas_hg19 1kg_amr_hg19 1kg_sas_hg38 1kg_amr_hg38 1kg_afr_hg19 1kg_pan_hg19 1kg_afr_hg38 1kg_pan_hg38 dbsnp_v151_hg19 dbsnp_v151_hg38 dbsnp_v157_hg19 dbsnp_v157_hg38 ucsc_genome_hg19 ucsc_genome_hg38 1kg_dbsnp151_hg19_auto 1kg_dbsnp151_hg38_auto recombination_hg19 recombination_hg38 ensembl_hg19_gtf ensembl_hg38_gtf refseq_hg19_gtf refseq_hg38_gtf testlink 19to38 38to19 1kg_hm3_hg38_eaf 1kg_hm3_hg19_eaf


{'1kg_eas_hg19': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EAS population'},
 '1kg_eur_hg19': {'description': '1000 Genomes Project European (1KG EUR) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EUR population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EUR population'},
 '1kg_eas_hg38': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg38 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
 

In [13]:
# Fetch the 1KG/dbSNP151 (hg38, autosomes) SNPID-rsID table used for rsID assignment below.
# In the recorded run the file already existed under ~/.gwaslab and only the
# config record was updated.
gl.download_ref("1kg_dbsnp151_hg38_auto")

2026/01/12 11:26:41 Start to download  1kg_dbsnp151_hg38_auto  ...
2026/01/12 11:26:41  -Downloading to: /home/ofgeha/.gwaslab/1kg_dbsnp151_hg38_auto.txt.gz
2026/01/12 11:26:41  -File /home/ofgeha/.gwaslab/1kg_dbsnp151_hg38_auto.txt.gz exists.
2026/01/12 11:26:41  - Updating record in config file...
2026/01/12 11:26:41 Downloaded  1kg_dbsnp151_hg38_auto  successfully!


### Load sample data

In [14]:
# Tab-separated, gzip-compressed summary statistics to load.
sumstats_path = "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_T2T_liftover.tsv.gz"

# GWASLab argument -> column header in the input file.
# No sample-size (N) or direction (Dir) column is mapped; if a sample-size
# column such as "n_complete_samples" is present it could be passed as `n=`.
# NOTE(review): no genome build is declared here, and the file name mentions a
# T2T liftover -- confirm which build these coordinates are on before matching
# against hg38 references.
column_map = {
    "snpid": "SNPID",
    "chrom": "CHR",
    "pos": "POS",
    "ea": "EA",      # effect allele
    "nea": "NEA",    # non-effect allele
    "eaf": "EAF",    # effect allele frequency
    "beta": "BETA",
    "se": "SE",
    "p": "P",
}

mysumstats = gl.Sumstats(
    sumstats_path,
    **column_map,
    verbose=True,
    readargs={"sep": "\t"},
)

2026/01/12 11:26:43 GWASLab v4.0.4 https://cloufield.github.io/gwaslab/
2026/01/12 11:26:43 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/12 11:26:43 Python version: 3.12.3 (main, Nov  6 2025, 13:44:16) [GCC 13.3.0]
2026/01/12 11:26:43 Start to initialize gl.Sumstats from file :/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_T2T_liftover.tsv.gz


2026/01/12 11:27:08  -Reading columns          : NEA,SNPID,EAF,POS,CHR,EA,BETA,SE,P
2026/01/12 11:27:08  -Renaming columns to      : NEA,SNPID,EAF,POS,CHR,EA,BETA,SE,P
2026/01/12 11:27:08  -Current Dataframe shape : 13791467  x  9
2026/01/12 11:27:08  -Initiating a status column: STATUS ...
2026/01/12 11:27:11 Start to reorder the columns ...(v4.0.4)
2026/01/12 11:27:11  -Reordering columns to    : SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
2026/01/12 11:27:12 Finished reordering the columns.
2026/01/12 11:27:12  -Trying to convert datatype for CHR: string -> Int64...Success
2026/01/12 11:27:14  -Column  : SNPID  CHR   POS   EA       NEA      STATUS EAF     BETA    SE      P      
2026/01/12 11:27:14  -DType   : object Int64 int64 category category int64  float64 float64 float64 float64
2026/01/12 11:27:14  -Verified: T      T     T     T        T        T      T       T       T       T      
2026/01/12 11:27:15  -Current Dataframe memory usage: 963.86 MB
2026/01/12 11:27:15 Finished lo

In [15]:
# Inspect the loaded table (pandas shows a truncated preview of the frame).
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:15791:C:T,1,15791,T,C,9999999,1.000000,894.616000,1204.870000,0.457786
1,1:69487:G:A,1,69487,A,G,9999999,0.999994,-2.715450,2.360060,0.249902
2,1:69569:T:C,1,69569,C,T,9999999,0.999812,-0.484284,0.423462,0.252778
3,1:139853:C:T,1,139853,T,C,9999999,0.999994,-2.703560,2.360130,0.251997
4,1:692794:CA:C,1,692794,C,CA,9999999,0.889410,-0.016436,0.019585,0.401342
...,...,...,...,...,...,...,...,...,...,...
13791462,X:154929412:C:T,23,154929412,T,C,9999999,0.754527,-0.016260,0.010723,0.129427
13791463,X:154929637:CT:C,23,154929637,C,CT,9999999,0.770274,-0.027098,0.011190,0.015456
13791464,X:154929952:CAA:C,23,154929952,C,CAA,9999999,0.760570,-0.020494,0.011278,0.069202
13791465,X:154930230:A:G,23,154930230,G,A,9999999,0.754113,-0.016347,0.010721,0.127334


In [16]:
# Run GWASLab's basic QC on the loaded sumstats; the log shows SNPID, CHR and POS
# checks among its steps. The object is modified in place: in the recorded run
# the table went from 13,791,467 rows to 13,779,885 rows and STATUS codes changed.
mysumstats.basic_check()

2026/01/12 11:27:28 Start to check SNPID/rsID ...(v4.0.4)
2026/01/12 11:27:28  -Current Dataframe shape : 13791467 x 10 ; Memory usage: 963.86 MB
2026/01/12 11:27:28  -Checking SNPID data type...
2026/01/12 11:27:28  -Converted datatype for SNPID: object -> string
2026/01/12 11:27:28  -Checking if SNPID contains NA strings :na,NA,Na,Nan,NaN,<NA>,null,NULL,#N/A,#VALUE!,N/A,n/a,missing,...
2026/01/12 11:27:29  -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)
2026/01/12 11:27:39 Finished checking SNPID/rsID.
2026/01/12 11:27:39 Start to fix chromosome notation (CHR) ...(v4.0.4)
2026/01/12 11:27:39  -Checking CHR data type...
2026/01/12 11:27:43  -Variants with standardized chromosome notation: 13791467
2026/01/12 11:27:43  -All CHR are already fixed...
2026/01/12 11:27:46 Finished fixing chromosome notation (CHR).
2026/01/12 11:27:46 Start to fix basepair positions (POS) ...(v4.0.4)
2026/01/12 11:27:46  -Trying to convert datatype for POS: int64 -> Int64...
2026/01/12 11:27:47

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:69487:G:A,1,69487,A,G,9960099,0.999994,-2.715450,2.360060,0.249902
1,1:69569:T:C,1,69569,C,T,9960099,0.999812,-0.484284,0.423462,0.252778
2,1:139853:C:T,1,139853,T,C,9960099,0.999994,-2.703560,2.360130,0.251997
3,1:692794:CA:C,1,692794,C,CA,9960399,0.889410,-0.016436,0.019585,0.401342
4,1:693731:A:G,1,693731,G,A,9960099,0.884233,-0.004255,0.018507,0.818155
...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.754527,-0.016260,0.010723,0.129427
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.770274,-0.027098,0.011190,0.015456
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.760570,-0.020494,0.011278,0.069202
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.754113,-0.016347,0.010721,0.127334


In [17]:
# Re-check SNPID and, with fixsep=True, normalise its separators to ":"
# (the log reports 'Replacing separators in SNPID with ":"').
mysumstats.fix_id(fixsep=True)

2026/01/12 11:30:04 Start to check SNPID/rsID ...(v4.0.4)
2026/01/12 11:30:04  -Checking SNPID data type...
2026/01/12 11:30:04  -Checking if SNPID contains NA strings :na,NA,Na,Nan,NaN,<NA>,null,NULL,#N/A,#VALUE!,N/A,n/a,missing,...
2026/01/12 11:30:04  -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)
2026/01/12 11:30:14  -Replacing separators in SNPID with ":" ...
2026/01/12 11:30:20 Finished checking SNPID/rsID.


Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:69487:G:A,1,69487,A,G,9960099,0.999994,-2.715450,2.360060,0.249902
1,1:69569:T:C,1,69569,C,T,9960099,0.999812,-0.484284,0.423462,0.252778
2,1:139853:C:T,1,139853,T,C,9960099,0.999994,-2.703560,2.360130,0.251997
3,1:692794:CA:C,1,692794,C,CA,9960399,0.889410,-0.016436,0.019585,0.401342
4,1:693731:A:G,1,693731,G,A,9960099,0.884233,-0.004255,0.018507,0.818155
...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.754527,-0.016260,0.010723,0.129427
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.770274,-0.027098,0.011190,0.015456
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.760570,-0.020494,0.011278,0.069202
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.754113,-0.016347,0.010721,0.127334


In [18]:
# Inspect the table again after QC and SNPID separator fixing.
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:69487:G:A,1,69487,A,G,9960099,0.999994,-2.715450,2.360060,0.249902
1,1:69569:T:C,1,69569,C,T,9960099,0.999812,-0.484284,0.423462,0.252778
2,1:139853:C:T,1,139853,T,C,9960099,0.999994,-2.703560,2.360130,0.251997
3,1:692794:CA:C,1,692794,C,CA,9960399,0.889410,-0.016436,0.019585,0.401342
4,1:693731:A:G,1,693731,G,A,9960099,0.884233,-0.004255,0.018507,0.818155
...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.754527,-0.016260,0.010723,0.129427
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.770274,-0.027098,0.011190,0.015456
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.760570,-0.020494,0.011278,0.069202
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.754113,-0.016347,0.010721,0.127334


### Assign rsID

#### Assign rsID using a SNPID-rsID table (variants in 1KG)

In [19]:
# Resolve the local path of the downloaded SNPID-rsID table, then annotate
# rsIDs by matching on SNPID.
# NOTE(review): in the recorded run only 141,189 of 13,638,696 variants were
# annotated; the input file name mentions a T2T liftover while this table is
# hg38 -- confirm the genome builds agree.
rsid_table_path = gl.get_path("1kg_dbsnp151_hg38_auto")
mysumstats.assign_rsid(ref_rsid_tsv=rsid_table_path)

2026/01/12 11:30:35 Start to assign rsID using reference file ...(v4.0.4)
2026/01/12 11:30:35  -Number of threads/cores to use: 1


2026/01/12 11:30:35  -13779885 rsID could be possibly fixed...
2026/01/12 11:30:36  -Setting block size:  5000000
2026/01/12 11:30:36  -Loading block: 0   1   2   3   4   5   6   7   8   9   10   11   12   13   
2026/01/12 11:37:02  -rsID annotation for 13638696 variants needed to be fixed!
2026/01/12 11:37:02  -Annotated 141189 rsID successfully!
2026/01/12 11:37:02  -Current Dataframe shape : 13779885 x 11 ; Memory usage: 1043.38 MB
2026/01/12 11:37:02 Finished assign rsID using reference file.


Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,rsID
0,1:69487:G:A,1,69487,A,G,9960099,0.999994,-2.715450,2.360060,0.249902,
1,1:69569:T:C,1,69569,C,T,9960099,0.999812,-0.484284,0.423462,0.252778,
2,1:139853:C:T,1,139853,T,C,9960099,0.999994,-2.703560,2.360130,0.251997,rs533633326
3,1:692794:CA:C,1,692794,C,CA,9960399,0.889410,-0.016436,0.019585,0.401342,
4,1:693731:A:G,1,693731,G,A,9960099,0.884233,-0.004255,0.018507,0.818155,
...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.754527,-0.016260,0.010723,0.129427,
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.770274,-0.027098,0.011190,0.015456,
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.760570,-0.020494,0.011278,0.069202,
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.754113,-0.016347,0.010721,0.127334,


In [20]:
# Confirm the rsID column was added by the assignment step.
mysumstats.data.columns

Index(['SNPID', 'CHR', 'POS', 'EA', 'NEA', 'STATUS', 'EAF', 'BETA', 'SE', 'P',
       'rsID'],
      dtype='object')

In [22]:
import os

# Prefix shared by the local per-chromosome reference VCFs; the chromosome
# label and ".vcf.gz" are appended to it.
base_ref_path = "/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr"

# The sumstats store chromosome X as 23 (see the CHR column), whereas reference
# files are commonly named with letters. These aliases are tried only when the
# numerically named file does not exist, so existing layouts keep working.
# NOTE(review): the 24 -> Y and 25 -> MT/M entries are assumed conventions; confirm.
chrom_file_aliases = {"23": ("X",), "24": ("Y",), "25": ("MT", "M")}


def find_chrom_vcf(base_path, chrom_label):
    """Return the first existing per-chromosome VCF for ``chrom_label``, or ``None``.

    Parameters
    ----------
    base_path : str
        Path prefix up to (and excluding) the chromosome label.
    chrom_label : str
        Chromosome label as it appears in the sumstats (e.g. "1", "23").

    Returns
    -------
    str or None
        ``f"{base_path}{label}.vcf.gz"`` for the first label (the given one,
        then its aliases) whose file exists; ``None`` if none exists.
    """
    candidate_labels = (chrom_label,) + chrom_file_aliases.get(chrom_label, ())
    for label in candidate_labels:
        candidate = f"{base_path}{label}.vcf.gz"
        if os.path.exists(candidate):
            return candidate
    return None


skipped_chroms = []

# Process one reference VCF per chromosome present in the data.
# dropna() keeps a missing CHR value from turning into a bogus "<NA>" file name.
# NOTE(review): every call runs on the whole Sumstats object; the recorded log
# shows "23 chromosomes" being queried against each single-chromosome VCF,
# which looks wasteful -- check whether GWASLab offers a way to restrict the
# lookup to the matching chromosome.
for c in mysumstats.data['CHR'].dropna().unique():
    chrom_str = str(c)
    ref_file = find_chrom_vcf(base_ref_path, chrom_str)

    if ref_file is None:
        print(f"!!! Skipping: {base_ref_path}{chrom_str}.vcf.gz not found.")
        skipped_chroms.append(chrom_str)
        continue

    print(f">>> Processing Chromosome {chrom_str}...")

    # Tell assign_rsid2 exactly which columns to use.
    mysumstats.assign_rsid2(
        vcf_path=ref_file,
        rsid="rsID",        # target column
        chrom="CHR",        # source chromosome column
        pos="POS",          # source position column
        ea="EA",            # effect allele
        nea="NEA",          # non-effect allele
        threads=6,
        overwrite="empty",  # value kept from the original call
        verbose=True,       # boolean, not the string "True"
    )

# Surface skipped chromosomes in one place instead of leaving them buried in the log.
if skipped_chroms:
    print(f"Chromosomes without a reference VCF: {', '.join(skipped_chroms)}")
print("Assignment complete!")

>>> Processing Chromosome 1...
2026/01/12 12:03:54 Start to assign rsID from reference ...(v4.0.4)
2026/01/12 12:03:54  -Number of threads/cores to use: 6


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:03:55  -Determining reference mode: vcf/bcf...
2026/01/12 12:03:55  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr1.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:03:55 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:03:55  -Number of threads/cores to use: 6
2026/01/12 12:03:55  -Current Dataframe shape : 11707851 x 2 ; Memory usage: 290.30 MB
2026/01/12 12:03:55  -Converting chromosome notation to reference notation...
2026/01/12 12:04:17  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:04:17  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:04:18  -Finished:

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 1 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:08:29  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:08:42  -Lookup table created: /tmp/tmp6hs9hl0h.lookup.txt.gz
2026/01/12 12:08:42 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:08:42 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:08:42  -Current Dataframe shape : 13779885 x 11 ; Memory usage: 1043.38 MB
2026/01/12 12:08:42  -Initialized ALLELE_FLIPPED column
2026/01/12 12:08:42  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:08:45  -Loaded 382,616 lookup rows...
2026/01/12 12:08:47  -Found 37078 flipped variants in this chunk
2026/01/12 12:08:48  -Updated ALLELE_FLIPPED=True for 37078 variants in this chunk
2026/01/12 12:08:48  -Newly annotated sumstats rows: 74,824 (chunk lookup rows: 382,616) | New flips: 74,824
2026/01/12 12:08:48  -Total unique sumstats rows annotated: 74,824
2026/01/12 12:08:48  -Total unique rows with allele flips: 37,078
2026/01/12 12:08:48  -Current Data

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:08:50  -Determining reference mode: vcf/bcf...
2026/01/12 12:08:50  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr2.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:08:50 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:08:50  -Number of threads/cores to use: 6
2026/01/12 12:08:50  -Current Dataframe shape : 11633027 x 2 ; Memory usage: 288.45 MB
2026/01/12 12:08:51  -Converting chromosome notation to reference notation...
2026/01/12 12:09:12  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:09:12  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:09:13  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:13:18  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:13:26  -Lookup table created: /tmp/tmp4diespqr.lookup.txt.gz
2026/01/12 12:13:26 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:13:26 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:13:26  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:13:26  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:13:27  -Loaded 427,453 lookup rows...
2026/01/12 12:13:28  -Found 40987 flipped variants in this chunk
2026/01/12 12:13:28  -Updated ALLELE_FLIPPED=True for 40987 variants in this chunk
2026/01/12 12:13:29  -Newly annotated sumstats rows: 80,727 (chunk lookup rows: 427,453) | New flips: 80,727
2026/01/12 12:13:29  -Total unique sumstats rows annotated: 80,727
2026/01/12 12:13:29  -Total unique rows with allele flips: 40,987
2026/01/12 12:13:29 Finished assigning from lookup table.
2026/01/12 12:13:29  -Filled 807

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:13:30  -Determining reference mode: vcf/bcf...
2026/01/12 12:13:30  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr3.vcf.gz...
2026/01/12 12:13:30 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:13:30  -Number of threads/cores to use: 6
2026/01/12 12:13:30  -Current Dataframe shape : 11552300 x 2 ; Memory usage: 286.45 MB


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:13:30  -Converting chromosome notation to reference notation...
2026/01/12 12:13:41  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:13:41  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:13:42  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:16:02  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:16:09  -Lookup table created: /tmp/tmpt59msah7.lookup.txt.gz
2026/01/12 12:16:09 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:16:09 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:16:09  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:16:09  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:16:10  -Loaded 366,130 lookup rows...
2026/01/12 12:16:11  -Found 35112 flipped variants in this chunk
2026/01/12 12:16:11  -Updated ALLELE_FLIPPED=True for 35112 variants in this chunk
2026/01/12 12:16:12  -Newly annotated sumstats rows: 71,996 (chunk lookup rows: 366,130) | New flips: 71,996
2026/01/12 12:16:12  -Total unique sumstats rows annotated: 71,996
2026/01/12 12:16:12  -Total unique rows with allele flips: 35,112
2026/01/12 12:16:12 Finished assigning from lookup table.
2026/01/12 12:16:12  -Filled 719

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:16:13  -Determining reference mode: vcf/bcf...
2026/01/12 12:16:13  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr4.vcf.gz...
2026/01/12 12:16:13 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:16:13  -Number of threads/cores to use: 6
2026/01/12 12:16:13  -Current Dataframe shape : 11480304 x 2 ; Memory usage: 284.66 MB


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:16:13  -Converting chromosome notation to reference notation...
2026/01/12 12:16:24  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:16:24  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:16:24  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:19:39  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:19:52  -Lookup table created: /tmp/tmp2iq_57yu.lookup.txt.gz
2026/01/12 12:19:52 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:19:52 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:19:52  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:19:52  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:19:55  -Loaded 366,246 lookup rows...
2026/01/12 12:19:57  -Found 35110 flipped variants in this chunk
2026/01/12 12:19:57  -Updated ALLELE_FLIPPED=True for 35110 variants in this chunk
2026/01/12 12:19:58  -Newly annotated sumstats rows: 68,409 (chunk lookup rows: 366,246) | New flips: 68,409
2026/01/12 12:19:58  -Total unique sumstats rows annotated: 68,409
2026/01/12 12:19:58  -Total unique rows with allele flips: 35,110
2026/01/12 12:19:58 Finished assigning from lookup table.
2026/01/12 12:19:58  -Filled 684

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:19:59  -Determining reference mode: vcf/bcf...
2026/01/12 12:19:59  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr5.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:20:00 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:20:00  -Number of threads/cores to use: 6
2026/01/12 12:20:00  -Current Dataframe shape : 11411895 x 2 ; Memory usage: 282.96 MB
2026/01/12 12:20:00  -Converting chromosome notation to reference notation...
2026/01/12 12:20:21  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:20:21  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:20:23  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:24:42  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:24:54  -Lookup table created: /tmp/tmpawpdvqx4.lookup.txt.gz
2026/01/12 12:24:54 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:24:54 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:24:54  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:24:54  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:24:57  -Loaded 338,878 lookup rows...
2026/01/12 12:24:59  -Found 32211 flipped variants in this chunk
2026/01/12 12:24:59  -Updated ALLELE_FLIPPED=True for 32211 variants in this chunk
2026/01/12 12:24:59  -Newly annotated sumstats rows: 66,998 (chunk lookup rows: 338,878) | New flips: 66,998
2026/01/12 12:24:59  -Total unique sumstats rows annotated: 66,998
2026/01/12 12:24:59  -Total unique rows with allele flips: 32,211
2026/01/12 12:25:00 Finished assigning from lookup table.
2026/01/12 12:25:00  -Filled 669

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:25:01  -Determining reference mode: vcf/bcf...
2026/01/12 12:25:01  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr6.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:25:02 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:25:02  -Number of threads/cores to use: 6
2026/01/12 12:25:02  -Current Dataframe shape : 11344897 x 2 ; Memory usage: 281.30 MB
2026/01/12 12:25:02  -Converting chromosome notation to reference notation...
2026/01/12 12:25:23  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:25:23  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:25:24  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:29:44  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:29:56  -Lookup table created: /tmp/tmpcb1iduo8.lookup.txt.gz
2026/01/12 12:29:56 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:29:56 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:29:56  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:29:56  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:29:59  -Loaded 336,128 lookup rows...
2026/01/12 12:30:01  -Found 32104 flipped variants in this chunk
2026/01/12 12:30:01  -Updated ALLELE_FLIPPED=True for 32104 variants in this chunk
2026/01/12 12:30:02  -Newly annotated sumstats rows: 64,641 (chunk lookup rows: 336,128) | New flips: 64,641
2026/01/12 12:30:02  -Total unique sumstats rows annotated: 64,641
2026/01/12 12:30:02  -Total unique rows with allele flips: 32,104
2026/01/12 12:30:02 Finished assigning from lookup table.
2026/01/12 12:30:02  -Filled 646

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:30:03  -Determining reference mode: vcf/bcf...
2026/01/12 12:30:03  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr7.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:30:04 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:30:04  -Number of threads/cores to use: 6
2026/01/12 12:30:04  -Current Dataframe shape : 11280256 x 2 ; Memory usage: 279.70 MB
2026/01/12 12:30:04  -Converting chromosome notation to reference notation...
2026/01/12 12:30:24  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:30:24  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:30:25  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:34:44  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:34:54  -Lookup table created: /tmp/tmp03elz__5.lookup.txt.gz
2026/01/12 12:34:54 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:34:54 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:34:54  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:34:54  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:34:57  -Loaded 304,986 lookup rows...
2026/01/12 12:34:58  -Found 28857 flipped variants in this chunk
2026/01/12 12:34:59  -Updated ALLELE_FLIPPED=True for 28857 variants in this chunk
2026/01/12 12:34:59  -Newly annotated sumstats rows: 59,367 (chunk lookup rows: 304,986) | New flips: 59,367
2026/01/12 12:34:59  -Total unique sumstats rows annotated: 59,367
2026/01/12 12:34:59  -Total unique rows with allele flips: 28,857
2026/01/12 12:34:59 Finished assigning from lookup table.
2026/01/12 12:35:00  -Filled 593

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:35:01  -Determining reference mode: vcf/bcf...
2026/01/12 12:35:01  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr8.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:35:02 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:35:02  -Number of threads/cores to use: 6
2026/01/12 12:35:02  -Current Dataframe shape : 11220889 x 2 ; Memory usage: 278.23 MB
2026/01/12 12:35:02  -Converting chromosome notation to reference notation...
2026/01/12 12:35:22  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:35:22  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:35:23  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:39:42  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:39:53  -Lookup table created: /tmp/tmp040gjs7v.lookup.txt.gz
2026/01/12 12:39:53 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:39:53 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:39:53  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:39:53  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:39:56  -Loaded 312,130 lookup rows...
2026/01/12 12:39:57  -Found 29393 flipped variants in this chunk
2026/01/12 12:39:58  -Updated ALLELE_FLIPPED=True for 29393 variants in this chunk
2026/01/12 12:39:58  -Newly annotated sumstats rows: 59,394 (chunk lookup rows: 312,130) | New flips: 59,394
2026/01/12 12:39:58  -Total unique sumstats rows annotated: 59,394
2026/01/12 12:39:58  -Total unique rows with allele flips: 29,393
2026/01/12 12:39:58 Finished assigning from lookup table.
2026/01/12 12:39:59  -Filled 593

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:40:00  -Determining reference mode: vcf/bcf...
2026/01/12 12:40:00  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr9.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:40:01 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:40:01  -Number of threads/cores to use: 6
2026/01/12 12:40:01  -Current Dataframe shape : 11161495 x 2 ; Memory usage: 276.76 MB
2026/01/12 12:40:01  -Converting chromosome notation to reference notation...
2026/01/12 12:40:21  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:40:21  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:40:22  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:44:44  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:44:51  -Lookup table created: /tmp/tmp9i3jb_6p.lookup.txt.gz
2026/01/12 12:44:51 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:44:51 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:44:51  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:44:51  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:44:53  -Loaded 209,025 lookup rows...
2026/01/12 12:44:55  -Found 18290 flipped variants in this chunk
2026/01/12 12:44:55  -Updated ALLELE_FLIPPED=True for 18290 variants in this chunk
2026/01/12 12:44:55  -Newly annotated sumstats rows: 43,157 (chunk lookup rows: 209,025) | New flips: 43,157
2026/01/12 12:44:55  -Total unique sumstats rows annotated: 43,157
2026/01/12 12:44:55  -Total unique rows with allele flips: 18,289
2026/01/12 12:44:55 Finished assigning from lookup table.
2026/01/12 12:44:56  -Filled 431

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:44:57  -Determining reference mode: vcf/bcf...
2026/01/12 12:44:57  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr10.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:44:58 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:44:58  -Number of threads/cores to use: 6
2026/01/12 12:44:58  -Current Dataframe shape : 11118338 x 2 ; Memory usage: 275.69 MB
2026/01/12 12:44:58  -Converting chromosome notation to reference notation...
2026/01/12 12:45:18  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:45:18  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:45:19  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:49:40  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:49:48  -Lookup table created: /tmp/tmp4_tg8ug0.lookup.txt.gz
2026/01/12 12:49:48 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:49:48 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:49:48  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:49:48  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:49:51  -Loaded 251,686 lookup rows...
2026/01/12 12:49:52  -Found 24178 flipped variants in this chunk
2026/01/12 12:49:53  -Updated ALLELE_FLIPPED=True for 24178 variants in this chunk
2026/01/12 12:49:53  -Newly annotated sumstats rows: 47,780 (chunk lookup rows: 251,686) | New flips: 47,780
2026/01/12 12:49:53  -Total unique sumstats rows annotated: 47,780
2026/01/12 12:49:53  -Total unique rows with allele flips: 24,178
2026/01/12 12:49:53 Finished assigning from lookup table.
2026/01/12 12:49:54  -Filled 477

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:49:55  -Determining reference mode: vcf/bcf...
2026/01/12 12:49:55  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr11.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:49:56 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:49:56  -Number of threads/cores to use: 6
2026/01/12 12:49:56  -Current Dataframe shape : 11070558 x 2 ; Memory usage: 274.50 MB
2026/01/12 12:49:56  -Converting chromosome notation to reference notation...
2026/01/12 12:50:16  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:50:16  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:50:17  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:54:40  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:54:49  -Lookup table created: /tmp/tmprkeok5dp.lookup.txt.gz
2026/01/12 12:54:49 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:54:49 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:54:49  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:54:49  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:54:51  -Loaded 255,799 lookup rows...
2026/01/12 12:54:52  -Found 24295 flipped variants in this chunk
2026/01/12 12:54:53  -Updated ALLELE_FLIPPED=True for 24295 variants in this chunk
2026/01/12 12:54:53  -Newly annotated sumstats rows: 51,048 (chunk lookup rows: 255,799) | New flips: 51,048
2026/01/12 12:54:53  -Total unique sumstats rows annotated: 51,048
2026/01/12 12:54:53  -Total unique rows with allele flips: 24,295
2026/01/12 12:54:53 Finished assigning from lookup table.
2026/01/12 12:54:54  -Filled 510

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:54:55  -Determining reference mode: vcf/bcf...
2026/01/12 12:54:55  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr12.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:54:56 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:54:56  -Number of threads/cores to use: 6
2026/01/12 12:54:56  -Current Dataframe shape : 11019510 x 2 ; Memory usage: 273.23 MB
2026/01/12 12:54:56  -Converting chromosome notation to reference notation...
2026/01/12 12:55:16  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 12:55:16  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 12:55:17  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 12:59:39  -Converting chromosome notation back to sumstats notation...
2026/01/12 12:59:47  -Lookup table created: /tmp/tmp2imp77ro.lookup.txt.gz
2026/01/12 12:59:47 Finished extracting lookup table from vcf/bcf.
2026/01/12 12:59:47 Start to assign from lookup table ...(v4.0.4)
2026/01/12 12:59:47  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 12:59:47  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 12:59:49  -Loaded 233,244 lookup rows...
2026/01/12 12:59:51  -Found 22432 flipped variants in this chunk
2026/01/12 12:59:51  -Updated ALLELE_FLIPPED=True for 22432 variants in this chunk
2026/01/12 12:59:52  -Newly annotated sumstats rows: 44,047 (chunk lookup rows: 233,244) | New flips: 44,047
2026/01/12 12:59:52  -Total unique sumstats rows annotated: 44,047
2026/01/12 12:59:52  -Total unique rows with allele flips: 22,432
2026/01/12 12:59:52 Finished assigning from lookup table.
2026/01/12 12:59:52  -Filled 440

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:59:54  -Determining reference mode: vcf/bcf...
2026/01/12 12:59:54  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr13.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 12:59:54 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 12:59:54  -Number of threads/cores to use: 6
2026/01/12 12:59:54  -Current Dataframe shape : 10975463 x 2 ; Memory usage: 272.14 MB
2026/01/12 12:59:54  -Converting chromosome notation to reference notation...
2026/01/12 13:00:15  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:00:15  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:00:16  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:04:40  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:04:46  -Lookup table created: /tmp/tmp4r46ytsr.lookup.txt.gz
2026/01/12 13:04:46 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:04:46 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:04:46  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:04:46  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:04:47  -Loaded 173,049 lookup rows...
2026/01/12 13:04:49  -Found 16730 flipped variants in this chunk
2026/01/12 13:04:49  -Updated ALLELE_FLIPPED=True for 16730 variants in this chunk
2026/01/12 13:04:49  -Newly annotated sumstats rows: 32,409 (chunk lookup rows: 173,049) | New flips: 32,409
2026/01/12 13:04:49  -Total unique sumstats rows annotated: 32,409
2026/01/12 13:04:49  -Total unique rows with allele flips: 16,730
2026/01/12 13:04:49 Finished assigning from lookup table.
2026/01/12 13:04:50  -Filled 324

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:04:51  -Determining reference mode: vcf/bcf...
2026/01/12 13:04:51  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr14.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:04:52 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:04:52  -Number of threads/cores to use: 6
2026/01/12 13:04:52  -Current Dataframe shape : 10943054 x 2 ; Memory usage: 271.34 MB
2026/01/12 13:04:52  -Converting chromosome notation to reference notation...
2026/01/12 13:05:12  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:05:12  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:05:13  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:09:38  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:09:44  -Lookup table created: /tmp/tmpg1edjiik.lookup.txt.gz
2026/01/12 13:09:44 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:09:44 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:09:44  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:09:44  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:09:45  -Loaded 159,866 lookup rows...
2026/01/12 13:09:46  -Found 15259 flipped variants in this chunk
2026/01/12 13:09:46  -Updated ALLELE_FLIPPED=True for 15259 variants in this chunk
2026/01/12 13:09:47  -Newly annotated sumstats rows: 30,246 (chunk lookup rows: 159,866) | New flips: 30,246
2026/01/12 13:09:47  -Total unique sumstats rows annotated: 30,246
2026/01/12 13:09:47  -Total unique rows with allele flips: 15,259
2026/01/12 13:09:47 Finished assigning from lookup table.
2026/01/12 13:09:48  -Filled 302

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:09:49  -Determining reference mode: vcf/bcf...
2026/01/12 13:09:49  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr15.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:09:50 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:09:50  -Number of threads/cores to use: 6
2026/01/12 13:09:50  -Current Dataframe shape : 10912808 x 2 ; Memory usage: 270.59 MB
2026/01/12 13:09:50  -Converting chromosome notation to reference notation...
2026/01/12 13:10:11  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:10:11  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:10:12  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:14:36  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:14:41  -Lookup table created: /tmp/tmpasr6ll04.lookup.txt.gz
2026/01/12 13:14:41 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:14:41 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:14:41  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:14:41  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:14:43  -Loaded 142,117 lookup rows...
2026/01/12 13:14:44  -Found 13780 flipped variants in this chunk
2026/01/12 13:14:44  -Updated ALLELE_FLIPPED=True for 13780 variants in this chunk
2026/01/12 13:14:45  -Newly annotated sumstats rows: 26,925 (chunk lookup rows: 142,117) | New flips: 26,925
2026/01/12 13:14:45  -Total unique sumstats rows annotated: 26,925
2026/01/12 13:14:45  -Total unique rows with allele flips: 13,780
2026/01/12 13:14:45 Finished assigning from lookup table.
2026/01/12 13:14:45  -Filled 269

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:14:47  -Determining reference mode: vcf/bcf...
2026/01/12 13:14:47  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr16.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:14:47 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:14:47  -Number of threads/cores to use: 6
2026/01/12 13:14:47  -Current Dataframe shape : 10885883 x 2 ; Memory usage: 269.92 MB
2026/01/12 13:14:47  -Converting chromosome notation to reference notation...
2026/01/12 13:15:08  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:15:08  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:15:09  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:19:34  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:19:40  -Lookup table created: /tmp/tmp7iwrypyh.lookup.txt.gz
2026/01/12 13:19:40 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:19:40 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:19:40  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:19:40  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:19:43  -Loaded 190,772 lookup rows...
2026/01/12 13:19:44  -Found 17220 flipped variants in this chunk
2026/01/12 13:19:44  -Updated ALLELE_FLIPPED=True for 17220 variants in this chunk
2026/01/12 13:19:44  -Newly annotated sumstats rows: 35,231 (chunk lookup rows: 190,772) | New flips: 35,231
2026/01/12 13:19:44  -Total unique sumstats rows annotated: 35,231
2026/01/12 13:19:44  -Total unique rows with allele flips: 17,220
2026/01/12 13:19:44 Finished assigning from lookup table.
2026/01/12 13:19:45  -Filled 352

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:19:47  -Determining reference mode: vcf/bcf...
2026/01/12 13:19:47  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr17.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:19:47 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:19:47  -Number of threads/cores to use: 6
2026/01/12 13:19:47  -Current Dataframe shape : 10850652 x 2 ; Memory usage: 269.05 MB
2026/01/12 13:19:47  -Converting chromosome notation to reference notation...
2026/01/12 13:20:08  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:20:08  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:20:09  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:24:31  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:24:36  -Lookup table created: /tmp/tmpfkvzmqxs.lookup.txt.gz
2026/01/12 13:24:36 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:24:36 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:24:36  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:24:36  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:24:38  -Loaded 139,479 lookup rows...
2026/01/12 13:24:39  -Found 12880 flipped variants in this chunk
2026/01/12 13:24:39  -Updated ALLELE_FLIPPED=True for 12880 variants in this chunk
2026/01/12 13:24:39  -Newly annotated sumstats rows: 28,268 (chunk lookup rows: 139,479) | New flips: 28,268
2026/01/12 13:24:39  -Total unique sumstats rows annotated: 28,268
2026/01/12 13:24:39  -Total unique rows with allele flips: 12,880
2026/01/12 13:24:39 Finished assigning from lookup table.
2026/01/12 13:24:40  -Filled 282

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:24:41  -Determining reference mode: vcf/bcf...
2026/01/12 13:24:41  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr18.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:24:42 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:24:42  -Number of threads/cores to use: 6
2026/01/12 13:24:42  -Current Dataframe shape : 10822384 x 2 ; Memory usage: 268.35 MB
2026/01/12 13:24:42  -Converting chromosome notation to reference notation...
2026/01/12 13:25:03  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:25:03  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:25:04  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:29:28  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:29:33  -Lookup table created: /tmp/tmpk74dpume.lookup.txt.gz
2026/01/12 13:29:34 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:29:34 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:29:34  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:29:34  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:29:35  -Loaded 149,441 lookup rows...
2026/01/12 13:29:36  -Found 13047 flipped variants in this chunk
2026/01/12 13:29:36  -Updated ALLELE_FLIPPED=True for 13047 variants in this chunk
2026/01/12 13:29:37  -Newly annotated sumstats rows: 28,662 (chunk lookup rows: 149,441) | New flips: 28,662
2026/01/12 13:29:37  -Total unique sumstats rows annotated: 28,662
2026/01/12 13:29:37  -Total unique rows with allele flips: 13,047
2026/01/12 13:29:37 Finished assigning from lookup table.
2026/01/12 13:29:38  -Filled 286

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:29:39  -Determining reference mode: vcf/bcf...
2026/01/12 13:29:39  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr19.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:29:40 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:29:40  -Number of threads/cores to use: 6
2026/01/12 13:29:40  -Current Dataframe shape : 10793722 x 2 ; Memory usage: 267.64 MB
2026/01/12 13:29:40  -Converting chromosome notation to reference notation...
2026/01/12 13:30:00  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:30:00  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:30:02  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:34:28  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:34:32  -Lookup table created: /tmp/tmp5uhq6acn.lookup.txt.gz
2026/01/12 13:34:32 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:34:32 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:34:32  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:34:32  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:34:33  -Loaded 130,015 lookup rows...
2026/01/12 13:34:34  -Found 11252 flipped variants in this chunk
2026/01/12 13:34:34  -Updated ALLELE_FLIPPED=True for 11252 variants in this chunk
2026/01/12 13:34:35  -Newly annotated sumstats rows: 24,068 (chunk lookup rows: 130,015) | New flips: 24,068
2026/01/12 13:34:35  -Total unique sumstats rows annotated: 24,068
2026/01/12 13:34:35  -Total unique rows with allele flips: 11,252
2026/01/12 13:34:35 Finished assigning from lookup table.
2026/01/12 13:34:36  -Filled 240

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:34:37  -Determining reference mode: vcf/bcf...
2026/01/12 13:34:37  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr20.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:34:38 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:34:38  -Number of threads/cores to use: 6
2026/01/12 13:34:38  -Current Dataframe shape : 10769654 x 2 ; Memory usage: 267.04 MB
2026/01/12 13:34:38  -Converting chromosome notation to reference notation...
2026/01/12 13:34:58  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:34:58  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:34:59  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20 22

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:39:26  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:39:30  -Lookup table created: /tmp/tmp5n76dv4l.lookup.txt.gz
2026/01/12 13:39:30 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:39:30 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:39:30  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:39:30  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:39:31  -Loaded 115,166 lookup rows...
2026/01/12 13:39:32  -Found 10978 flipped variants in this chunk
2026/01/12 13:39:32  -Updated ALLELE_FLIPPED=True for 10978 variants in this chunk
2026/01/12 13:39:33  -Newly annotated sumstats rows: 23,966 (chunk lookup rows: 115,166) | New flips: 23,966
2026/01/12 13:39:33  -Total unique sumstats rows annotated: 23,966
2026/01/12 13:39:33  -Total unique rows with allele flips: 10,977
2026/01/12 13:39:33 Finished assigning from lookup table.
2026/01/12 13:39:34  -Filled 239

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:39:35  -Determining reference mode: vcf/bcf...
2026/01/12 13:39:35  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr21.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:39:36 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:39:36  -Number of threads/cores to use: 6
2026/01/12 13:39:36  -Current Dataframe shape : 10745688 x 2 ; Memory usage: 266.45 MB
2026/01/12 13:39:36  -Converting chromosome notation to reference notation...
2026/01/12 13:39:56  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:39:56  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:39:57  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"



2026/01/12 13:44:25  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:44:28  -Lookup table created: /tmp/tmp5cblvzy6.lookup.txt.gz
2026/01/12 13:44:28 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:44:28 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:44:28  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:44:28  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:44:29  -Loaded 63,026 lookup rows...
2026/01/12 13:44:29  -Found 6092 flipped variants in this chunk
2026/01/12 13:44:29  -Updated ALLELE_FLIPPED=True for 6092 variants in this chunk
2026/01/12 13:44:30  -Newly annotated sumstats rows: 12,058 (chunk lookup rows: 63,026) | New flips: 12,058
2026/01/12 13:44:30  -Total unique sumstats rows annotated: 12,058
2026/01/12 13:44:30  -Total unique rows with allele flips: 6,092
2026/01/12 13:44:30 Finished assigning from lookup table.
2026/01/12 13:44:31  -Filled 12058 rs

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:44:32  -Determining reference mode: vcf/bcf...
2026/01/12 13:44:32  -Extracting new lookup TSV from: /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref/homo_sapiens-chr22.vcf.gz...


[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


2026/01/12 13:44:33 Start to extract lookup table from vcf/bcf ...(v4.0.4)
2026/01/12 13:44:33  -Number of threads/cores to use: 6
2026/01/12 13:44:33  -Current Dataframe shape : 10733630 x 2 ; Memory usage: 266.15 MB
2026/01/12 13:44:33  -Converting chromosome notation to reference notation...
2026/01/12 13:44:53  -Running multiprocessing: 6 workers, 23 chromosomes
2026/01/12 13:44:53  -Calling: bcftools view -r <CHR> -T <TARGETS> -Ou <VCF>| bcftools query -f '<FMT>'
2026/01/12 13:44:54  -Finished: 1

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 2

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 3

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 4

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 5

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 6

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 7

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 8

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 9

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 10

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 11

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 12

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 13

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 14

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 15

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 16

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 17

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 18

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 19

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 20

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 21

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 23

[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"
[W::bcf_hrec_check] Invalid tag name: "HGMD-PUBLIC_20204"


 22
2026/01/12 13:49:20  -Converting chromosome notation back to sumstats notation...
2026/01/12 13:49:23  -Lookup table created: /tmp/tmpioq8t_tp.lookup.txt.gz
2026/01/12 13:49:23 Finished extracting lookup table from vcf/bcf.
2026/01/12 13:49:23 Start to assign from lookup table ...(v4.0.4)
2026/01/12 13:49:23  -Current Dataframe shape : 13779885 x 12 ; Memory usage: 1056.53 MB
2026/01/12 13:49:23  -Detected allele mode: REF_ALT using ALT(EA/ALT) / REF(NEA/REF)...
2026/01/12 13:49:24  -Loaded 69,370 lookup rows...
2026/01/12 13:49:25  -Found 6599 flipped variants in this chunk
2026/01/12 13:49:25  -Updated ALLELE_FLIPPED=True for 6599 variants in this chunk
2026/01/12 13:49:25  -Newly annotated sumstats rows: 13,468 (chunk lookup rows: 69,370) | New flips: 13,468
2026/01/12 13:49:25  -Total unique sumstats rows annotated: 13,468
2026/01/12 13:49:25  -Total unique rows with allele flips: 6,599
2026/01/12 13:49:25 Finished assigning from lookup table.
2026/01/12 13:49:26  -Filled 13468

#### Note: you may need a dictionary to match the chromosome names with those used in your VCF

In [None]:
# Show the integer-chromosome -> NC_* accession mapping that gwaslab provides
# for build 38 (keys 1-25; 23/24/25 presumably X/Y/MT — confirm before use).
gl.get_number_to_NC(build="38")

{1: 'NC_000001.11',
 2: 'NC_000002.12',
 3: 'NC_000003.12',
 4: 'NC_000004.12',
 5: 'NC_000005.10',
 6: 'NC_000006.12',
 7: 'NC_000007.14',
 8: 'NC_000008.11',
 9: 'NC_000009.12',
 10: 'NC_000010.11',
 11: 'NC_000011.10',
 12: 'NC_000012.12',
 13: 'NC_000013.11',
 14: 'NC_000014.9',
 15: 'NC_000015.10',
 16: 'NC_000016.10',
 17: 'NC_000017.11',
 18: 'NC_000018.10',
 19: 'NC_000019.10',
 20: 'NC_000020.11',
 21: 'NC_000021.9',
 22: 'NC_000022.11',
 23: 'NC_000023.11',
 24: 'NC_000024.10',
 25: 'NC_012920.1'}

In [None]:
# Same integer-chromosome -> NC_* accession mapping, but for build 19; the
# accession version suffixes differ from the build 38 mapping.
gl.get_number_to_NC(build="19")

{1: 'NC_000001.10',
 2: 'NC_000002.11',
 3: 'NC_000003.11',
 4: 'NC_000004.11',
 5: 'NC_000005.9',
 6: 'NC_000006.11',
 7: 'NC_000007.13',
 8: 'NC_000008.10',
 9: 'NC_000009.11',
 10: 'NC_000010.10',
 11: 'NC_000011.9',
 12: 'NC_000012.11',
 13: 'NC_000013.10',
 14: 'NC_000014.8',
 15: 'NC_000015.9',
 16: 'NC_000016.9',
 17: 'NC_000017.10',
 18: 'NC_000018.9',
 19: 'NC_000019.9',
 20: 'NC_000020.10',
 21: 'NC_000021.8',
 22: 'NC_000022.10',
 23: 'NC_000023.10',
 24: 'NC_000024.9',
 25: 'NC_012920.1'}

In [None]:
# Persist the annotated table as tab-separated text without the index column.
# NOTE(review): gzip output relies on `to_csv` inferring compression from the
# ".gz" suffix (pandas behaviour) — confirm `mysumstats` is a pandas DataFrame.
mysumstats.to_csv(
    "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_T2T_liftover_rsid.tsv.gz",
    index=False,
    sep="\t",
)