In [4]:
import gwaslab as gl
import pandas as pd

### Check reference and download

In [2]:
gl.check_available_ref()

2026/01/03 00:11:34 Start to check available reference files...
2026/01/03 00:11:34  - Available keywords:  1kg_eas_hg19 1kg_eur_hg19 1kg_eas_hg38 1kg_eur_hg38 1kg_sas_hg19 1kg_amr_hg19 1kg_sas_hg38 1kg_amr_hg38 1kg_afr_hg19 1kg_pan_hg19 1kg_afr_hg38 1kg_pan_hg38 dbsnp_v151_hg19 dbsnp_v151_hg38 dbsnp_v157_hg19 dbsnp_v157_hg38 ucsc_genome_hg19 ucsc_genome_hg38 1kg_dbsnp151_hg19_auto 1kg_dbsnp151_hg38_auto recombination_hg19 recombination_hg38 ensembl_hg19_gtf ensembl_hg38_gtf refseq_hg19_gtf refseq_hg38_gtf testlink 19to38 38to19 1kg_hm3_hg38_eaf 1kg_hm3_hg19_eaf


{'1kg_eas_hg19': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EAS population'},
 '1kg_eur_hg19': {'description': '1000 Genomes Project European (1KG EUR) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EUR population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EUR population'},
 '1kg_eas_hg38': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg38 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
 

In [3]:
gl.download_ref("1kg_dbsnp151_hg19_auto")

2026/01/03 00:11:55 Start to download  1kg_dbsnp151_hg19_auto  ...
2026/01/03 00:11:55  -Downloading to: /home/icog-bioai2/.gwaslab/1kg_dbsnp151_hg19_auto.txt.gz
2026/01/03 00:12:08  - Updating record in config file...
2026/01/03 00:12:08 Downloaded  1kg_dbsnp151_hg19_auto  successfully!


In [5]:
df = pd.read_csv(
    "/mnt/hdd_1/ofgeha/galaxytool/Data/21001_raw.gwas.imputed_v3.both_sexes.tsv.bgz",
    sep="\t",
    compression="gzip"
)

# Split variant into CHR, POS, REF, ALT
df[['CHR', 'POS', 'REF', 'ALT']] = df['variant'].str.split(":", expand=True)
df['POS'] = df['POS'].astype(int)

In [6]:
# Create GWASLab Sumstats object and run liftover in one go
my_sumstats = (
    gl.Sumstats(
        df,
        snpid="variant",
        chrom="CHR",
        pos="POS",
        ea="minor_allele",
        beta="beta",
        se="se",
        p="pval",
        n="n_complete_samples",
        maf="minor_AF",
        verbose=True
    )
    .fix_chr()                    # fixes any CHR dtype issues automatically
)

2026/01/03 00:15:32 GWASLab v4.0.2 https://cloufield.github.io/gwaslab/
2026/01/03 00:15:32 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/03 00:15:32 Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
2026/01/03 00:15:32 Start to initialize gl.Sumstats from pandas DataFrame ...
2026/01/03 00:15:43  -Reading columns          : n_complete_samples,beta,minor_allele,POS,se,variant,CHR,minor_AF,pval
2026/01/03 00:15:43  -Renaming columns to      : N,BETA,EA,POS,SE,SNPID,CHR,MAF,P
2026/01/03 00:15:43  -Current Dataframe shape : 13791467  x  15
2026/01/03 00:15:51  -Initiating a status column: STATUS ...
2026/01/03 00:15:51  NEA not available: assigning REF to NEA...
2026/01/03 00:15:51  -EA,REF and ALT columns are available: assigning NEA...
2026/01/03 00:15:52  -For variants with EA == ALT : assigning REF to NEA ...
2026/01/03 00:15:57  -For variants with EA != ALT : assigning ALT to NEA ...
2026/01/03 00:16:04 Start to reorder the columns

In [7]:

my_sumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat
0,1:15791:C:T,1,15791,T,C,9995999,C,T,5.446880e-09,894.616000,1204.870000,0.457786,359983,True,0.003922,1.238950e-01,0.742499
1,1:69487:G:A,1,69487,A,G,9995999,G,A,5.768250e-06,-2.715450,2.360060,0.249902,359983,True,4.152940,1.035150e+02,-1.150590
2,1:69569:T:C,1,69569,C,T,9995999,T,C,1.878960e-04,-0.484284,0.423462,0.252778,359983,True,135.278000,3.644560e+03,-1.143630
3,1:139853:C:T,1,139853,T,C,9995999,C,T,5.681100e-06,-2.703560,2.360130,0.251997,359983,True,4.090200,1.018400e+02,-1.145510
4,1:692794:CA:C,1,692794,C,CA,9995999,CA,C,1.105900e-01,-0.016436,0.019585,0.401342,359983,False,79621.100000,2.179000e+06,-0.839228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13791462,X:154929412:C:T,23,154929412,T,C,9995999,C,T,2.454730e-01,-0.016260,0.010723,0.129427,359983,False,176732.000000,4.837480e+06,-1.516370
13791463,X:154929637:CT:C,23,154929637,C,CT,9995999,CT,C,2.297260e-01,-0.027098,0.011190,0.015456,359983,False,165395.000000,4.525550e+06,-2.421510
13791464,X:154929952:CAA:C,23,154929952,C,CAA,9995999,CAA,C,2.394300e-01,-0.020494,0.011278,0.069202,359983,False,172381.000000,4.717910e+06,-1.817100
13791465,X:154930230:A:G,23,154930230,G,A,9995999,A,G,2.458870e-01,-0.016347,0.010721,0.127334,359983,False,177030.000000,4.845680e+06,-1.524700


In [8]:
my_sumstats.basic_check()

2026/01/03 00:16:34 Start to check SNPID/rsID ...(v4.0.2)
2026/01/03 00:16:34  -Checking SNPID data type...
2026/01/03 00:16:34  -Converted datatype for SNPID: object -> string
2026/01/03 00:16:34  -Checking NA strings :na,NA,Na,Nan,NaN,<NA>,null,NULL,#N/A,#VALUE!,N/A,n/a,missing,
2026/01/03 00:16:34  -Checking if SNPID contains NA strings...
2026/01/03 00:16:35  -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)
2026/01/03 00:16:46 Finished checking SNPID/rsID.
2026/01/03 00:16:46 Start to fix chromosome notation (CHR) ...(v4.0.2)
2026/01/03 00:16:46  -Checking CHR data type...
2026/01/03 00:16:49  -Variants with standardized chromosome notation: 13791467
2026/01/03 00:16:49  -All CHR are already fixed...
2026/01/03 00:16:56 Finished fixing chromosome notation (CHR).
2026/01/03 00:16:56 Start to fix basepair positions (POS) ...(v4.0.2)
2026/01/03 00:16:56  -Trying to convert datatype for POS: int64 -> Int64...
2026/01/03 00:17:01  -Position bound:(0 , 250,000,000)
2026/01/03

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,G,A,0.000006,-2.715450,2.360060,0.249902,359983,True,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,T,C,0.000188,-0.484284,0.423462,0.252778,359983,True,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,C,T,0.000006,-2.703560,2.360130,0.251997,359983,True,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,CA,C,0.110590,-0.016436,0.019585,0.401342,359983,False,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,A,G,0.115767,-0.004255,0.018507,0.818155,359983,False,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,C,T,0.245473,-0.016260,0.010723,0.129427,359983,False,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,CT,C,0.229726,-0.027098,0.011190,0.015456,359983,False,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,CAA,C,0.239430,-0.020494,0.011278,0.069202,359983,False,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,A,G,0.245887,-0.016347,0.010721,0.127334,359983,False,177030.00000,4845680.000,-1.524700


In [9]:
my_sumstats.fix_id(fixsep=True)

2026/01/03 00:20:07 Start to check SNPID/rsID ...(v4.0.2)
2026/01/03 00:20:07  -Checking SNPID data type...
2026/01/03 00:20:07  -Checking NA strings :na,NA,Na,Nan,NaN,<NA>,null,NULL,#N/A,#VALUE!,N/A,n/a,missing,
2026/01/03 00:20:07  -Checking if SNPID contains NA strings...
2026/01/03 00:20:07  -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)
2026/01/03 00:20:23  -Replacing separators in SNPID with ":" ...
2026/01/03 00:20:29 Finished checking SNPID/rsID.


Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,G,A,0.000006,-2.715450,2.360060,0.249902,359983,True,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,T,C,0.000188,-0.484284,0.423462,0.252778,359983,True,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,C,T,0.000006,-2.703560,2.360130,0.251997,359983,True,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,CA,C,0.110590,-0.016436,0.019585,0.401342,359983,False,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,A,G,0.115767,-0.004255,0.018507,0.818155,359983,False,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,C,T,0.245473,-0.016260,0.010723,0.129427,359983,False,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,CT,C,0.229726,-0.027098,0.011190,0.015456,359983,False,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,CAA,C,0.239430,-0.020494,0.011278,0.069202,359983,False,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,A,G,0.245887,-0.016347,0.010721,0.127334,359983,False,177030.00000,4845680.000,-1.524700


In [11]:
my_sumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,G,A,0.000006,-2.715450,2.360060,0.249902,359983,True,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,T,C,0.000188,-0.484284,0.423462,0.252778,359983,True,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,C,T,0.000006,-2.703560,2.360130,0.251997,359983,True,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,CA,C,0.110590,-0.016436,0.019585,0.401342,359983,False,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,A,G,0.115767,-0.004255,0.018507,0.818155,359983,False,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,C,T,0.245473,-0.016260,0.010723,0.129427,359983,False,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,CT,C,0.229726,-0.027098,0.011190,0.015456,359983,False,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,CAA,C,0.239430,-0.020494,0.011278,0.069202,359983,False,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,A,G,0.245887,-0.016347,0.010721,0.127334,359983,False,177030.00000,4845680.000,-1.524700


In [12]:
my_sumstats.assign_rsid(ref_rsid_tsv=gl.get_path("1kg_dbsnp151_hg19_auto"))

2026/01/03 00:21:18  -Number of threads/cores to use: 1
2026/01/03 00:21:18 Start to assign rsID using reference file ...(v4.0.2)
2026/01/03 00:21:18  -13779885 rsID could be possibly fixed...
2026/01/03 00:21:25  -Setting block size:  5000000
2026/01/03 00:21:25  -Loading block: 0   1   2   3   4   5   6   7   8   9   10   11   12   13   14   15   
2026/01/03 00:28:48  -rsID annotation for 973956 variants needed to be fixed!
2026/01/03 00:28:48  -Annotated 12805929 rsID successfully!
2026/01/03 00:28:48  -Current Dataframe shape : 13779885 x 18 ; Memory usage: 1651.70 MB
2026/01/03 00:28:48 Finished assign rsID using reference file.


Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat,rsID
0,1:69487:G:A,1,69487,A,G,9960099,G,A,0.000006,-2.715450,2.360060,0.249902,359983,True,4.15294,103.515,-1.150590,rs568226429
1,1:69569:T:C,1,69569,C,T,9960099,T,C,0.000188,-0.484284,0.423462,0.252778,359983,True,135.27800,3644.560,-1.143630,rs2531267
2,1:139853:C:T,1,139853,T,C,9960099,C,T,0.000006,-2.703560,2.360130,0.251997,359983,True,4.09020,101.840,-1.145510,rs533633326
3,1:692794:CA:C,1,692794,C,CA,9960399,CA,C,0.110590,-0.016436,0.019585,0.401342,359983,False,79621.10000,2179000.000,-0.839228,rs1268544509
4,1:693731:A:G,1,693731,G,A,9960099,A,G,0.115767,-0.004255,0.018507,0.818155,359983,False,83348.00000,2281760.000,-0.229918,rs12238997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,C,T,0.245473,-0.016260,0.010723,0.129427,359983,False,176732.00000,4837480.000,-1.516370,
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,CT,C,0.229726,-0.027098,0.011190,0.015456,359983,False,165395.00000,4525550.000,-2.421510,
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,CAA,C,0.239430,-0.020494,0.011278,0.069202,359983,False,172381.00000,4717910.000,-1.817100,
13779883,X:154930230:A:G,23,154930230,G,A,9960099,A,G,0.245887,-0.016347,0.010721,0.127334,359983,False,177030.00000,4845680.000,-1.524700,


In [15]:
my_sumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,REF,ALT,MAF,BETA,SE,P,N,low_confidence_variant,AC,ytx,tstat,rsID
0,1:69487:G:A,1,69487,A,G,9960099,G,A,0.000006,-2.715450,2.360060,0.249902,359983,True,4.15294,103.515,-1.150590,rs568226429
1,1:69569:T:C,1,69569,C,T,9960099,T,C,0.000188,-0.484284,0.423462,0.252778,359983,True,135.27800,3644.560,-1.143630,rs2531267
2,1:139853:C:T,1,139853,T,C,9960099,C,T,0.000006,-2.703560,2.360130,0.251997,359983,True,4.09020,101.840,-1.145510,rs533633326
3,1:692794:CA:C,1,692794,C,CA,9960399,CA,C,0.110590,-0.016436,0.019585,0.401342,359983,False,79621.10000,2179000.000,-0.839228,rs1268544509
4,1:693731:A:G,1,693731,G,A,9960099,A,G,0.115767,-0.004255,0.018507,0.818155,359983,False,83348.00000,2281760.000,-0.229918,rs12238997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,C,T,0.245473,-0.016260,0.010723,0.129427,359983,False,176732.00000,4837480.000,-1.516370,
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,CT,C,0.229726,-0.027098,0.011190,0.015456,359983,False,165395.00000,4525550.000,-2.421510,
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,CAA,C,0.239430,-0.020494,0.011278,0.069202,359983,False,172381.00000,4717910.000,-1.817100,
13779883,X:154930230:A:G,23,154930230,G,A,9960099,A,G,0.245887,-0.016347,0.010721,0.127334,359983,False,177030.00000,4845680.000,-1.524700,


In [16]:
gl.get_number_to_NC(build="19")

{1: 'NC_000001.10',
 2: 'NC_000002.11',
 3: 'NC_000003.11',
 4: 'NC_000004.11',
 5: 'NC_000005.9',
 6: 'NC_000006.11',
 7: 'NC_000007.13',
 8: 'NC_000008.10',
 9: 'NC_000009.11',
 10: 'NC_000010.10',
 11: 'NC_000011.9',
 12: 'NC_000012.11',
 13: 'NC_000013.10',
 14: 'NC_000014.8',
 15: 'NC_000015.9',
 16: 'NC_000016.9',
 17: 'NC_000017.10',
 18: 'NC_000018.9',
 19: 'NC_000019.9',
 20: 'NC_000020.10',
 21: 'NC_000021.8',
 22: 'NC_000022.10',
 23: 'NC_000023.10',
 24: 'NC_000024.9',
 25: 'NC_012920.1'}

In [17]:
gl.get_number_to_NC(build="38")

{1: 'NC_000001.11',
 2: 'NC_000002.12',
 3: 'NC_000003.12',
 4: 'NC_000004.12',
 5: 'NC_000005.10',
 6: 'NC_000006.12',
 7: 'NC_000007.14',
 8: 'NC_000008.11',
 9: 'NC_000009.12',
 10: 'NC_000010.11',
 11: 'NC_000011.10',
 12: 'NC_000012.12',
 13: 'NC_000013.11',
 14: 'NC_000014.9',
 15: 'NC_000015.10',
 16: 'NC_000016.10',
 17: 'NC_000017.11',
 18: 'NC_000018.10',
 19: 'NC_000019.10',
 20: 'NC_000020.11',
 21: 'NC_000021.9',
 22: 'NC_000022.11',
 23: 'NC_000023.11',
 24: 'NC_000024.10',
 25: 'NC_012920.1'}