In [1]:
import os

import gwaslab as gl

### Check available reference files and download one

In [2]:
# List the reference datasets gwaslab can download. The call logs the available
# keywords and returns a dict keyed by keyword, each entry holding a
# 'description' and a 'suggested_use' (displayed below as the cell output).
gl.check_available_ref()

2026/01/10 11:08:02 Start to check available reference files...
2026/01/10 11:08:02  - Available keywords:  1kg_eas_hg19 1kg_eur_hg19 1kg_eas_hg38 1kg_eur_hg38 1kg_sas_hg19 1kg_amr_hg19 1kg_sas_hg38 1kg_amr_hg38 1kg_afr_hg19 1kg_pan_hg19 1kg_afr_hg38 1kg_pan_hg38 dbsnp_v151_hg19 dbsnp_v151_hg38 dbsnp_v157_hg19 dbsnp_v157_hg38 ucsc_genome_hg19 ucsc_genome_hg38 1kg_dbsnp151_hg19_auto 1kg_dbsnp151_hg38_auto recombination_hg19 recombination_hg38 ensembl_hg19_gtf ensembl_hg38_gtf refseq_hg19_gtf refseq_hg38_gtf testlink 19to38 38to19 1kg_hm3_hg38_eaf 1kg_hm3_hg19_eaf


{'1kg_eas_hg19': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EAS population'},
 '1kg_eur_hg19': {'description': '1000 Genomes Project European (1KG EUR) VCF on the hg19 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EUR population.',
  'suggested_use': 'LD reference panel for creating region plot; infer strand for EUR population'},
 '1kg_eas_hg38': {'description': '1000 Genomes Project East Asian (1KG EAS) VCF on the hg38 reference. Multi-allelic variants were decomposed. Variants were normalized. The INFO field includes the AF annotation, representing allele frequency in the EAS population.',
 

In [3]:
# Download the "1kg_dbsnp151_hg38_auto" reference by keyword. Per the log below,
# the file is saved under ~/.gwaslab/ and recorded in gwaslab's config file.
# NOTE(review): this cell took ~40 s in the recorded run; confirm whether
# re-running it re-downloads the file or reuses the existing copy.
gl.download_ref("1kg_dbsnp151_hg38_auto")

2026/01/10 11:12:06 Start to download  1kg_dbsnp151_hg38_auto  ...
2026/01/10 11:12:06  -Downloading to: /home/ofgeha/.gwaslab/1kg_dbsnp151_hg38_auto.txt.gz
2026/01/10 11:12:49  - Updating record in config file...
2026/01/10 11:12:49 Downloaded  1kg_dbsnp151_hg38_auto  successfully!


### Load sample data

In [6]:
# Location of the lifted-over summary statistics (tab-separated, gzipped).
# Set the GWAS_SUMSTATS_PATH environment variable to run this notebook on a
# different machine; the default is the path used for the recorded run.
SUMSTATS_PATH = os.environ.get(
    "GWAS_SUMSTATS_PATH",
    "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_T2T_liftover.tsv.gz",
)

# Load the file into a gwaslab Sumstats object, mapping the file's column
# headers (right-hand side) onto gwaslab's standard fields (keyword names).
#
# The input has no sample-size (N) or direction (Dir) column, so those
# arguments are omitted. If a sample-size column is available (for example
# "n_complete_samples"), pass it with `n=`.
mysumstats = gl.Sumstats(
    SUMSTATS_PATH,
    snpid="SNPID",
    chrom="CHR",
    pos="POS",
    ea="EA",    # effect allele (column was previously named ALT)
    nea="NEA",  # non-effect allele (column was previously named REF)
    eaf="EAF",  # effect allele frequency (previously minor_AF / Frq)
    beta="BETA",
    se="SE",
    p="P",
    verbose=True,
    readargs={"sep": "\t"},  # forwarded to the underlying table reader
)

2026/01/10 11:19:17 GWASLab v4.0.4 https://cloufield.github.io/gwaslab/
2026/01/10 11:19:17 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/10 11:19:17 Python version: 3.12.3 (main, Nov  6 2025, 13:44:16) [GCC 13.3.0]
2026/01/10 11:19:17 Start to initialize gl.Sumstats from file :/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_T2T_liftover.tsv.gz


2026/01/10 11:19:47  -Reading columns          : CHR,NEA,P,EAF,SNPID,BETA,POS,SE,EA
2026/01/10 11:19:47  -Renaming columns to      : CHR,NEA,P,EAF,SNPID,BETA,POS,SE,EA
2026/01/10 11:19:47  -Current Dataframe shape : 13640038  x  9
2026/01/10 11:19:47  -Initiating a status column: STATUS ...
2026/01/10 11:19:50 Start to reorder the columns ...(v4.0.4)
2026/01/10 11:19:50  -Reordering columns to    : SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
2026/01/10 11:19:51 Finished reordering the columns.
2026/01/10 11:19:51  -Trying to convert datatype for CHR: string -> Int64...Success
2026/01/10 11:19:53  -Column  : SNPID  CHR   POS   EA       NEA      STATUS EAF     BETA    SE      P      
2026/01/10 11:19:53  -DType   : object Int64 int64 category category int64  float64 float64 float64 float64
2026/01/10 11:19:53  -Verified: T      T     T     T        T        T      T       T       T       T      
2026/01/10 11:19:53  -Current Dataframe memory usage: 953.30 MB
2026/01/10 11:19:53 Finished lo

In [7]:
# Inspect the loaded table. The notebook display is truncated to the leading
# and trailing rows; note that every STATUS value is still 9999999 at this
# point, i.e. before any checks have been run.
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:692794:CA:C,1,119016,C,CA,9999999,0.110590,-0.016436,0.019585,0.401342
1,1:693731:A:G,1,119953,G,A,9999999,0.115767,-0.004255,0.018507,0.818155
2,1:707522:G:C,1,133743,C,G,9999999,0.097255,-0.010428,0.020804,0.616190
3,1:717587:G:A,1,144100,A,G,9999999,0.015679,0.001174,0.049643,0.981132
4,1:723329:A:T,1,149839,T,A,9999999,0.001732,-0.117102,0.146483,0.424044
...,...,...,...,...,...,...,...,...,...,...
13640033,X:154929412:C:T,23,153165819,T,C,9999999,0.245473,-0.016260,0.010723,0.129427
13640034,X:154929637:CT:C,23,153166044,C,CT,9999999,0.229726,-0.027098,0.011190,0.015456
13640035,X:154929952:CAA:C,23,153166359,C,CAA,9999999,0.239430,-0.020494,0.011278,0.069202
13640036,X:154930230:A:G,23,153166637,G,A,9999999,0.245887,-0.016347,0.010721,0.127334


In [8]:
# Run gwaslab's basic checks. Per the (truncated) log below this covers at
# least SNPID/rsID format, chromosome notation and base-pair positions.
# The table displayed after the log shows STATUS codes changed from 9999999
# (e.g. 9960099 / 9960399).
# NOTE(review): whether that table is the call's return value is not visible here.
# NOTE(review): the call appears to modify `mysumstats` in place — re-run the
# load cell to get back to the unchecked state; confirm against gwaslab docs.
mysumstats.basic_check()

2026/01/10 11:23:04 Start to check SNPID/rsID ...(v4.0.4)
2026/01/10 11:23:04  -Current Dataframe shape : 13640038 x 10 ; Memory usage: 953.30 MB
2026/01/10 11:23:04  -Checking SNPID data type...
2026/01/10 11:23:04  -Converted datatype for SNPID: object -> string
2026/01/10 11:23:04  -Checking if SNPID contains NA strings :na,NA,Na,Nan,NaN,<NA>,null,NULL,#N/A,#VALUE!,N/A,n/a,missing,...
2026/01/10 11:23:05  -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)
2026/01/10 11:23:15 Finished checking SNPID/rsID.
2026/01/10 11:23:15 Start to fix chromosome notation (CHR) ...(v4.0.4)
2026/01/10 11:23:15  -Checking CHR data type...
2026/01/10 11:23:19  -Variants with standardized chromosome notation: 13640038
2026/01/10 11:23:19  -All CHR are already fixed...
2026/01/10 11:23:22 Finished fixing chromosome notation (CHR).
2026/01/10 11:23:22 Start to fix basepair positions (POS) ...(v4.0.4)
2026/01/10 11:23:22  -Trying to convert datatype for POS: int64 -> Int64...
2026/01/10 11:23:22

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P
0,1:692794:CA:C,1,119016,C,CA,9960399,0.110590,-0.016436,0.019585,0.401342
1,1:693731:A:G,1,119953,G,A,9960099,0.115767,-0.004255,0.018507,0.818155
2,1:707522:G:C,1,133743,C,G,9960099,0.097255,-0.010428,0.020804,0.616190
3,1:717587:G:A,1,144100,A,G,9960099,0.015679,0.001174,0.049643,0.981132
4,1:723329:A:T,1,149839,T,A,9960099,0.001732,-0.117102,0.146483,0.424044
...,...,...,...,...,...,...,...,...,...,...
13640033,X:154929412:C:T,23,153165819,T,C,9960099,0.245473,-0.016260,0.010723,0.129427
13640034,X:154929637:CT:C,23,153166044,C,CT,9960399,0.229726,-0.027098,0.011190,0.015456
13640035,X:154929952:CAA:C,23,153166359,C,CAA,9960399,0.239430,-0.020494,0.011278,0.069202
13640036,X:154930230:A:G,23,153166637,G,A,9960099,0.245887,-0.016347,0.010721,0.127334
