In [2]:
import gwaslab as gl
import pandas as pd


In [4]:
# Load gzipped GWAS summary stats
df = pd.read_csv(
    "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_raw.gwas.imputed_v3.both_sexes.tsv.bgz",
    sep="\t",
    compression="gzip"
)

# Split variant into CHR, POS, REF, ALT
df[['CHR', 'POS', 'REF', 'ALT']] = df['variant'].str.split(":", expand=True)
df['POS'] = df['POS'].astype(int)


In [13]:
df.columns

Index(['variant', 'minor_allele', 'minor_AF', 'low_confidence_variant',
       'n_complete_samples', 'AC', 'ytx', 'beta', 'se', 'tstat', 'pval', 'CHR',
       'POS', 'REF', 'ALT'],
      dtype='object')

In [None]:
# 1. Create a copy and rename columns to GWASLab standard names
df_standard = df.rename(columns={
    'variant': 'SNPID',
    'CHR': 'CHR',
    'POS': 'POS',
    'ALT': 'EA',
    'REF': 'NEA',
    'minor_AF': 'EAF',  # Map your frequency directly to EAF
    'beta': 'BETA',
    'se': 'SE',
    'pval': 'P'
})

In [16]:
# 2. Load the DataFrame without specifying column mappings 
# (GWASLab will recognize SNPID, CHR, POS, EA, NEA, EAF, BETA, SE, P automatically)
mysumstats = gl.Sumstats(df_standard, 
                        #  nrows=5000, 
                         verbose=True)

mysumstats.basic_check(verbose=True)

2026/01/10 10:36:40 GWASLab v4.0.4 https://cloufield.github.io/gwaslab/
2026/01/10 10:36:40 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/10 10:36:40 Python version: 3.12.3 (main, Nov  6 2025, 13:44:16) [GCC 13.3.0]
2026/01/10 10:36:40 Start to initialize gl.Sumstats from pandas DataFrame ...
2026/01/10 10:36:41  -Reading columns          : 
2026/01/10 10:36:41  -Renaming columns to      : 
2026/01/10 10:36:41  -Current Dataframe shape : 13791467  x  15
2026/01/10 10:36:42  -Initiating a status column: STATUS ...
2026/01/10 10:36:44 Start to reorder the columns ...(v4.0.4)
2026/01/10 10:36:44  -Reordering columns to    : SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
2026/01/10 10:36:45 Finished reordering the columns.
2026/01/10 10:36:45  -Trying to convert datatype for CHR: object -> Int64...Failed...
2026/01/10 10:36:48  -Column  : SNPID  CHR    POS   EA       NEA      STATUS EAF     B

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,0.000006,-2.715450,2.360060,0.249902,A,True,359983,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,0.000188,-0.484284,0.423462,0.252778,C,True,359983,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,0.000006,-2.703560,2.360130,0.251997,T,True,359983,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,0.110590,-0.016436,0.019585,0.401342,C,False,359983,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,0.115767,-0.004255,0.018507,0.818155,G,False,359983,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.245473,-0.016260,0.010723,0.129427,T,False,359983,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.229726,-0.027098,0.011190,0.015456,C,False,359983,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.239430,-0.020494,0.011278,0.069202,C,False,359983,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.245887,-0.016347,0.010721,0.127334,G,False,359983,177030.00000,4845680.000,-1.524700


In [17]:
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,0.000006,-2.715450,2.360060,0.249902,A,True,359983,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,0.000188,-0.484284,0.423462,0.252778,C,True,359983,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,0.000006,-2.703560,2.360130,0.251997,T,True,359983,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,0.110590,-0.016436,0.019585,0.401342,C,False,359983,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,0.115767,-0.004255,0.018507,0.818155,G,False,359983,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.245473,-0.016260,0.010723,0.129427,T,False,359983,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.229726,-0.027098,0.011190,0.015456,C,False,359983,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.239430,-0.020494,0.011278,0.069202,C,False,359983,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.245887,-0.016347,0.010721,0.127334,G,False,359983,177030.00000,4845680.000,-1.524700


In [None]:
mysumstats.liftover(from_build="19", to_build="38")

Unnamed: 0,NEA,STATUS,EAF,BETA,SE,P,SNPID
0,1:15791:C:T,9999999,1.000000,894.616000,1204.870000,0.457786,
1,1:69487:G:A,9999999,0.999994,-2.715450,2.360060,0.249902,
2,1:69569:T:C,9999999,0.999812,-0.484284,0.423462,0.252778,
3,1:139853:C:T,9999999,0.999994,-2.703560,2.360130,0.251997,
4,1:692794:CA:C,9999999,0.889410,-0.016436,0.019585,0.401342,
...,...,...,...,...,...,...,...
13791462,X:154929412:C:T,9999999,0.754527,-0.016260,0.010723,0.129427,
13791463,X:154929637:CT:C,9999999,0.770274,-0.027098,0.011190,0.015456,
13791464,X:154929952:CAA:C,9999999,0.760570,-0.020494,0.011278,0.069202,
13791465,X:154930230:A:G,9999999,0.754113,-0.016347,0.010721,0.127334,


In [18]:
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,0.000006,-2.715450,2.360060,0.249902,A,True,359983,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,0.000188,-0.484284,0.423462,0.252778,C,True,359983,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,0.000006,-2.703560,2.360130,0.251997,T,True,359983,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,0.110590,-0.016436,0.019585,0.401342,C,False,359983,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,0.115767,-0.004255,0.018507,0.818155,G,False,359983,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.245473,-0.016260,0.010723,0.129427,T,False,359983,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.229726,-0.027098,0.011190,0.015456,C,False,359983,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.239430,-0.020494,0.011278,0.069202,C,False,359983,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.245887,-0.016347,0.010721,0.127334,G,False,359983,177030.00000,4845680.000,-1.524700


In [19]:
! wget https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/grch38-chm13v2.chain

--2026-01-10 10:39:53--  https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/grch38-chm13v2.chain
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 3.5.79.179, 52.218.246.64, 52.92.240.144, ...
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|3.5.79.179|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6288201 (6.0M) [binary/octet-stream]
Saving to: ‘grch38-chm13v2.chain’


2026-01-10 10:39:55 (3.74 MB/s) - ‘grch38-chm13v2.chain’ saved [6288201/6288201]



In [20]:
mysumstats.data

Unnamed: 0,SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
0,1:69487:G:A,1,69487,A,G,9960099,0.000006,-2.715450,2.360060,0.249902,A,True,359983,4.15294,103.515,-1.150590
1,1:69569:T:C,1,69569,C,T,9960099,0.000188,-0.484284,0.423462,0.252778,C,True,359983,135.27800,3644.560,-1.143630
2,1:139853:C:T,1,139853,T,C,9960099,0.000006,-2.703560,2.360130,0.251997,T,True,359983,4.09020,101.840,-1.145510
3,1:692794:CA:C,1,692794,C,CA,9960399,0.110590,-0.016436,0.019585,0.401342,C,False,359983,79621.10000,2179000.000,-0.839228
4,1:693731:A:G,1,693731,G,A,9960099,0.115767,-0.004255,0.018507,0.818155,G,False,359983,83348.00000,2281760.000,-0.229918
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13779880,X:154929412:C:T,23,154929412,T,C,9960099,0.245473,-0.016260,0.010723,0.129427,T,False,359983,176732.00000,4837480.000,-1.516370
13779881,X:154929637:CT:C,23,154929637,C,CT,9960399,0.229726,-0.027098,0.011190,0.015456,C,False,359983,165395.00000,4525550.000,-2.421510
13779882,X:154929952:CAA:C,23,154929952,C,CAA,9960399,0.239430,-0.020494,0.011278,0.069202,C,False,359983,172381.00000,4717910.000,-1.817100
13779883,X:154930230:A:G,23,154930230,G,A,9960099,0.245887,-0.016347,0.010721,0.127334,G,False,359983,177030.00000,4845680.000,-1.524700


In [21]:
mysumstats.liftover(from_build="38", to_build="13",chain_path="./grch38-chm13v2.chain")

2026/01/10 10:40:08 Start to perform liftover ...(v4.0.4)
2026/01/10 10:40:08  -Using provided chain file: ./grch38-chm13v2.chain
2026/01/10 10:40:10  -Converting variants with status code xxx0xxx: 13,779,885
2026/01/10 10:40:11  -Target build: 13
2026/01/10 10:40:11  -Input positions are 1-based
2026/01/10 10:40:11  -Output positions will be 1-based
2026/01/10 10:41:41  -Mapped: 13640038 variants
2026/01/10 10:41:41  -Unmapped: 139847 variants
2026/01/10 10:41:41  -Examples of unmapped variants:
2026/01/10 10:41:41    SNPID=1:69487:G:A | CHR=1 | POS=69487 | STATUS=9960099
2026/01/10 10:41:41    SNPID=1:69569:T:C | CHR=1 | POS=69569 | STATUS=9960099
2026/01/10 10:41:41    SNPID=1:139853:C:T | CHR=1 | POS=139853 | STATUS=9960099
2026/01/10 10:41:41    SNPID=1:905017:T:C | CHR=1 | POS=905017 | STATUS=9960099
2026/01/10 10:41:41    SNPID=1:909221:T:C | CHR=1 | POS=909221 | STATUS=9960099
2026/01/10 10:41:49  -Removed 139847 unmapped variants
2026/01/10 10:41:49 Start to fix chromosome not

In [None]:
mysumstats.data