In [2]:
import gwaslab as gl
import pandas as pd


In [4]:
# Load gzipped GWAS summary stats
df = pd.read_csv(
    "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_raw.gwas.imputed_v3.both_sexes.tsv.bgz",
    sep="\t",
    compression="gzip"
)

# Split variant into CHR, POS, REF, ALT
df[['CHR', 'POS', 'REF', 'ALT']] = df['variant'].str.split(":", expand=True)
df['POS'] = df['POS'].astype(int)


In [13]:
df.columns

Index(['variant', 'minor_allele', 'minor_AF', 'low_confidence_variant',
       'n_complete_samples', 'AC', 'ytx', 'beta', 'se', 'tstat', 'pval', 'CHR',
       'POS', 'REF', 'ALT'],
      dtype='object')

In [None]:
# 1. Create a copy and rename columns to GWASLab standard names
df_standard = df.rename(columns={
    'variant': 'SNPID',
    'CHR': 'CHR',
    'POS': 'POS',
    'ALT': 'EA',
    'REF': 'NEA',
    'minor_AF': 'EAF',  # Map your frequency directly to EAF
    'beta': 'BETA',
    'se': 'SE',
    'pval': 'P'
})

In [None]:
# 2. Load the DataFrame without specifying column mappings 
# (GWASLab will recognize SNPID, CHR, POS, EA, NEA, EAF, BETA, SE, P automatically)
mysumstats = gl.Sumstats(df_standard, 
                         nrows=5000, 
                         verbose=True)

mysumstats.basic_check(verbose=True)

2026/01/10 10:32:37 GWASLab v4.0.4 https://cloufield.github.io/gwaslab/
2026/01/10 10:32:37 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/10 10:32:37 Python version: 3.12.3 (main, Nov  6 2025, 13:44:16) [GCC 13.3.0]
2026/01/10 10:32:37 Start to initialize gl.Sumstats from pandas DataFrame ...
2026/01/10 10:32:38  -Reading columns          : 
2026/01/10 10:32:38  -Renaming columns to      : 
2026/01/10 10:32:38  -Current Dataframe shape : 13791467  x  15
2026/01/10 10:32:38  -Initiating a status column: STATUS ...
2026/01/10 10:32:41 Start to reorder the columns ...(v4.0.4)
2026/01/10 10:32:41  -Reordering columns to    : SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
2026/01/10 10:32:41 Finished reordering the columns.
2026/01/10 10:32:42  -Trying to convert datatype for CHR: object -> Int64...Failed...
2026/01/10 10:32:45  -Column  : SNPID  CHR    POS   EA       NEA      STATUS EAF     B

In [12]:
mysumstats = gl.Sumstats(df,
             snpid="SNP",
             chrom="CHR",
             pos="POS",
             ea="ALT",
             nea="REF",
             neaf="Frq",
             beta="BETA",
             se="SE",
             p="P",
             nrows=5000,
             verbose=False)
mysumstats.basic_check(verbose=False)

KeyError: 'EAF'

In [11]:
mysumstats.data

Unnamed: 0,NEA,STATUS,EAF,BETA,SE,P,SNPID
0,1:15791:C:T,9999999,1.000000,894.616000,1204.870000,0.457786,
1,1:69487:G:A,9999999,0.999994,-2.715450,2.360060,0.249902,
2,1:69569:T:C,9999999,0.999812,-0.484284,0.423462,0.252778,
3,1:139853:C:T,9999999,0.999994,-2.703560,2.360130,0.251997,
4,1:692794:CA:C,9999999,0.889410,-0.016436,0.019585,0.401342,
...,...,...,...,...,...,...,...
13791462,X:154929412:C:T,9999999,0.754527,-0.016260,0.010723,0.129427,
13791463,X:154929637:CT:C,9999999,0.770274,-0.027098,0.011190,0.015456,
13791464,X:154929952:CAA:C,9999999,0.760570,-0.020494,0.011278,0.069202,
13791465,X:154930230:A:G,9999999,0.754113,-0.016347,0.010721,0.127334,
