In [1]:
import gwaslab as gl
import pandas as pd

In [2]:
# 1. Load the original UKBB summary statistics.
# Nealelab UKBB v3 is in build 19.
# The file is tab-separated; the compression codec is passed explicitly
# rather than being inferred from the ".bgz" suffix.
ukbb_sumstats_path = "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Data/21001_raw.gwas.imputed_v3.both_sexes.tsv.bgz"
read_options = {"sep": "\t", "compression": "gzip"}
df = pd.read_csv(ukbb_sumstats_path, **read_options)

In [3]:
# Break the colon-delimited `variant` identifier into its four components
# and store them as separate columns (CHR, POS, REF, ALT) on `df`.
variant_field_names = ['CHR', 'POS', 'REF', 'ALT']
variant_parts = df['variant'].str.split(":", expand=True)
df[variant_field_names] = variant_parts
# The split yields strings; POS is converted to an integer column.
df['POS'] = df['POS'].astype(int)

In [4]:
# Rename to GWASLab standards.
# ALT is used as the effect allele (EA) and REF as the non-effect allele (NEA);
# per the Neale Lab UKBB v3 file description, `beta` is reported for ALT.
df_standard = df.rename(columns={
    'variant': 'SNPID',
    'ALT': 'EA',
    'REF': 'NEA',
    'minor_AF': 'EAF',
    'beta': 'BETA',
    'se': 'SE',
    'pval': 'P'
})

# `minor_AF` is the frequency of `minor_allele`, which can be either REF or ALT,
# so it is NOT the effect-allele frequency in general. Convert it:
#   minor allele == EA  -> EAF = minor_AF
#   minor allele == NEA -> EAF = 1 - minor_AF
#   otherwise (allele matches neither / missing) -> EAF left as NaN
minor_allele = df_standard['minor_allele']
minor_af = df_standard['EAF']  # still holds minor_AF values at this point
df_standard['EAF'] = minor_af.where(
    minor_allele == df_standard['EA'],
    (1 - minor_af).where(minor_allele == df_standard['NEA']),
)

In [5]:
# 2. Initialize as Build 19 (Essential!)
# Wraps the renamed DataFrame in a GWASLab Sumstats object. build="19" makes
# GWASLab treat the coordinates as GRCh37/hg19 (see the initialization log below),
# which is the starting build for the liftover step that follows.
# Note: the log reports that CHR could not be converted from object to Int64
# at this stage.
mysumstats = gl.Sumstats(df_standard, build="19")

2026/01/10 15:45:01 GWASLab v4.0.4 https://cloufield.github.io/gwaslab/
2026/01/10 15:45:01 (C) 2022-2026, Yunye He, Kamatani Lab, GPL-3.0 license, gwaslab@gmail.com
2026/01/10 15:45:01 Python version: 3.12.3 (main, Nov  6 2025, 13:44:16) [GCC 13.3.0]
2026/01/10 15:45:01 Start to initialize gl.Sumstats from pandas DataFrame ...
2026/01/10 15:45:02  -Reading columns          : 
2026/01/10 15:45:02  -Renaming columns to      : 
2026/01/10 15:45:02  -Current Dataframe shape : 13791467  x  15
2026/01/10 15:45:02  -Initiating a status column: STATUS ...
2026/01/10 15:45:02  -Genomic coordinates are based on GRCh37/hg19...
2026/01/10 15:45:05 Start to reorder the columns ...(v4.0.4)
2026/01/10 15:45:05  -Reordering columns to    : SNPID,CHR,POS,EA,NEA,STATUS,EAF,BETA,SE,P,minor_allele,low_confidence_variant,n_complete_samples,AC,ytx,tstat
2026/01/10 15:45:05 Finished reordering the columns.
2026/01/10 15:45:06  -Trying to convert datatype for CHR: object -> Int64...Failed...
2026/01/10 15:45

In [None]:
# 3. Step One: Liftover from hg19 to hg38
# GWASLab has built-in chains for 19 to 38
# The call is made on `mysumstats` without capturing a return value.
# As the log below shows, the liftover first standardizes chromosome notation
# (e.g. X is recoded to 23) before converting coordinates.
mysumstats.liftover(from_build="19", to_build="38")

2026/01/10 15:45:50 Start to perform liftover ...(v4.0.4)
2026/01/10 15:45:50  -Current Dataframe shape : 13791467 x 16 ; Memory usage: 1489.96 MB
2026/01/10 15:45:50 Start to fix chromosome notation (CHR) ...(v4.0.4)
2026/01/10 15:45:50  -Checking CHR data type...
2026/01/10 15:45:52  -Variants with standardized chromosome notation: 13364303
2026/01/10 15:45:53  -Variants with fixable chromosome notations: 427164
2026/01/10 15:45:53  -No unrecognized chromosome notations...
2026/01/10 15:45:53  -Identifying non-autosomal chromosomes : X, Y, and MT ...
2026/01/10 15:45:53  -Identified 427164 variants on sex chromosomes...
2026/01/10 15:45:53  -Standardizing first sex chromosome chromosome notations: X to 23...
