In [34]:
# Chromosome selection for this run.
# `chroms` is either None or a list of chromosome labels. When a list is
# given, each label is converted with str() and matched against the
# chromosome column of the GWAS file; None skips filtering entirely and the
# input file is used as-is.
#
# Examples:
# chroms = [22]                    # quick test
# chroms = list(range(1, 23))      # autosomes
# chroms = list(range(1, 24))      # 1-23 (NOTE(review): presumably 23 = X; confirm against the file's coding)
# chroms = None                    # full genome (no filtering)

chroms = None   # global run: no chromosome filtering


In [35]:
import gzip

def filter_gwas_by_chrom(
    input_gwas,
    output_gwas,
    chroms=None,
    chrom_col=0
):
    """
    Write a gzip-compressed copy of a tab-separated GWAS file, optionally
    keeping only the rows that belong to selected chromosomes.

    Parameters
    ----------
    input_gwas : str or path-like
        Gzip-compressed, tab-separated input file. Its first line is treated
        as a header and is always copied to the output.
    output_gwas : str or path-like
        Gzip-compressed file to write (overwritten if it exists).
    chroms : iterable of int/str, or None
        Chromosome labels to keep. Labels are compared as strings against the
        value found in ``chrom_col``. ``None`` copies the file unchanged.
    chrom_col : int
        0-based index of the chromosome column.

    Returns
    -------
    str
        ``"unfiltered"`` when ``chroms`` is None, otherwise
        ``"filtered_chr_<labels>"`` with the labels joined by ``_``; numeric
        labels come first in numeric order, then any non-numeric labels
        alphabetically.
    """
    # Local import so this cell stays runnable on its own.
    import shutil

    if chroms is None:
        # No filtering -> stream the content across in chunks instead of
        # loading the whole decompressed file into memory.
        with gzip.open(input_gwas, "rb") as fin, gzip.open(output_gwas, "wb") as fout:
            shutil.copyfileobj(fin, fout)
        return "unfiltered"

    wanted = {str(c) for c in chroms}

    with gzip.open(input_gwas, "rt") as fin, gzip.open(output_gwas, "wt") as fout:
        fout.write(fin.readline())  # header is copied verbatim

        for line in fin:
            # Drop the line terminator before splitting; otherwise a
            # chromosome stored in the last column would carry a trailing
            # "\n" and never match.
            fields = line.rstrip("\r\n").split("\t")
            try:
                value = fields[chrom_col]
            except IndexError:
                # Row too short to hold the chromosome column (e.g. a blank
                # trailing line): it cannot match, so skip it.
                continue
            if value in wanted:
                fout.write(line)

    def _label_order(label):
        # Numeric labels sort by value ("2" before "10"); others follow.
        if label.isdecimal():
            return (0, int(label), label)
        return (1, 0, label)

    return f"filtered_chr_{'_'.join(sorted(wanted, key=_label_order))}"


In [36]:
# Raw summary-statistics file that every run starts from.
input_gwas = "/mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz"

# Default to a "global" run on the untouched input; a subset file is only
# written when a chromosome selection has been made.
gwas_for_nf = input_gwas
mode = "global"

if chroms is not None:
    gwas_for_nf = "/mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz"
    mode = filter_gwas_by_chrom(input_gwas, gwas_for_nf, chroms=chroms)

print("Run mode:", mode)
print("GWAS used:", gwas_for_nf)


Run mode: global
GWAS used: /mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz


In [None]:
import shlex
import subprocess

# Chromosomes the harmoniser is asked to process. Follow the notebook-level
# `chroms` selection when one was made; for a global run fall back to the
# autosomes 1-22 that this cell previously hard-coded (without the stray
# trailing comma).
# NOTE(review): labels are passed through verbatim; confirm they match the
# naming the pipeline expects (e.g. "X" rather than 23).
chromlist = ",".join(
    str(c) for c in (chroms if chroms is not None else range(1, 23))
)

cmd = [
    "nextflow", "run", "EBISPOT/gwas-sumstats-harmoniser",
    "-r", "v1.1.10",
    "--ref", "/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref",
    "--harm",
    # Use the file selected in the previous cell so that a chromosome subset,
    # when one was built, is what actually gets harmonised.
    "--file", gwas_for_nf,
    "--chromlist", chromlist,
    "--terminate_error", "ignore",
    "-profile", "standard,singularity",
    "-resume"
]


# shlex.join quotes arguments where needed, so the echoed command can be
# pasted into a shell unchanged.
print(shlex.join(cmd))
subprocess.run(cmd, check=True)


nextflow run EBISPOT/gwas-sumstats-harmoniser -r v1.1.10 --ref /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref --harm --file /mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz --chromlist 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22, --terminate_error ignore -profile standard,singularity -resume


[33mNextflow 25.10.3 is available - Please consider updating your version to it[m



 N E X T F L O W   ~  version 25.10.0

Launching `https://github.com/EBISPOT/gwas-sumstats-harmoniser` [grave_heyrovsky] DSL2 - revision: 436c17a91c [v1.1.10]

Start harmonising files
Harmonizing the file /mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz
[-        ] NFC…jor_direction:map_to_build -
[-        ] NFC…rection:ten_percent_counts -
[-        ] NFC…ion:ten_percent_counts_sum -
[-        ] NFC…ion:generate_strand_counts -
[-        ] NFC…on:summarise_strand_counts -
[-        ] NFC…RM:main_harm:harmonization -

[-        ] NFC…jor_direction:map_to_build | 0 of 1
[-        ] NFC…rection:ten_percent_counts -
[-        ] NFC…ion:ten_percent_counts_sum -
[-        ] NFC…ion:generate_strand_counts -
[-        ] NFC…on:summarise_strand_counts -
[-        ] NFC…RM:main_harm:harmonization -
[-        ] NFC…arm:concatenate_chr_splits -
[-        ] NFC…LOGHARM:quality_control:qc -
[-        ] NFC…_control:harmonization_log -
[-        ] NFC…y_control:update_meta_yaml -

In [6]:
import pandas as pd

# The file is named *.tsv.gz, i.e. tab-separated. read_csv defaults to a
# comma delimiter, which would load every row into one wide column, so the
# separator is given explicitly. Gzip compression is inferred from ".gz".
df = pd.read_csv(
    "/mnt/hdd_1/ofgeha/galaxy-gwas-tools/Gwaslab/21001_raw/final/21001_raw.h.tsv.gz",
    sep="\t",
)

In [None]:
# Dimensions of the loaded table as (n_rows, n_columns).
df.shape