In [1]:
# Chromosome selection for this run. `chroms` is read by the cells below:
# a list restricts the GWAS file to those chromosomes, while None means the
# full input file is used without filtering.
# Examples (uncomment exactly one alternative to change the run):
chroms = [22]                    # quick test
# chroms = list(range(1, 23))      # autosomes
# chroms = list(range(1, 24))      # 1–23
# chroms = None                    # FULL genome

# chroms = None   # <-- global run (same setting as the "FULL genome" example above)


In [2]:
import gzip

def filter_gwas_by_chrom(
    input_gwas,
    output_gwas,
    chroms=None,
    chrom_col=0
):
    """
    Write a chromosome-restricted copy of a gzipped, tab-separated GWAS file.

    The first line of ``input_gwas`` is treated as a header and is always
    copied. Every following line is kept only when the value in column
    ``chrom_col`` equals one of the requested chromosome labels.

    Parameters
    ----------
    input_gwas : str or path-like
        Gzip-compressed TSV file to read.
    output_gwas : str or path-like
        Gzip-compressed file to write (overwritten if it exists).
    chroms : iterable of int/str, or None
        Chromosome labels to keep; each is compared as ``str(label)``.
        ``None`` disables filtering and copies the file unchanged.
    chrom_col : int
        0-based column index of the chromosome field.

    Returns
    -------
    str
        ``"unfiltered"`` when ``chroms`` is None, otherwise
        ``"filtered_chr_<labels>"`` with the labels joined by ``_`` in
        natural order (numeric labels ascending, then other labels
        alphabetically).
    """
    # Local import so this definition does not depend on another cell's imports.
    import shutil

    if chroms is None:
        # No filtering → stream the decompressed bytes across in chunks
        # instead of loading the whole file into memory.
        with gzip.open(input_gwas, "rb") as fin, gzip.open(output_gwas, "wb") as fout:
            shutil.copyfileobj(fin, fout)
        return "unfiltered"

    wanted = {str(c) for c in chroms}

    with gzip.open(input_gwas, "rt") as fin, gzip.open(output_gwas, "wt") as fout:
        header = fin.readline()
        fout.write(header)

        for line in fin:
            # Drop the line terminator before splitting; otherwise a
            # chromosome stored in the last column would read as "22\n"
            # and never match.
            fields = line.rstrip("\r\n").split("\t")
            if fields == [""]:
                # Blank line (e.g. trailing newline at end of file): nothing to keep.
                continue
            if fields[chrom_col] in wanted:
                fout.write(line)

    # Natural ordering so the label reads 1_2_10 rather than 1_10_2.
    ordered = sorted(
        wanted,
        key=lambda c: (0, int(c), "") if c.isdecimal() else (1, 0, c),
    )
    return f"filtered_chr_{'_'.join(ordered)}"


In [3]:
# Full summary-statistics file; used directly when no chromosome subset is requested.
input_gwas = "/mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz"

if chroms is not None:
    # Subset run: write the filtered copy next to the input and hand that to Nextflow.
    gwas_for_nf = "/mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz"
    mode = filter_gwas_by_chrom(input_gwas, gwas_for_nf, chroms)
else:
    # Global run: no intermediate file, the original input goes straight to Nextflow.
    gwas_for_nf = input_gwas
    mode = "global"

# Report which path was taken so the cell output documents the run.
print("Run mode:", mode)
print("GWAS used:", gwas_for_nf)


Run mode: filtered_chr_22
GWAS used: /mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz


In [4]:
import subprocess

# Build the harmoniser invocation one option group at a time.
cmd = ["nextflow", "run", "EBISPOT/gwas-sumstats-harmoniser"]
cmd += ["-r", "v1.1.10"]
cmd += ["--ref", "/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref"]
cmd += ["--harm"]
cmd += ["--file", gwas_for_nf]  # file chosen by the subset/global cell above
cmd += ["-profile", "standard,singularity"]
cmd += ["-resume"]

# Echo the command line, then run it; check=True raises CalledProcessError
# when Nextflow exits with a non-zero status.
print(" ".join(cmd))
subprocess.run(cmd, check=True)


nextflow run EBISPOT/gwas-sumstats-harmoniser -r v1.1.10 --ref /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref --harm --file /mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz -profile standard,singularity -resume


[33mNextflow 25.10.3 is available - Please consider updating your version to it[m



 N E X T F L O W   ~  version 25.10.0

Launching `https://github.com/EBISPOT/gwas-sumstats-harmoniser` [agitated_brenner] DSL2 - revision: 436c17a91c [v1.1.10]

Start harmonising files
Harmonizing the file /mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz
[-        ] NFC…jor_direction:map_to_build -
[-        ] NFC…rection:ten_percent_counts -
[-        ] NFC…ion:ten_percent_counts_sum -
[-        ] NFC…ion:generate_strand_counts -

[-        ] NFC…jor_direction:map_to_build -
[-        ] NFC…rection:ten_percent_counts -
[-        ] NFC…ion:ten_percent_counts_sum -
[-        ] NFC…ion:generate_strand_counts -
[-        ] NFC…on:summarise_strand_counts -
[-        ] NFC…RM:main_harm:harmonization -
[-        ] NFC…arm:concatenate_chr_splits -
[-        ] NFC…LOGHARM:quality_control:qc -
[-        ] NFC…_control:harmonization_log -
[-        ] NFC…y_control:update_meta_yaml -
no,other setting
[chr1, chr12, chr15, chr13, chr19, chr18, chrY, chr14, chr7, chr16, chr3, chr8, chrX, chr5, chrMT,

CalledProcessError: Command '['nextflow', 'run', 'EBISPOT/gwas-sumstats-harmoniser', '-r', 'v1.1.10', '--ref', '/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref', '--harm', '--file', '/mnt/hdd_1/ofgeha/test/21001_raw_subset.tsv.gz', '-profile', 'standard,singularity', '-resume']' returned non-zero exit status 1.

In [None]:
import subprocess
import os
from pathlib import Path

# Inputs for this run
input_file = "/mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz"
ref_dir = "/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref"
output_dir = "./gwas_output"  # or wherever you want output (not passed to the command below)

# Assemble the Nextflow argument vector, one option group per statement
command = ["nextflow", "run", "EBISPOT/gwas-sumstats-harmoniser"]
command.extend(["-r", "v1.1.10"])
command.extend(["--ref", ref_dir])
command.append("--harm")
command.extend(["--file", input_file])
# NOTE(review): the trailing comma in "22," looks deliberate (presumably to keep
# the value from being parsed as a number) — confirm against the pipeline docs.
command.extend(["--chromlist", "22,"])
command.extend(["--terminate_error", "ignore"])
command.extend(["-profile", "standard,singularity"])
command.append("-resume")

# Execute and collect stdout/stderr as text; nothing is shown until the process
# exits, and a non-zero exit status does not raise here (no check=True).
print("Running Nextflow pipeline...")
result = subprocess.run(command, capture_output=True, text=True)

# Dump what was captured
print("STDOUT:", result.stdout)
print("STDERR:", result.stderr)
print("Return code:", result.returncode)

In [None]:
%%bash
# Run from the harmoniser directory. Abort the cell if it is missing, so that
# Nextflow is never launched from whatever directory the kernel happens to be in.
cd /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser || exit 1

# Harmonise the full GWAS file, restricted via --chromlist; the chromlist value
# is single-quoted so the shell passes it through verbatim.
nextflow run EBISPOT/gwas-sumstats-harmoniser \
  -r v1.1.10 \
  --ref /mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref \
  --harm \
  --file /mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz \
  --chromlist '22,' \
  --terminate_error ignore \
  -profile standard,singularity \
  -resume

In [None]:
import subprocess
import sys
import time
from IPython.display import clear_output

def run_nextflow_pipeline(
    gwas_file="/mnt/hdd_1/ofgeha/test/21001_raw.gwas.imputed_v3.both_sexes.tsv.gz",
    chromlist="16,",
    ref_dir="/mnt/hdd_1/ofgeha/gwas-sumstats-harmoniser/gwas-ref",
):
    """Run the gwas-sumstats-harmoniser Nextflow pipeline, echoing its output live.

    Parameters
    ----------
    gwas_file : str
        Path passed to ``--file``.
    chromlist : str
        Value passed to ``--chromlist``.
    ref_dir : str
        Reference directory passed to ``--ref``.

    All parameters default to the values this notebook previously hard-coded,
    so calling the function with no arguments launches the same command.

    Returns
    -------
    int
        Exit status of the Nextflow process.
    """
    cmd = [
        "nextflow", "run", "EBISPOT/gwas-sumstats-harmoniser",
        "-r", "v1.1.10",
        "--ref", ref_dir,
        "--harm",
        "--file", gwas_file,
        "--chromlist", chromlist,
        "--terminate_error", "ignore",
        "-profile", "standard,singularity",
        "-resume"
    ]

    print(f"Running command: {' '.join(cmd)}")
    print("="*60)

    # stderr is merged into stdout so the log appears in its original order.
    # The context manager closes the pipe and reaps the process even if
    # printing is interrupted.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line-buffered so lines arrive as they are produced
    ) as process:
        # Iterating the pipe blocks until the next line or EOF, so there is no
        # busy-wait polling once the child has closed its output.
        for line in process.stdout:
            # Strip only the line terminator / trailing blanks; leading
            # indentation in the Nextflow log is preserved.
            print(line.rstrip())

        return process.wait()

# Launch the harmoniser and report how it ended.
exit_code = run_nextflow_pipeline()
print("\nPipeline finished with exit code: {}".format(exit_code))