Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial script to convert hapmap to zarr #852

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions maintenance/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import click
import black
import daiquiri
import msprime
import zarr

import stdpopsim
from . import ensembl
Expand Down Expand Up @@ -327,5 +329,40 @@ def add_species(ensembl_id, force):
writer.write_ensembl_release()


@cli.command()
@click.argument("species")
@click.argument("path", type=click.Path(exists=True, file_okay=False, dir_okay=True))
def add_recombination_map(species, path):
"""
Add a recombination map to the local recombination map repository.
"""
species = stdpopsim.get_species(species)
path = pathlib.Path(path)

with zarr.ZipStore("recomb_map.zip", mode="w") as store:
root = zarr.group(store=store)
for filename in path.glob("*.txt"):
chrom_id = filename.stem.split("_")[-1]
try:
species.genome.get_chromosome(chrom_id)
except ValueError:
logger.warning(f"Skipping {chrom_id}")
continue
logger.info(f"reading {chrom_id}")
recomb_map = msprime.RateMap.read_hapmap(filename)
# TODO check that the map has the same sequence length as the
# chromosome. Either error of do something about it.

chrom_group = root.create_group(chrom_id)
chrom_group.create_dataset("position", data=recomb_map.position)
chrom_group.create_dataset("rate", data=recomb_map.rate)

# TODO check that all the chromosomes are in here. If not, check
# if the corresponding chrom has a recombination rate of 0, and
# insert the 0 map accordingly.
# Probably also worth showing some information about the mean
# recombination rates, and how they compare to the catalog values?


def main():
cli()