Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions bio2zarr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numcodecs
import tabulate

from . import plink, provenance, vcf, vcf_utils
from . import icf, plink, provenance, vcf, vcf_utils

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -167,7 +167,7 @@ def check_overwrite_dir(path, force):
def get_compressor(cname):
    """Build a compressor codec that uses the named compression algorithm.

    Args:
        cname: Value to store under the ``"cname"`` key of the codec
            configuration, or ``None`` to request no compressor.

    Returns:
        ``None`` when ``cname`` is ``None``; otherwise the codec that
        ``numcodecs.get_codec`` builds from the ICF default compressor's
        configuration with its ``"cname"`` entry replaced by ``cname``.
    """
    if cname is None:
        return None
    # Start from the ICF default compressor's settings and override only the
    # algorithm name. The default lives in the ``icf`` module; the earlier
    # lookup through ``vcf`` was a dead store that was immediately overwritten.
    config = icf.ICF_DEFAULT_COMPRESSOR.get_config()
    config["cname"] = cname
    return numcodecs.get_codec(config)

Expand Down Expand Up @@ -198,7 +198,7 @@ def explode(
"""
setup_logging(verbose)
check_overwrite_dir(icf_path, force)
vcf.explode(
icf.explode(
icf_path,
vcfs,
worker_processes=worker_processes,
Expand Down Expand Up @@ -235,7 +235,7 @@ def dexplode_init(
"""
setup_logging(verbose)
check_overwrite_dir(icf_path, force)
work_summary = vcf.explode_init(
work_summary = icf.explode_init(
icf_path,
vcfs,
target_num_partitions=num_partitions,
Expand Down Expand Up @@ -263,7 +263,7 @@ def dexplode_partition(icf_path, partition, verbose, one_based):
setup_logging(verbose)
if one_based:
partition -= 1
vcf.explode_partition(icf_path, partition)
icf.explode_partition(icf_path, partition)


@click.command
Expand All @@ -274,7 +274,7 @@ def dexplode_finalise(icf_path, verbose):
Final step for distributed conversion of VCF(s) to intermediate columnar format.
"""
setup_logging(verbose)
vcf.explode_finalise(icf_path)
icf.explode_finalise(icf_path)


@click.command
Expand Down
18 changes: 18 additions & 0 deletions bio2zarr/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np

# Sentinel values for integer and string data: one marks a missing value,
# the other marks fill (padding).
INT_MISSING, INT_FILL = -1, -2
STR_MISSING, STR_FILL = ".", ""

# Float sentinels are two distinct NaN bit patterns (exponent bits all set,
# non-zero mantissa). They compare unequal to everything as floats, so the
# raw int32 bit patterns are exposed too for exact comparison.
FLOAT32_MISSING_AS_INT32, FLOAT32_FILL_AS_INT32 = np.array(
    [0x7F800001, 0x7F800002], dtype=np.int32
)
FLOAT32_MISSING, FLOAT32_FILL = np.array(
    [FLOAT32_MISSING_AS_INT32, FLOAT32_FILL_AS_INT32], dtype=np.int32
).view(np.float32)


# The two lowest int32 values are set aside as sentinels, so the smallest
# ordinary value is the int32 minimum plus two.
# NOTE(review): the VCF_ prefix suggests these mirror the VCF/BCF integer
# encoding -- confirm against the code that consumes them.
VCF_INT_MISSING = np.iinfo(np.int32).min
VCF_INT_FILL = VCF_INT_MISSING + 1
MIN_INT_VALUE = VCF_INT_MISSING + 2
22 changes: 22 additions & 0 deletions bio2zarr/core.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import concurrent.futures as cf
import contextlib
import dataclasses
import json
import logging
import math
import multiprocessing
import os
import os.path
import threading
import time

import humanfriendly
import numcodecs
import numpy as np
import tqdm
Expand All @@ -18,6 +21,17 @@
numcodecs.blosc.use_threads = False


def display_number(x):
    """Format a number compactly for display.

    Finite values are rendered with two significant digits in general
    (``g``) notation, with a leading space in place of a plus sign.
    Infinities and NaN are rendered as ``"n/a"``.
    """
    if not math.isfinite(x):
        return "n/a"
    return f"{x: 0.2g}"


def display_size(n):
    """Return ``n`` formatted by ``humanfriendly.format_size`` with ``binary=True``."""
    formatted = humanfriendly.format_size(n, binary=True)
    return formatted


def min_int_dtype(min_value, max_value):
if min_value > max_value:
raise ValueError("min_value must be <= max_value")
Expand Down Expand Up @@ -277,3 +291,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
self._update_progress()
self.progress_bar.close()
return False


class JsonDataclass:
    """Mixin that adds dict and JSON export helpers to dataclass subclasses."""

    def asdict(self):
        """Return this instance's fields as a dict via ``dataclasses.asdict``."""
        as_mapping = dataclasses.asdict(self)
        return as_mapping

    def asjson(self):
        """Return the ``asdict()`` mapping as a JSON string indented by 4 spaces."""
        payload = self.asdict()
        return json.dumps(payload, indent=4)
Loading