Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions bio2zarr/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1544,8 +1544,13 @@ def init_array(self, variable):
object_codec=object_codec,
dimension_separator=self.dimension_separator,
)
# Dimension names are part of the spec in Zarr v3
a.attrs["_ARRAY_DIMENSIONS"] = variable.dimensions
a.attrs.update(
{
"description": variable.description,
# Dimension names are part of the spec in Zarr v3
"_ARRAY_DIMENSIONS": variable.dimensions,
}
)

def get_array(self, name):
    """Return the entry of ``self.root`` stored under ``"wip_" + name``."""
    wip_key = "wip_" + name
    return self.root[wip_key]
Expand Down
36 changes: 36 additions & 0 deletions tests/test_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,39 @@ def test_check_overlap(regions):
]
with pytest.raises(ValueError, match="Multiple VCFs have the region"):
vcf.check_overlap(partitions)


class TestVcfDescriptions:
    """Check the field descriptions stored in the ``schema`` argument.

    ``schema`` is not parametrized here, so it is supplied from outside
    this class (presumably a pytest fixture -- confirm in the test module).
    """

    @pytest.mark.parametrize(
        ("field", "description"),
        [
            ("variant_NS", "Number of Samples With Data"),
            ("variant_AN", "Total number of alleles in called genotypes"),
            (
                "variant_AC",
                "Allele count in genotypes, for each ALT allele, "
                "in the same order as listed",
            ),
            ("variant_DP", "Total Depth"),
            ("variant_AF", "Allele Frequency"),
            ("variant_AA", "Ancestral Allele"),
            ("variant_DB", "dbSNP membership, build 129"),
            ("variant_H2", "HapMap2 membership"),
            ("call_GQ", "Genotype Quality"),
            ("call_DP", "Read Depth"),
            ("call_HQ", "Haplotype Quality"),
        ],
    )
    def test_fields(self, schema, field, description):
        """The schema column for ``field`` carries the expected description."""
        column = schema["columns"][field]
        assert column["description"] == description

    # Filter descriptions are not in the schema yet, see
    # https://github.com/sgkit-dev/bio2zarr/issues/123
    # Placeholder to enable once they are:
    #
    # @pytest.mark.parametrize(
    #     ("filt", "description"),
    #     [
    #         ("s50", "Less than 50% of samples have data"),
    #         ("q10", "Quality below 10"),
    #     ],
    # )
    # def test_filters(self, schema, filt, description):
    #     assert schema["filters"][filt]["description"] == description
23 changes: 23 additions & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,29 @@ def test_vcf_dimensions(self, ds):
assert ds.variant_H2.dims == ("variants",)
assert ds.variant_position.dims == ("variants",)

@pytest.mark.parametrize(
    ("field", "description"),
    [
        ("variant_NS", "Number of Samples With Data"),
        ("variant_AN", "Total number of alleles in called genotypes"),
        (
            "variant_AC",
            "Allele count in genotypes, for each ALT allele, "
            "in the same order as listed",
        ),
        ("variant_DP", "Total Depth"),
        ("variant_AF", "Allele Frequency"),
        ("variant_AA", "Ancestral Allele"),
        ("variant_DB", "dbSNP membership, build 129"),
        ("variant_H2", "HapMap2 membership"),
        ("call_GQ", "Genotype Quality"),
        ("call_DP", "Read Depth"),
        ("call_HQ", "Haplotype Quality"),
    ],
)
def test_vcf_field_description(self, ds, field, description):
    """The ``description`` attribute of ``ds[field]`` equals the expected text."""
    field_attrs = ds[field].attrs
    assert field_attrs["description"] == description


class Test1000G2020Example:
data_path = "tests/data/vcf/1kg_2020_chrM.vcf.gz"
Expand Down