Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
790 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"""Module to store variantplaner object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
# 3rd party import | ||
# project import | ||
from variantplaner.objects.vcf import Vcf, VcfParsingBehavior |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Declare Annotations object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
|
||
|
||
class Annotations(polars.LazyFrame):
    """Object to manage lazyframe as Annotations."""

    def __init__(self) -> None:
        """Initialize an empty Annotations object carrying the minimal schema."""
        # The zero-argument form resolves to polars.LazyFrame.__init__.
        # ``super(polars.LazyFrame, self)`` would skip LazyFrame in the MRO and
        # the resulting proxy object is not callable.
        super().__init__(schema=Annotations.minimal_schema())

    @classmethod
    def minimal_schema(cls) -> dict[str, type]:
        """Get minimal schema of annotations polars.LazyFrame.

        Returns: Mapping of column name to polars data type.
        """
        return {
            "id": polars.UInt64,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
"""Declare ContigsLength object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
import re | ||
import typing | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
from variantplaner.objects.csv import Csv | ||
|
||
if typing.TYPE_CHECKING: | ||
import pathlib | ||
import sys | ||
|
||
from variantplaner.objects.csv import ScanCsv | ||
from variantplaner.objects.vcf_header import VcfHeader | ||
|
||
if sys.version_info >= (3, 11): | ||
from typing import Unpack | ||
else: | ||
from typing_extensions import Unpack | ||
|
||
|
||
class ContigsLength:
    """Store contigs -> length information.

    The data is kept in ``self.lf``, a polars.LazyFrame with the columns
    ``contig``, ``length`` and ``offset``; ``offset`` is the cumulative
    length of all contigs listed before the current one.
    """

    def __init__(self):
        """Initialise an empty contigs length."""
        self.lf = polars.LazyFrame(
            schema={
                "contig": polars.String,
                "length": polars.UInt64,
                "offset": polars.UInt64,
            }
        )

    def from_vcf_header(self, header: VcfHeader) -> int:
        """Fill object with the contig lines of a VcfHeader.

        Lines lacking either an ``ID=`` or a ``length=`` field are ignored.

        Argument:
            header: VcfHeader

        Returns: Number of contigs line view
        """
        # ``>`` is excluded as well as ``,`` so the closing bracket is not
        # captured when the field is the last one of the line.
        contigs_id = re.compile(r"ID=(?P<id>[^,>]+)")
        contigs_len = re.compile(r"length=(?P<length>[^,>]+)")

        contigs2len: dict[str, list] = {"contig": [], "length": []}
        for contig_line in header.contigs:
            if (len_match := contigs_len.search(contig_line)) and (id_match := contigs_id.search(contig_line)):
                contigs2len["contig"].append(id_match.group("id"))
                contigs2len["length"].append(int(len_match.group("length")))

        self.lf = polars.LazyFrame(contigs2len, schema={"contig": polars.String, "length": polars.UInt64})
        self.lf = self.lf.with_columns(offset=polars.col("length").cum_sum() - polars.col("length"))

        return len(contigs2len["contig"])

    def from_path(self, path: pathlib.Path, /, **scan_csv_args: Unpack[ScanCsv]) -> int:
        """Fill object with file point by pathlib.Path.

        The file must provide a ``length`` column, from which ``offset`` is
        computed.

        Argument:
            path: path of input file
            scan_csv_args: keyword arguments forwarded to polars.scan_csv

        Returns: Number of contigs line view
        """
        # This class has no LazyFrame base, so ``super().collect()`` cannot
        # work; the file is scanned directly and the rows of the stored frame
        # are counted instead.
        scanned = polars.scan_csv(path, **scan_csv_args)
        self.lf = scanned.with_columns(offset=polars.col("length").cum_sum() - polars.col("length"))

        return self.lf.collect().shape[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
"""Declare CSV object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
import dataclasses | ||
import typing | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
from variantplaner.exception import NotAVariantCsvError | ||
|
||
if typing.TYPE_CHECKING: # pragma: no cover | ||
import pathlib | ||
import sys | ||
from collections.abc import Sequence | ||
|
||
if sys.version_info >= (3, 11): | ||
from typing import Unpack | ||
else: | ||
from typing_extensions import Unpack | ||
|
||
class ScanCsv(typing.TypedDict, total=False): | ||
"""A struct to check the types of parameters given to [polars.scan_csv][].""" | ||
# total=False makes every key below optional, so callers only pass the
# options they want to override.
# The keys are meant to mirror keyword arguments of polars.scan_csv.
# NOTE(review): confirm the key list against the polars version in use.
|
||
has_header: bool | ||
separator: str | ||
comment_prefix: str | None | ||
quote_char: str | None | ||
skip_rows: int | ||
dtypes: polars.type_aliases.SchemaDict | Sequence[polars.type_aliases.PolarsDataType] | None | ||
null_values: str | Sequence[str] | dict[str, str] | None | ||
missing_utf8_is_empty_string: bool | ||
ignore_errors: bool | ||
cache: bool | ||
with_column_names: typing.Callable[[list[str]], list[str]] | None | ||
infer_schema_length: int | None | ||
n_rows: int | None | ||
encoding: polars.type_aliases.CsvEncoding | ||
low_memory: bool | ||
rechunk: bool | ||
skip_rows_after_header: int | ||
row_index_name: str | None | ||
row_index_offset: int | ||
try_parse_dates: bool | ||
eol_char: str | ||
new_columns: Sequence[str] | None | ||
|
||
|
||
@dataclasses.dataclass | ||
class ColRename: | ||
"""A struct to store rename parameters.""" | ||
|
||
# Each of the three named fields defaults to its own name.
chr: str = "chr" | ||
ref: str = "ref" | ||
alt: str = "alt" | ||
# default_factory gives every instance its own empty dict instead of one
# shared mutable default.
other: dict[str, str] = dataclasses.field(default_factory=dict) | ||
|
||
|
||
class Csv(polars.LazyFrame):
    """Object to manage lazyframe as Csv.

    A Csv starts out empty and is filled in place by ``from_path`` or
    ``variants_from_path``.
    """

    def __init__(self) -> None:
        """Initialize an empty Csv object."""
        super().__init__()

    def from_path(self, path: pathlib.Path, /, **scan_csv_args: Unpack[ScanCsv]) -> None:
        """Populate Csv object with csv file content.

        Argument:
            path: path of input file
            scan_csv_args: keyword arguments forwarded to polars.scan_csv
        """
        self._adopt(polars.scan_csv(path, **scan_csv_args))

    def variants_from_path(self, path: pathlib.Path, col_rename: ColRename, /, **scan_csv_args: Unpack[ScanCsv]) -> None:
        """Populate Csv object with a csv file that describes variants.

        Columns are renamed to the canonical names, the presence of ``chr``,
        ``pos``, ``ref`` and ``alt`` is checked and ``pos`` is cast to UInt64.

        Argument:
            path: path of input file
            col_rename: names used in the file for the canonical columns
            scan_csv_args: keyword arguments forwarded to polars.scan_csv

        Raises:
            NotAVariantCsvError: if one of ``chr``, ``pos``, ``ref`` or
                ``alt`` is missing once the renaming is applied.
        """
        self.from_path(path, **scan_csv_args)

        # Rename from the name found in the file to the canonical name.
        # Identity entries are left out so that a column which is simply
        # absent reaches the check below instead of failing inside polars.
        canonical = {"chr": col_rename.chr, "ref": col_rename.ref, "alt": col_rename.alt}
        mapping = {current: wanted for wanted, current in canonical.items() if current != wanted}
        # NOTE(review): ``other`` is applied as {current name: new name}, the
        # orientation polars ``rename`` expects -- confirm with callers.
        mapping.update(col_rename.other)

        renamed = self.rename(mapping) if mapping else self

        # The column list is resolved once, outside of the generator
        # expression: zero-argument ``super()`` is not usable in there.
        columns = renamed.columns
        if any(name not in columns for name in ("chr", "pos", "ref", "alt")):
            raise NotAVariantCsvError(path)

        self._adopt(renamed.cast({"pos": polars.UInt64}))

    def _adopt(self, lf: polars.LazyFrame) -> None:
        """Make this object carry the query plan of ``lf``."""
        # Rebinding the local name ``self`` inside a method never changes the
        # caller's object, so the underlying plan is replaced instead.
        # NOTE(review): ``_ldf`` is a private attribute of polars.LazyFrame;
        # confirm it against the polars version pinned by the project.
        self._ldf = lf._ldf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
"""Declare Genotypes object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
|
||
|
||
class Genotypes(polars.LazyFrame):
    """Object to manage lazyframe as Genotypes."""

    def __init__(self) -> None:
        """Initialize an empty Genotypes object carrying the minimal schema."""
        # The zero-argument form resolves to polars.LazyFrame.__init__.
        # ``super(polars.LazyFrame, self)`` would skip LazyFrame in the MRO and
        # the resulting proxy object is not callable.
        super().__init__(schema=Genotypes.minimal_schema())

    @classmethod
    def minimal_schema(cls) -> dict[str, type]:
        """Get minimal schema of genotypes polars.LazyFrame.

        Returns: Mapping of column name to polars data type.
        """
        return {
            "id": polars.UInt64,
            "samples": polars.String,
        }
Oops, something went wrong.