Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
808 additions
and
671 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"""Module to store variantplaner object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
# 3rd party import | ||
# project import | ||
from variantplaner.objects.vcf import Vcf, VcfParsingBehavior |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Declare Annotations object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
|
||
|
||
class Annotations(polars.LazyFrame):
    """Hold annotations in a polars.LazyFrame exposed as the ``lf`` attribute."""

    def __init__(self):
        """Create an Annotations object whose ``lf`` is empty and follows the minimal schema."""
        # NOTE(review): the polars.LazyFrame base initialiser is not invoked
        # here; only the wrapped frame stored in `self.lf` is set up.
        empty_schema = Annotations.minimal_schema()
        self.lf = polars.LazyFrame(schema=empty_schema)

    @classmethod
    def minimal_schema(cls) -> dict[str, type]:
        """Return the minimal schema of the annotations polars.LazyFrame."""
        schema = {}
        schema["id"] = polars.UInt64
        return schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
"""Declare ContigsLength object.""" | ||
|
||
# std import | ||
from __future__ import annotations | ||
|
||
import re | ||
import typing | ||
|
||
# 3rd party import | ||
import polars | ||
|
||
# project import | ||
from variantplaner.objects.csv import Csv | ||
|
||
if typing.TYPE_CHECKING: | ||
import pathlib | ||
import sys | ||
|
||
from variantplaner.objects.csv import ScanCsv | ||
from variantplaner.objects.vcf_header import VcfHeader | ||
|
||
if sys.version_info >= (3, 11): | ||
from typing import Unpack | ||
else: | ||
from typing_extensions import Unpack | ||
|
||
|
||
class ContigsLength:
    """Store contigs -> length information.

    The data lives in ``self.lf``, a polars.LazyFrame with the columns
    ``contig``, ``length`` and ``offset``. ``offset`` is the sum of the
    lengths of all rows that precede the contig in the frame.
    """

    # Compiled once for the whole class instead of on every call.
    # `>` is excluded from both captured values so that a key placed last in
    # a `<...>` record does not swallow the closing bracket, and `\b` keeps
    # longer keys ending in the same text (e.g. `assemblyID=`) from matching.
    _CONTIG_ID = re.compile(r"\bID=(?P<id>[^,>]+)")
    _CONTIG_LENGTH = re.compile(r"\blength=(?P<length>[^,>]+)")

    def __init__(self):
        """Initialise an empty contigs length table."""
        self.lf = polars.LazyFrame(
            schema={
                "contig": polars.String,
                "length": polars.UInt64,
                "offset": polars.UInt64,
            }
        )

    @classmethod
    def _parse_contig_line(cls, contig_line: str) -> tuple[str, int] | None:
        """Extract ``(contig name, length)`` from one contig header line.

        Argument:
            contig_line: text of a contig header line

        Returns: The pair, or None when the line lacks an ID or a length.

        Raises: ValueError if the length value is not an integer.
        """
        len_match = cls._CONTIG_LENGTH.search(contig_line)
        id_match = cls._CONTIG_ID.search(contig_line)
        if len_match is None or id_match is None:
            return None
        return id_match["id"], int(len_match["length"])

    def from_vcf_header(self, header: VcfHeader) -> int:
        """Fill the object from the contig lines of a VcfHeader.

        Lines without both an ID and a length are ignored.

        Argument:
            header: VcfHeader

        Returns: Number of contig lines stored
        """
        parsed = [
            pair
            for contig_line in header.contigs
            if (pair := self._parse_contig_line(contig_line)) is not None
        ]

        contigs2len = {
            "contig": [name for name, _ in parsed],
            "length": [length for _, length in parsed],
        }
        self.lf = polars.LazyFrame(contigs2len, schema={"contig": polars.String, "length": polars.UInt64})

        self.__compute_offset()

        return len(parsed)

    def from_path(self, path: pathlib.Path, /, **scan_csv_args: Unpack[ScanCsv]) -> int:
        """Fill the object from the file pointed to by a pathlib.Path.

        Argument:
            path: path of input file
            scan_csv_args: keyword arguments forwarded to Csv.from_path

        Returns: Number of rows in the loaded table
        """
        # NOTE(review): the loaded frame must provide a `length` column for
        # the offset computation; confirm the expected file layout.
        csv = Csv()
        csv.from_path(path, **scan_csv_args)
        self.lf = csv.lf

        self.__compute_offset()

        # Counting rows materialises the lazy frame.
        return self.lf.collect().shape[0]

    def __compute_offset(self):
        """Add the ``offset`` column to ``self.lf``.

        The running total of ``length`` minus the row's own length gives the
        summed length of all preceding rows; the result is cast to UInt64.
        """
        self.lf = self.lf.with_columns(offset=polars.col("length").cum_sum() - polars.col("length"))
        self.lf = self.lf.cast({"offset": polars.UInt64})
Oops, something went wrong.