Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
natir committed Dec 20, 2023
1 parent c6445bd commit 3723e9b
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 58 deletions.
15 changes: 13 additions & 2 deletions src/variantplaner/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,16 @@ def parser_process(value: typing.Any, state: typing.Any) -> typing.Any:
done = True
if not done:
value.append(state.rargs.pop(0))
print(len(value))
if len(value) == 1:
print(value)
value = value[0]
print(value)
else:
value = tuple(value)
self.nargs = len(value)

value = tuple(value)
self.nargs = len(value)
print(value)

# call the actual process
self._previous_parser_process(value, state)
Expand Down Expand Up @@ -90,3 +97,7 @@ def main(ctx: click.Context, *, threads: int = 1, verbose: int = 0, debug_info:


# module import required after main definition
from variantplaner.cli.metadata import *
from variantplaner.cli.struct import *
from variantplaner.cli.transmission import *
from variantplaner.cli.vcf2parquet import *
85 changes: 85 additions & 0 deletions src/variantplaner/cli/csv2parquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""Module contains csv2parquet subcommand entry point function."""

# std import
from __future__ import annotations

import logging
import pathlib
import sys

# 3rd party import
import click

# project import
from variantplaner import cli, exception, extract, io


@cli.main.group("csv2parquet", chain=True)
@click.pass_context
@click.option(
    "-i",
    "--input-path",
    help="Path to csv input file",
    type=click.Path(exists=True, dir_okay=False, readable=True, allow_dash=True, path_type=pathlib.Path),
    required=True,
)
@click.option(
    "-c",
    "--chrom2length-path",
    help="CSV file that associates a chromosome name with its size",
    type=click.Path(dir_okay=False, writable=True, path_type=pathlib.Path),
)
@click.option(
    "-a",
    "--append",
    help="Switch in append mode",
    type=bool,
    is_flag=True,
)
@click.option(
    "-C",
    "--chrom-col",
    help="Name of the chromosome column",
    type=str,
    default="chr",
    show_default=True,
)
@click.option(
    "-p",
    "--position-col",
    # Explicit destination name: the callback parameter is `pos_col`, whereas
    # click would otherwise derive `position_col` from the long flag.
    "pos_col",
    help="Name of the position column",
    type=str,
    default="pos",
    show_default=True,
)
@click.option(
    "-r",
    "--ref-col",
    help="Name of the reference allele column",
    type=str,
    default="ref",
    show_default=True,
)
@click.option(
    # Upper-case short flag because `-a` is already taken by `--append`
    # (same convention as `-c` / `-C` above).
    "-A",
    "--alt-col",
    help="Name of the alternative allele column",
    type=str,
    default="alt",
    show_default=True,
)
@click.option(
    "-s",
    "--separator",
    # Explicit destination name: the callback parameter is `separator_col`,
    # whereas click would otherwise derive `separator` from the long flag.
    "separator_col",
    help="Field separator used in the input file",
    type=str,
    default=",",
    show_default=True,
)
def csv2parquet(
    ctx: click.Context,
    input_path: pathlib.Path,
    chrom2length_path: pathlib.Path | None,
    *,
    append: bool,
    chrom_col: str | None,
    pos_col: str | None,
    ref_col: str | None,
    alt_col: str | None,
    separator_col: str | None,
) -> None:
    """Entry point of the chained `csv2parquet` command group.

    Args:
        ctx: Click context of the current invocation.
        input_path: Path of the csv file to read.
        chrom2length_path: Optional csv file associating a chromosome name
            with its size; `None` when the option is not given.
        append: Whether the append flag was set.
        chrom_col: Name of the chromosome column (default `chr`).
        pos_col: Name of the position column (default `pos`).
        ref_col: Name of the reference allele column (default `ref`).
        alt_col: Name of the alternative allele column (default `alt`).
        separator_col: Field separator of the input file (default `,`).
    """
    logger = logging.getLogger("csv2parquet")

    logger.debug(
        f"parameter: {input_path=} {chrom2length_path=} {append=} {chrom_col=} "
        f"{pos_col=} {ref_col=} {alt_col=} {separator_col=}"
    )

    # TODO: work in progress, the original definition stops at the signature.
    # Reading the csv file and sharing the result with chained sub-commands
    # is not implemented yet.
2 changes: 1 addition & 1 deletion src/variantplaner/cli/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"-t",
"--input-type",
help="Type of input file",
type=click.Choices(["csv", "tsv", "ljson", "json"]),
type=click.Choice(["csv", "tsv", "ljson", "json"]),
required=True,
)
def metadata(
Expand Down
44 changes: 33 additions & 11 deletions src/variantplaner/cli/vcf2parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def vcf2parquet(

try:
logger.debug("Start extract header")
header = io.vcf.extract_header(input_path)
headers = io.vcf.extract_header(input_path)
logger.debug("End extract header")
except exception.NotAVCFError:
logger.exception("")
Expand All @@ -65,7 +65,7 @@ def vcf2parquet(
ctx.obj["vcf_path"] = input_path
ctx.obj["lazyframe"] = lf
ctx.obj["append"] = append
ctx.obj["header"] = header
ctx.obj["headers"] = headers


@vcf2parquet.command("variants")
Expand All @@ -84,8 +84,6 @@ def variants(
"""Write variants."""
logger = logging.getLogger("vcf2parquet.variants")

ctx.ensure_object(dict)

lf = ctx.obj["lazyframe"]
append = ctx.obj["append"] # noqa: F841 not used now

Expand Down Expand Up @@ -144,16 +142,15 @@ def genotypes(
@click.option(
"-o",
"--output-path",
help="Path where variants will be written",
help="Path where genotypes will be written",
type=click.Path(writable=True, path_type=pathlib.Path),
required=True,
)
@click.pass_context
@click.option(
"-i",
"--info",
multiple=True,
help="List of info fields that are kept if this list is empty all fields are kept only the first vcf file header is read",
cls=cli.MultipleValueOption,
type=str,
)
@click.option(
Expand All @@ -171,17 +168,15 @@ def annotations(
"""Write annotations."""
logger = logging.getLogger("vcf2parquet.annotations")

ctx.ensure_object(dict)

lf = ctx.obj["lazyframe"]
append = ctx.obj["append"] # noqa: F841 not used now
header = ctx.obj["header"]
headers = ctx.obj["headers"]
input_path = ctx.obj["vcf_path"]

logger.debug(f"parameter: {output_path=}")

logger.info("Start extract annotations")
annotations_lf = lf.with_columns(io.vcf.info2expr(header, input_path, info))
annotations_lf = lf.with_columns(io.vcf.info2expr(headers, input_path, info))
annotations_lf = annotations_lf.drop(["chr", "pos", "ref", "alt", "filter", "qual", "info"])
if rename_id:
logger.info(f"Rename vcf variant id in {rename_id}")
Expand All @@ -191,3 +186,30 @@ def annotations(
logger.info(f"Start write annotations in {output_path}")
annotations_lf.sink_parquet(output_path, maintain_order=False)
logger.info(f"End write annotations in {output_path}")


@vcf2parquet.command("headers")
@click.pass_context
@click.option(
    "-o",
    "--output-path",
    help="Path where header will be written",
    type=click.Path(writable=True, path_type=pathlib.Path),
    required=True,
)
def headers(
    ctx: click.Context,
    output_path: pathlib.Path,
) -> None:
    """Write the vcf header lines stored in the context to a text file.

    Args:
        ctx: Click context; `ctx.obj["headers"]` is read and iterated.
        output_path: File that receives one header entry per line.
    """
    logger = logging.getLogger("vcf2parquet.headers")

    # Entries stored under the "headers" key of the shared context object.
    header_lines = ctx.obj["headers"]

    logger.debug(f"parameter: {output_path=}")

    logger.info(f"Start write headers in {output_path}")
    with open(output_path, "w") as sink:
        # Same output as print(entry, file=sink): str() of the entry plus a newline.
        sink.writelines(str(entry) + "\n" for entry in header_lines)
    logger.info(f"End write headers in {output_path}")
2 changes: 1 addition & 1 deletion src/variantplaner/io/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def into_lazyframe(

if extension == IntoLazyFrameExtension.MANAGE_SV:
drop_column = {"SVTYPE", "SVLEN"}
lf = lf.collect().select([col for col in lf.columns if col not in drop_column]).lazy()
lf = lf.select([col for col in lf.columns if col not in drop_column]).collect().lazy()

return lf

Expand Down
Loading

0 comments on commit 3723e9b

Please sign in to comment.