# In [5]:
import sys

# From here on, everything written to stderr goes to the Snakemake log file.
sys.stderr = open(snakemake.log[0], "w")
import pandas as pd
from pathlib import Path
import pysam

# Targets whose fraction is below this threshold are pooled into a single
# "other" row; falls back to 0.01 when the parameter is not provided.
min_fraction = snakemake.params.get("min_fraction", 0.01)

# Abundance table; only the "target_id" and "est_counts" columns are used.
quant = pd.read_csv(Path(snakemake.input.quant) / "abundance.tsv", sep="\t")

# Total number of reads = number of records in snakemake.input.fq1.
with pysam.FastxFile(snakemake.input.fq1) as fq1:
    total_counts = sum(1 for _ in fq1)

# Share of the total read count estimated for each target.
quant["fraction"] = quant["est_counts"] / total_counts

# Keep only the columns that end up in the report.
quant = quant[["target_id", "fraction", "est_counts"]]

# Pool every target below the threshold into one "other" row. The row is
# always emitted; summing an empty selection yields zeros.
noise_totals = quant.loc[
    quant["fraction"] < min_fraction, ["fraction", "est_counts"]
].sum()
other = pd.DataFrame(
    [
        {
            "target_id": "other",
            "fraction": noise_totals["fraction"],
            "est_counts": noise_totals["est_counts"],
        }
    ]
)

# NOTE: DataFrame.append was removed in pandas 2.0, so rows are added with
# pd.concat instead.
quant = pd.concat(
    [quant.loc[quant["fraction"] >= min_fraction], other],
    ignore_index=True,
)

# Whatever is not accounted for by the rows above is reported as "unmapped".
unmapped = pd.DataFrame(
    [
        {
            "target_id": "unmapped",
            "fraction": 1.0 - quant["fraction"].sum(),
            "est_counts": total_counts - quant["est_counts"].sum(),
        }
    ]
)
quant = pd.concat([quant, unmapped], ignore_index=True)

quant = quant.set_index("target_id", drop=True)

# Write the summary as a tab-separated table with target_id as first column.
quant.to_csv(snakemake.output[0], sep="\t")