In [None]:
import bibtexparser


def get_year_from_bibtex(bibtex: str) -> int | None:
    """Fetches the year attribute from the given BibTex entry. Returns None if no year can be found."""
    parsed_bibtex = bibtexparser.parse_string(bibtex)

    if not parsed_bibtex.entries:
        return None

    if not (
        year := next(
            (
                entry
                for key, entry in parsed_bibtex.entries[0].fields_dict.items()
                if key.lower() == "year"
            ),
            None,
        )
    ):
        return None

    if not year.value.isdigit():
        return None  # this also catches negative years

    return int(year.value)

In [None]:
from phylodata.process.paper.bibtex import get_year_from_bibtex


def generate_id(title: str, authors: list[str], bibtex: str) -> str:
    """Build an identifier of the form ``<first author><year><title word>``.

    Args:
        title: The paper title. Its first whitespace-separated word that is
            not a stop word becomes the last part of the identifier.
        authors: The author names; only the first one is used.
        bibtex: BibTeX source from which the year is read.

    Returns:
        The concatenation of the first author, the year and the title word.
        Any part that is unavailable (no authors, no parsable year, no
        non-stop word in the title) contributes an empty string.
    """
    # Degrade to an empty author part, like the year and title parts do,
    # instead of raising IndexError on an empty author list.
    author = authors[0] if authors else ""

    # STOP_WORDS is all lower-case, so compare case-insensitively; otherwise a
    # capitalised leading "The" or "A" would be picked as the title excerpt.
    title_excerpt = next(
        (word for word in title.split() if word.lower() not in STOP_WORDS), ""
    )

    year = get_year_from_bibtex(bibtex) or ""
    return f"{author}{year}{title_excerpt}"


# English stop words, all lower-case. Several entries share a line to keep the
# list compact; the order of the entries is unchanged.
# fmt: off
STOP_WORDS = [
    # pronouns
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
    "you", "your", "yours", "yourself", "yourselves",
    "he", "him", "his", "himself", "she", "her", "hers", "herself",
    "it", "its", "itself", "they", "them", "their", "theirs", "themselves",
    # interrogatives and demonstratives
    "what", "which", "who", "whom", "this", "that", "these", "those",
    # auxiliary verbs
    "am", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "having", "do", "does", "did", "doing",
    # articles and conjunctions
    "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
    # prepositions
    "of", "at", "by", "for", "with", "about", "against", "between", "into",
    "through", "during", "before", "after", "above", "below", "to", "from",
    "up", "down", "in", "out", "on", "off", "over", "under",
    # adverbs
    "again", "further", "then", "once", "here", "there", "when", "where", "why", "how",
    # quantifiers, negations and degree words
    "all", "any", "both", "each", "few", "more", "most", "other", "some", "such",
    "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very",
    # remaining entries
    "s", "t", "can", "will", "just", "don", "should", "now",
]
# fmt: on
"""The list of stop words taken from NLTK."""

In [3]:
import csv
from io import BytesIO, TextIOWrapper

In [4]:
# Read the whole log file into an in-memory binary buffer.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# on a machine where this file exists.
log_path = "/Users/tobiaochsner/Documents/Bayesian Total-Evidence Dating Reveals the Recent Crown Radiation of Penguins-phylodata/yule-n10-52.log"
with open(log_path, mode="rb") as fh:
    file = BytesIO(fh.read())

In [5]:
# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, and pass newline="" so the csv module does its own line-ending
# handling, matching how the preview wrapper is configured for writing.
wrapper = TextIOWrapper(file, encoding="utf-8", newline="")

# The first line of the file is taken as the header; columns are tab-separated.
tsv_file = csv.DictReader(wrapper, delimiter="\t")

In [65]:
# Materialise every data row so the rows can be both counted and sliced.
# The reader is a one-shot iterator: re-running this cell without re-creating
# tsv_file yields an empty list.
lines = [row for row in tsv_file]
num_rows = len(lines)

In [66]:
# Keep at most the first five data rows for the preview.
preview_lines = lines[0:5]

In [67]:
# In-memory target for the preview, plus a text layer on top of it that
# encodes as UTF-8 and leaves line endings untouched (newline="").
preview_file = BytesIO()
preview_wrapper = TextIOWrapper(
    preview_file,
    newline="",
    encoding="utf-8",
)

In [68]:
# Column names for the preview. Normally these are the keys of the first
# previewed row; when the log has no data rows at all, fall back to the
# reader's header instead of raising IndexError on preview_lines[0].
if preview_lines:
    preview_fieldnames = list(preview_lines[0].keys())
else:
    preview_fieldnames = list(tsv_file.fieldnames or [])

preview_writer = csv.DictWriter(
    preview_wrapper, fieldnames=preview_fieldnames, delimiter="\t"
)

In [69]:
# Emit the header line first, then one line per previewed row.
preview_writer.writeheader()
for preview_row in preview_lines:
    preview_writer.writerow(preview_row)

In [70]:
# Separate the text wrapper from the underlying BytesIO; the returned buffer is
# what the cell displays. After this call preview_wrapper is unusable, while
# preview_file remains available for reading the written bytes.
# NOTE(review): relies on detach() flushing pending text first (CPython does) —
# confirm if another interpreter is used.
preview_wrapper.detach()

<_io.BytesIO at 0x10c508860>

In [61]:
# Size of the preview in bytes.
len(preview_file.getvalue())

1327

In [1]:
import csv
from io import BytesIO, TextIOWrapper

from phylodata.process.utils.bytesio_utils import get_nexus_from_bytesio

In [2]:
# Read the whole trees file into an in-memory binary buffer.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# on a machine where this file exists.
trees_path = "/Users/tobiaochsner/Documents/Bayesian Total-Evidence Dating Reveals the Recent Crown Radiation of Penguins-phylodata/yule-n10-9.trees"
with open(trees_path, mode="rb") as fh:
    file = BytesIO(fh.read())

In [3]:
# Parse the in-memory trees file with the project helper.
# NOTE(review): presumably returns a commonnexus Nexus object — later cells use
# its .TREES block and replace_block(); confirm against the helper.
nexus = get_nexus_from_bytesio(file)

In [4]:
from commonnexus.blocks.trees import Trees

# Accumulators for the trees preview: the commands to keep, and how many TREE
# commands have been kept so far. (A placeholder loop that iterated over
# nexus.TREES without doing anything was removed here.)
preview_trees_commands = []
num_previewed_trees = 0

In [5]:
# Build the command list for the preview TREES block: every non-TREE command
# is kept, but only the first MAX_PREVIEW_TREES TREE commands are.
MAX_PREVIEW_TREES = 10

# Start from a clean slate so that re-running this cell does not append the
# same commands a second time.
preview_trees_commands = []
num_previewed_trees = 0

for command in nexus.TREES:
    is_tree_command = command.name.lower() == "tree"

    if is_tree_command and num_previewed_trees >= MAX_PREVIEW_TREES:
        # Enough trees collected; keep scanning because non-TREE commands
        # after this point must still be kept.
        continue

    if is_tree_command:
        num_previewed_trees += 1
    preview_trees_commands.append(command)

In [6]:
# Swap the original TREES block for a new Trees block built from only the
# commands collected for the preview.
# NOTE(review): replace_block appears to modify nexus in place (its return
# value is unused and nexus is serialised afterwards) — confirm against the
# commonnexus documentation.
nexus.replace_block(nexus.TREES, Trees(nexus, preview_trees_commands))

In [7]:
# Serialise the reduced nexus object as UTF-8 and store it in a fresh in-memory
# buffer; the cell displays the number of bytes written.
nexus_bytes = str(nexus).encode("utf-8")
preview_file = BytesIO()
preview_file.write(nexus_bytes)

4622