In [None]:
# NOTE: this notebook is executed and rendered to markdown,
# which displays as the `README.md` on the corresponding output branch.
# Input cells are hidden from the output.

In [None]:
import os
import subprocess
from datetime import datetime, timezone

import pandas as pd
from IPython.display import display, Markdown

from ensembl_genes import ensembl_genes

In [None]:
# parameters cell
# Default values for the notebook's two inputs. Both are passed to
# `ensembl_genes.Ensembl_Gene_Catalog_Writer` in the next cell, and `release`
# is also printed in the summary list.
# NOTE(review): presumably these are overridden at execution time by the tool
# that runs the notebook (papermill-style parameter injection) — confirm
# against the workflow. Keep them as plain literal assignments.
species = "human"
release = "104"

In [None]:
# Build the catalog writer for the configured species and release, then render
# its output directory (POSIX-style path) as the top-level heading.
ensg = ensembl_genes.Ensembl_Gene_Catalog_Writer(release=release, species=species)
title_markdown = f"# {ensg.output_directory.as_posix()}"
display(Markdown(title_markdown))

In [None]:
# Provenance for the rendered summary: the source commit and, when running in
# CI, a link to the GitHub Actions run that produced the export.
# `check_output` returns git's stdout verbatim, including the trailing newline;
# strip it so the hash can be embedded inside inline-code backticks without
# introducing a line break in the middle of the code span.
commit = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
# Fallback label used when the notebook is not executed in CI.
action_url = "local"
if os.environ.get("CI", "false").lower() == "true":
    # Both variables are read with `os.environ[...]`, so a CI environment that
    # lacks either of them raises KeyError here.
    repo_slug = os.environ["GITHUB_REPOSITORY"]
    run_id = os.environ["GITHUB_RUN_ID"]
    # The angle brackets turn the URL into a markdown autolink.
    action_url = f"<https://github.com/{repo_slug}/actions/runs/{run_id}>"

In [None]:
# Summary bullet list describing the export.
# The export date uses a timezone-aware UTC timestamp, so the ISO 8601 string
# carries an explicit `+00:00` offset. `datetime.utcnow()` is deprecated since
# Python 3.12 and returns a naive datetime whose string form does not say
# which timezone it is in.
display(Markdown(f'''\
- common name: {ensg.species.common_name}
- species: {ensg.species.name}
- database: `{ensg.database}`
- release: {release}
- assembly: {ensg.species.assembly}
- export date: {datetime.now(timezone.utc).isoformat()}
- source commit: `{commit}`
- created in action: {action_url}
'''))

## Table heads

The first 10 rows of each exported table are shown below.

In [None]:
# Emit one markdown section per export: heading, description, total row count,
# and a preview of the leading rows rendered as a markdown table.
for export in ensg.exports:
    parquet_path = ensg.output_directory.joinpath(f"{export.name}.snappy.parquet")
    table = pd.read_parquet(parquet_path).convert_dtypes()
    # The preview is limited to the head; the row count covers the whole table.
    preview = table.head(10).to_markdown(index=False)
    n_rows = len(table)
    section = f"### {export.name}\n\n{export.description}\nContains {n_rows:,} rows.\n\n{preview}\n"
    display(Markdown(section))