> [!IMPORTANT]  
> This is just a collection of random code from anndata, dask.array, and xarray to make some initial sketches for the `AnnData` display object.


In [1]:
import anndata
import numpy as np
import pooch
import scanpy as sc

In [2]:
# Retrieve the example .h5ad file from figshare into ../data via pooch;
# `known_hash` states the MD5 checksum the file is expected to have.
datapath = pooch.retrieve(
    url="https://figshare.com/ndownloader/files/40067737",
    known_hash="md5:b80deb0997f96b45d06f19c694e46243",
    path="../data",
    fname="scverse-getting-started-anndata-pbmc3k_processed.h5ad",
)

In [3]:
adata = anndata.read_h5ad(datapath)

In [4]:
adata

AnnData object with n_obs × n_vars = 2638 × 11505
    obs: 'n_genes', 'percent_mito', 'n_counts', 'louvain_cell_types'
    var: 'gene_names', 'n_cells', 'gene_ids'
    uns: 'louvain', 'louvain_colors', 'pca'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
    layers: 'raw'
    obsp: 'distances_all'

In [5]:
adata.obs

Unnamed: 0_level_0,n_genes,percent_mito,n_counts,louvain_cell_types
cell_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAACATACAACCAC-1,781,0.030178,2419.0,CD4 T cells
AAACATTGAGCTAC-1,1352,0.037936,4903.0,B cells
AAACATTGATCAGC-1,1131,0.008897,3147.0,CD4 T cells
AAACCGTGCTTCCG-1,960,0.017431,2639.0,CD14+ Monocytes
AAACCGTGTATGCG-1,522,0.012245,980.0,NK cells
...,...,...,...,...
TTTCGAACTCTCAT-1,1155,0.021104,3459.0,CD14+ Monocytes
TTTCTACTGAGGCA-1,1227,0.009294,3443.0,B cells
TTTCTACTTCCTCG-1,622,0.021971,1684.0,B cells
TTTGCATGAGAGGC-1,454,0.020548,1022.0,B cells


In [6]:
import jinja2
from IPython.display import HTML

def generate_svg(n_obs, n_vars, max_dimension=300, fill_color="lightgray", text_content="AnnData Object"):
    """Render a filled, labelled rectangle proportioned like an ``n_obs × n_vars`` array.

    The longer side is drawn at ``max_dimension`` and the shorter side is
    scaled by the ratio ``n_vars / n_obs``.

    Parameters
    ----------
    n_obs : int
        Number of observations (rows); determines the rectangle's height.
    n_vars : int
        Number of variables (columns); determines the rectangle's width.
    max_dimension : int or float
        Length given to the longer side of the rectangle.
    fill_color : str
        Value placed in the rectangle's ``fill`` attribute.
    text_content : str
        Label centred inside the rectangle.

    Returns
    -------
    str
        The rendered SVG markup.
    """
    if n_obs == 0:
        # With no rows the ratio n_vars / n_obs is undefined; collapse the
        # height instead of raising ZeroDivisionError.
        scaled_width = max_dimension if n_vars else 0
        scaled_height = 0
    else:
        aspect_ratio = n_vars / n_obs

        if aspect_ratio > 1:
            scaled_width = max_dimension
            scaled_height = max_dimension / aspect_ratio
        else:
            scaled_height = max_dimension
            scaled_width = max_dimension * aspect_ratio

    # autoescape=True so that characters such as "<", "&" or quotes in
    # text_content / fill_color cannot break out of the surrounding markup.
    svg_template = jinja2.Template("""
    <svg
        xmlns="http://www.w3.org/2000/svg"
        xmlns:xlink="http://www.w3.org/1999/xlink"
        width="{{ width }}"
        height="{{ height }}"
    >
        <rect x="0" y="0" width="{{ width }}" height="{{ height }}" fill="{{ fill_color }}"/>
        <text
            x="{{ width / 2 }}"
            y="{{ height / 2 }}"
            dominant-baseline="middle"
            text-anchor="middle"
            fill="white"
        >
            {{ text }}
        </text>
    </svg>
    """, autoescape=True)

    svg = svg_template.render(width=scaled_width, height=scaled_height, fill_color=fill_color, text=text_content)

    return svg

# Sketch of X: a light-green box proportioned like adata's (n_obs, n_vars),
# labelled "X" and shown as inline HTML.
svg = generate_svg(
    n_obs=adata.n_obs,
    n_vars=adata.n_vars,
    max_dimension=300,
    fill_color="lightgreen",
    text_content="X"
)
display(HTML(svg))

In [7]:
import numpy as np
import pandas as pd
import xarray as xr

import dask.array as da

# Seeded generator so the sketch array is identical after every kernel restart.
rng = np.random.default_rng(0)
data = rng.standard_normal((2, 10_000))
# Wrap the NumPy array in a dask array (no explicit chunking requested).
ddata = da.array(data)

In [8]:
# Wrap the dask array in an xarray DataArray with named dims and coordinates
# on "x" only.
# NOTE(review): this rebinds `data`, which the previous cell bound to the raw
# NumPy array, to a DataArray.
data = xr.DataArray(ddata, dims=("x", "y"), coords={"x": [10, 20]})
data

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(2, 10000)","(2, 10000)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (2, 10000) (2, 10000) Dask graph 1 chunks in 1 graph layer Data type float64 numpy.ndarray",10000  2,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(2, 10000)","(2, 10000)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [9]:
# Raw SVG markup holding three <path> elements. The inline style gives the
# <svg> element zero width/height and absolute positioning.
_svg = """
<svg style="position: absolute; width: 0; height: 0; overflow: hidden">
    <path d="M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z"></path>
    <path d="M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z"></path>
    <path d="M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z"></path>
</svg>
"""

display(HTML(_svg))

In [10]:
adata_small = adata[:5, ["LYZ", "FOS", "MALAT1"]]
adata_small

View of AnnData object with n_obs × n_vars = 5 × 3
    obs: 'n_genes', 'percent_mito', 'n_counts', 'louvain_cell_types'
    var: 'gene_names', 'n_cells', 'gene_ids'
    uns: 'louvain', 'louvain_colors', 'pca'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
    layers: 'raw'
    obsp: 'distances_all'

In [11]:
print(ddata)

dask.array<array, shape=(2, 10000), dtype=float64, chunksize=(2, 10000), chunktype=numpy.ndarray>


In [12]:
ddata.__repr__()

'dask.array<array, shape=(2, 10000), dtype=float64, chunksize=(2, 10000), chunktype=numpy.ndarray>'

In [13]:
ddata._meta

array([], shape=(0, 0), dtype=float64)

In [14]:
# Short display names, keyed by the (module name, class name) pair of the
# object stored in a dask array's ``_meta`` attribute.
_KNOWN_TYPE_REPRS = {
    ("numpy", "ndarray"): "np.ndarray",
    ("sparse._coo.core", "COO"): "sparse.COO",
}


def inline_dask_repr(array):
    """Build a compact one-line description of a dask array.

    Only the chunk size (the first chunk length along each dimension) and,
    when the array exposes ``_meta``, the type of that meta object are
    reported. Types listed in ``_KNOWN_TYPE_REPRS`` use their short alias;
    any other type is shown as ``module.ClassName``.
    """
    first_chunks = tuple(dim_chunks[0] for dim_chunks in array.chunks)

    meta_part = ""
    if hasattr(array, "_meta"):
        meta_type = type(array._meta)
        key = (meta_type.__module__, meta_type.__name__)
        # Fall back to the dotted path when no short alias is registered.
        label = _KNOWN_TYPE_REPRS.get(key, ".".join(key))
        meta_part = ", meta=" + str(label)

    return "dask.array<chunksize=" + str(first_chunks) + meta_part + ">"


def inline_sparse_repr(array):
    """Summarise a sparse array as ``<ClassName: nnz=..., fill_value=...>``.

    ``fill_value`` is reported as ``None`` when the object has no such
    attribute.
    """
    class_name = type(array).__name__
    stored = array.nnz
    fill = getattr(array, "fill_value", None)
    return f"<{class_name}: nnz={stored:d}, fill_value={fill!s}>"

inline_dask_repr(ddata)

'dask.array<chunksize=(2, 10000), meta=np.ndarray>'

In [15]:
inline_sparse_repr(adata.X)

'<csr_matrix: nnz=2076576, fill_value=None>'

In [16]:
def svg_lines(x1, y1, x2, y2, max_n=20):
    """Convert points into lines of text for an SVG plot

    Each index ``i`` yields one ``<line>`` element from ``(x1[i], y1[i])`` to
    ``(x2[i], y2[i])``; coordinates are formatted as integers. When more than
    ``max_n`` lines are given, only ``max_n`` evenly spaced ones (always
    including the first and last) are emitted. The first and last emitted
    lines are drawn with ``stroke-width:2``.

    Empty input returns an empty list, and a single line receives the
    emphasis style exactly once.

    Examples
    --------
    >>> svg_lines([0, 1], [0, 0], [10, 11], [1, 1])  # doctest: +NORMALIZE_WHITESPACE
    ['  <line x1="0" y1="0" x2="10" y2="1" style="stroke-width:2" />',
     '  <line x1="1" y1="0" x2="11" y2="1" style="stroke-width:2" />']
    """
    n = len(x1)

    if n > max_n:
        indices = np.linspace(0, n - 1, max_n, dtype="int")
    else:
        indices = range(n)

    last = len(indices) - 1
    lines = []
    for position, i in enumerate(indices):
        # Emphasise the outer border lines. Deciding by position (rather than
        # patching lines[0] and lines[-1] afterwards) avoids an IndexError on
        # empty input and a duplicated style attribute when only one line exists.
        style = ' style="stroke-width:2"' if position in (0, last) else ""
        lines.append(
            '  <line x1="%d" y1="%d" x2="%d" y2="%d"%s />'
            % (x1[i], y1[i], x2[i], y2[i], style)
        )
    return lines


def svg_grid(x, y, offset=(0, 0), skew=(0, 0), size=200):
    """Build the SVG text lines for a (possibly skewed) grid with a filled outline

    Parameters
    ----------
    x: numpy.ndarray
        grid positions along the horizontal axis
    y: numpy.ndarray
        grid positions along the vertical axis
    offset: tuple
        translational displacement of the grid in SVG coordinates
    skew: tuple
        ``skew[0]`` shifts y-coordinates in proportion to x,
        ``skew[1]`` shifts x-coordinates in proportion to y
    size: int
        ``size // 6`` caps the number of lines drawn per direction

    Returns
    -------
    tuple
        ``(lines, (min_x, max_x, min_y, max_y))``; the bounds are taken from
        the endpoints of the horizontal lines.
    """
    shift_x, shift_y = offset[0], offset[1]
    y_per_x, x_per_y = skew[0], skew[1]
    max_n = size // 6

    # Horizontal lines: one per entry of y, running from x = 0 to x[-1].
    row_x1 = np.zeros_like(y) + shift_x
    row_y1 = y + shift_y
    row_x2 = np.full_like(y, x[-1]) + shift_x
    row_y2 = y + shift_y

    if y_per_x:
        row_y2 += x.max() * y_per_x
    if x_per_y:
        row_x1 += x_per_y * y
        row_x2 += x_per_y * y

    bounds = (
        min(row_x1.min(), row_x2.min()),
        max(row_x1.max(), row_x2.max()),
        min(row_y1.min(), row_y2.min()),
        max(row_y1.max(), row_y2.max()),
    )

    h_lines = ["", "  <!-- Horizontal lines -->"]
    h_lines = h_lines + svg_lines(row_x1, row_y1, row_x2, row_y2, max_n)

    # Vertical lines: one per entry of x, running from y = 0 to y[-1].
    col_x1 = x + shift_x
    col_y1 = np.zeros_like(x) + shift_y
    col_x2 = x + shift_x
    col_y2 = np.full_like(x, y[-1]) + shift_y

    if y_per_x:
        col_y1 += y_per_x * x
        col_y2 += y_per_x * x
    if x_per_y:
        col_x2 += x_per_y * y.max()

    v_lines = ["", "  <!-- Vertical lines -->"]
    v_lines = v_lines + svg_lines(col_x1, col_y1, col_x2, col_y2, max_n)

    # Two greens: one when every grid line is drawn, another once lines are
    # being subsampled in either direction.
    if len(x) < max_n and len(y) < max_n:
        color = "4fba6f"
    else:
        color = "16a34a"

    # Outline polygon through the endpoints of the first and last vertical line.
    corners = (
        f"{col_x1[0]},{col_y1[0]} {col_x1[-1]},{col_y1[-1]} "
        f"{col_x2[-1]},{col_y2[-1]} {col_x2[0]},{col_y2[0]}"
    )
    rect = [
        "",
        "  <!-- Colored Rectangle -->",
        f'  <polygon points="{corners}" style="fill:#{color}A0;stroke-width:0"/>',
    ]

    return h_lines + v_lines + rect, bounds

def draw_sizes(shape, size=200):
    """Compute the drawn length in pixels of every dimension in ``shape``.

    The largest dimension gets ``size`` pixels; each other dimension is shrunk
    by ``ratio_response`` applied to how many times smaller it is. Dimensions
    below 0.1 are treated as 0.1 to avoid dividing by zero.
    """
    largest = max(shape)
    pixel_sizes = []
    for dim in shape:
        raw_ratio = largest / max(0.1, dim)
        pixel_sizes.append(size / ratio_response(raw_ratio))
    return tuple(pixel_sizes)

In [17]:
import math

def svg_2d(chunks, offset=(0, 0), skew=(0, 0), size=200, sizes=None):
    """Render a two-dimensional chunk layout as an SVG string.

    Parameters
    ----------
    chunks : tuple of tuple of int
        Chunk lengths per dimension; the first dimension is labelled "obs"
        and the second "vars".
    offset, skew : tuple
        Forwarded unchanged to ``svg_grid``.
    size : int
        Forwarded to ``draw_sizes`` and ``svg_grid``.
    sizes : sequence, optional
        Pixel lengths per dimension; computed with ``draw_sizes`` when falsy.

    Returns
    -------
    str
        Complete ``<svg>...</svg>`` markup.
    """
    shape = tuple(sum(dim_chunks) for dim_chunks in chunks)
    if not sizes:
        sizes = draw_sizes(shape, size=size)
    y, x = grid_points(chunks, sizes)

    grid_markup, extent = svg_grid(x, y, offset=offset, skew=skew, size=size)
    max_x, max_y = extent[1], extent[3]

    # Leave a 50px margin right of and below the grid for the labels.
    canvas = (max_x + 50, max_y + 50)
    header = (
        '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n'
        % canvas
    )

    # Long "obs" labels are turned on their side.
    rotate = -90 if shape[0] >= 100 else 0

    text = [
        "",
        "  <!-- Text -->",
        f'  <text x="{max_x / 2}" y="{max_y + 20}" {text_style} >{shape[1]:,} vars</text>',
        f'  <text x="{max_x + 20}" y="{max_y / 2}" {text_style} transform="rotate({rotate},{max_x + 20},{max_y / 2})">{shape[0]:,} obs</text>',
    ]

    body = "\n".join(grid_markup + text)
    return "".join([header, body, "\n</svg>"])


def ratio_response(x):
    """Compress a raw size ratio into the ratio actually used for drawing.

    Real-world ratios span several orders of magnitude, which is hard to
    perceive visually. Ratios below ``e`` are returned unchanged, ratios up to
    100 are squashed logarithmically, and anything beyond 100 is treated as
    exactly 100.
    """
    if x < math.e:
        return x
    # Cap the input at 100; the 12.4 shift makes the curve roughly continuous
    # where it takes over at x == e.
    capped = x if x <= 100 else 100
    return math.log(capped + 12.4)

def grid_points(chunks, sizes):
    """Return, per dimension, the chunk boundary positions scaled to pixels.

    For each dimension the cumulative chunk boundaries (starting at 0) are
    rescaled so that the final boundary lands on that dimension's entry in
    ``sizes``.
    """
    points = []
    for dim_chunks, extent in zip(chunks, sizes):
        boundaries = np.cumsum((0,) + dim_chunks)
        points.append(boundaries * extent / boundaries[-1])
    return points

# Attribute string interpolated into both <text> labels that svg_2d emits.
text_style = 'font-size="1.0rem" font-weight="100" text-anchor="middle"'

In [18]:
# Draw adata.X as a grid with one chunk per dimension, each chunk spanning
# the full length of that dimension.
HTML(svg_2d(
    tuple((dim, ) for dim in adata.X.shape)
))

In [19]:
ds = xr.tutorial.load_dataset("air_temperature")
ds.air