In [2]:
import glob
import pathlib

import celltypist
import matplotlib as mpl
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from celltypist import models

%matplotlib inline
from matplotlib import pyplot as plt

# Tissue label; it only selects the output sub-directory below.
tissue = "LN"

# Project-specific matplotlib overrides (fonts, embedded-font types for
# PDF/PS export, on-screen dpi).
params = {
    "font.size": 12,
    "axes.titlesize": 12,
    "axes.labelsize": 12,
    "legend.fontsize": 12,
    "xtick.labelsize": 8,
    "ytick.labelsize": 10,
    "font.family": "Arial",
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "figure.dpi": 100,
}

# Keyword arguments forwarded to Figure.savefig by save_figure().
savefig_args = {
    "dpi": 300,
    "bbox_inches": "tight",
    "pad_inches": 0,
    "transparent": True,
}

# Library presets go FIRST: seaborn's style/context and scanpy's
# set_figure_params each rewrite matplotlib rcParams (font sizes, font family,
# dpi). Applying them after `params` silently discarded the values above.
sns.set_style("ticks")
sns.set_context(context="paper")
sc.set_figure_params(
    format="pdf",
    transparent=True,
)

# Project overrides go LAST so that they are the values actually in effect.
mpl.rcParams.update(params)
mpl.rc("savefig", dpi=300)

# Output location and naming used by save_figure() and by scanpy's own saving.
output_dir = "figures/QCandAnnotation/{}".format(tissue)
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
output_suffix = ""
output_formats = [".png", ".svg"]
sc.settings.figdir = output_dir


def save_figure(
    fig,
    name,
    output_dir=output_dir,
    output_suffix=output_suffix,
    output_formats=output_formats,
    savefig_args=savefig_args,
):
    """Save ``fig`` once for every extension in ``output_formats``.

    Each file is written to ``output_dir + "/" + name + output_suffix + ext``
    and ``savefig_args`` is forwarded to ``fig.savefig`` as keyword arguments.

    The defaults are bound to the module-level settings when this function is
    defined, so later reassignment of those globals does not change them.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **kwargs)``
    name : str
        File stem, without directory or extension.
    output_dir, output_suffix : str
    output_formats : iterable of str
        Extensions including the leading dot.
    savefig_args : dict

    Returns
    -------
    None
    """
    for extension in output_formats:
        target = output_dir + "/" + name + output_suffix + extension
        fig.savefig(target, **savefig_args)


# Notebook-wide pandas display limits: rows and columns shown before
# truncation, and the character width used when wrapping wide frames.
pd.options.display.max_rows = 50
pd.options.display.max_columns = 20
pd.options.display.width = 100
# Load IPython's autoreload extension and use mode 2, which reloads imported
# modules before each cell runs so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Execute ../_helper.py and load the names it defines into this notebook's
# namespace. NOTE(review): later cells may rely on names defined there; they
# are not visible from this notebook, so check the helper when a name seems
# to come from nowhere.
%run ../_helper.py

In [3]:
def merge_vdj(obs_df, vdj_df):
    """Left-join VDJ records onto per-cell observations.

    Both tables are keyed by ``cb_suid = "<cb>_<sample_uid>"``. For ``obs_df``
    the ``cb`` part is the text of the index label before the first ``"-"``.
    For ``vdj_df`` it is the ``cb`` column. When several VDJ rows share a key,
    only the first is kept.

    Neither input frame is modified.

    Parameters
    ----------
    obs_df : pandas.DataFrame
        String-indexed frame containing a ``sample_uid`` column.
    vdj_df : pandas.DataFrame
        Frame that, after ``reset_index()``, has ``cb`` and ``sample_uid``
        columns.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``obs_df``, in the same order, indexed by
        ``cb_suid``. It has a new ``cb`` column followed by the VDJ columns.
        VDJ columns whose names clash with ``obs_df`` get a ``"_vdj"`` suffix.
        Rows without a VDJ match hold NaN in the VDJ columns.
    """
    # reset_index() already returns a new frame, so the caller's vdj_df is safe.
    vdj = vdj_df.reset_index()
    vdj["cb_suid"] = vdj["cb"] + "_" + vdj["sample_uid"]
    # Keep the first record per key; drop_duplicates returns a fresh frame, so
    # the set_index below is not operating on a slice of another frame.
    vdj = vdj.drop_duplicates(subset="cb_suid", keep="first").set_index("cb_suid")

    # Copy before adding columns so the caller's obs_df is left untouched.
    obs = obs_df.copy()
    # .str[0] (rather than expand=True followed by [0]) also works when obs_df
    # has no rows.
    obs["cb"] = obs.index.to_series().str.split("-").str[0].to_numpy()
    obs["cb_suid"] = obs["cb"].astype(str) + "_" + obs["sample_uid"].astype(str)
    obs = obs.set_index("cb_suid")

    return pd.merge(
        obs,
        vdj,
        left_index=True,
        right_index=True,
        how="left",
        suffixes=(None, "_vdj"),
    )

In [4]:
# Load the integrated VDJ table (tab-separated, gzip-compressed); the first
# column becomes the index.
# NOTE(review): the saved cell output shows a warning pointing at this call,
# presumably a mixed-dtype warning; confirm and consider passing explicit
# dtypes. The absolute home-directory path also ties the notebook to one
# machine.
vdj_df = pd.read_csv(
    "/home/michaelswift/repos/shared_data/tbd_share/vdj_integrated_with_flags.tsv.gz",
    sep="\t",
    index_col=0,
)

  vdj_df = pd.read_table(


In [5]:
# Load the per-cell label table (tab-separated, gzip-compressed) from the
# working directory; the first column becomes the index.
obs_df = pd.read_csv("gex_labels.tsv.gz", sep="\t", index_col=0)

In [6]:
# Attach the VDJ columns to every cell; from here on obs_df is indexed by
# cb_suid instead of the original barcode labels.
obs_df = merge_vdj(obs_df=obs_df, vdj_df=vdj_df)

In [7]:
import gc
# Drop this notebook's reference to the raw VDJ table now that it has been
# merged into obs_df. Re-running earlier cells that read vdj_df will fail
# until it is loaded again.
del vdj_df
# Force a garbage-collection pass; the return value (the number of unreachable
# objects found) is displayed as the cell output.
gc.collect()

18

In [13]:
# Keep only rows that have a c_call value. The explicit .copy() makes obs_df
# an independent frame, so the column assignment below does not write into a
# slice of the previous frame (the chained-assignment pattern pandas warns
# about).
obs_df = obs_df[obs_df["c_call"].notna()].copy()
# Create the "switched" column: IGHM and IGHD are grouped as "IGHM|D", and
# every other c_call value is labelled "switched".
s = {"IGHM": "IGHM|D", "IGHD": "IGHM|D"}
obs_df["switched"] = obs_df["c_call"].map(lambda call: s.get(call, "switched"))

In [39]:
# Keep only rows whose probable_hq_single_b_cell flag equals True; missing or
# other values compare unequal and are dropped.
obs_df = obs_df.loc[obs_df["probable_hq_single_b_cell"].eq(True)]

In [10]:
import matplotlib.patches as patches
import packcircles as pc

# Demo layout: pack 1000 equal circles of radius 5 and draw each one as a
# six-sided polygon. A hand-picked list of unequal radii was used at some
# point instead: [28,12,51,26,10,16,24,25,59,11,29,40,16,11,10,26,39,16,48,36,28]
radii = list(np.full(1000, 5.0))
circles = pc.pack(radii)
df = pd.DataFrame(list(circles), columns=["x", "y", "radius"])

# The first 100 packed circles are black and the rest are khaki.
df["color"] = np.where(np.arange(len(df)) < 100, "k", "khaki")

# 15 x 15 inch canvas.
fig, ax = plt.subplots(figsize=(15, 15))

for circle in df.itertuples(index=False):
    ax.add_patch(
        patches.CirclePolygon(
            (circle.x, circle.y),
            circle.radius,
            resolution=6,
            color=circle.color,
            alpha=1,
        )
    )

# Fixed data window; patches do not trigger autoscaling when added this way.
ax.set_xlim(-150, 150)
ax.set_ylim(-170, 170)
ax.axis("off")

# Attempted Port of Honeycomb Plots to Python

In [3]:
# list of clonotypes / lineages
exacts = []
# list of identical vdjs
exact_clonotypes = []

In [4]:
# The purpose of this file is the function plot_clonotypes.  It plots clonotypes as partial
# hexagonal closest packings.  This is visually kind of satisfying, but also a bit weird looking.
# In some cases, by eye, you can see rounder forms that could be created by relocating some of
# the cells.

import io_utils
import enclone_core
import vdj_ann
import ansi_escape
import string_utils
import vector_utils
from assign_cell_color import VAR_HIGH, VAR_LOW
from cat_var import setup_cat_var
from circles_to_svg import circles_to_svg
from colors import *
from convert_svg_to_png import convert_svg_to_png
from group_colors import make_group_colors
from legend import add_legend_for_color_by_variable
from pack_circles import pack_circles
from plot_utils import build_clusters
from polygon import enclosing_polygon
from string_width import arial_width
from get_svg_height import get_svg_height
from set_svg_height import set_svg_height
from set_svg_width import set_svg_width
from substitute_enclone_color import substitute_enclone_color
from BOUNDARY import BOUNDARY
from cell_color import CellColor
from defs import EncloneControl, ExactClonotype, PlotOpt, POUT_SEP

import collections
import time

def plot_clonotypes(
    ctl: EncloneControl,
    plot_opt: PlotOpt, # overrides ctl
    refdata: vdj_ann.refx.RefData,
    # exacts: One entry for each clonotype.
    exacts: list[list[int]],
    exact_clonotypes: list[ExactClonotype],
    out_datas: list[list[collections.defaultdict[str, str]]],
    # groups: There is one entry for each group of clonotypes.  The first entries of the inner
    # vectors indexes into exacts, and the second entry (String) is not used here.
    groups: list[list[tuple[int, str]]],
    svg: "io.StringIO",
) -> None:
    """Lay out clonotypes as packed circles (unfinished port, not runnable).

    Visible steps, in order: validate the isotype colour options, collect the
    origins, build one cluster per clonotype, optionally read a
    clonotype-group-names CSV, pack the cluster circles per shading group,
    build shading polygons, and reorder constant-colour clusters by angle.

    The body stops partway through the split-plot branch. In the code shown
    here `svg` and `groups` are never read, nothing is drawn, and the function
    returns None implicitly. Several statements keep Rust idioms or refer to
    names that the visible cell does not define; they are marked with
    NOTE(review) comments below.

    Parameters
    ----------
    ctl : EncloneControl
        Read for `origin_info`, `gen_opt.clonotype_group_names`, `perf_stats`.
    plot_opt : PlotOpt
        Plot options; an empty `plot_file` makes the function return at once.
    refdata : RefData
        Read for `cs`, `rtype` and `name`.
    exacts : list of list of int
        One inner list per clonotype, indexing into `exact_clonotypes`.
    exact_clonotypes : list of ExactClonotype
    out_datas : nested list of dict
        Passed through to `setup_cat_var` and `build_clusters`.
    groups : unused in the visible body.
    svg : unused in the visible body.

    Raises
    ------
    Exception
        If `exacts` is empty, or the isotype colour options are inconsistent
        with the number of constant-region names.
    ValueError
        If the clonotype-group-names file has an out-of-range group_id or maps
        one group_id to two different names.
    """
    t = time.time()
    if plot_opt.plot_file == '':
        return

    # Give up if no clonotypes.
    if len(exacts) == 0:
        raise Exception("\nThere are no clonotypes to plot, giving up.\n")

    # NOTE(review): need_default_colors is assigned but never read in the visible body.
    need_default_colors = False
    if plot_opt.cell_color.color_by_dataset:
        need_default_colors = True
    # Collect the de-duplicated names of reference entries listed in refdata.cs
    # whose rtype is 0 (presumably constant regions, going by the variable name).
    # Going through set() makes the resulting order arbitrary.
    # NOTE(review): the loop variable shadows the builtin `id`.
    const_names = []
    for id in refdata.cs:
        if refdata.rtype[id] == 0:
            const_names.append(refdata.name[id])
    const_names = list(set(const_names))

    # Explicit isotype colours need one colour per constant-region name plus one
    # for "undetermined"; the built-in palette path is limited to 12 names.
    if len(plot_opt.plot_by_isotype_color) != 0:
        if len(plot_opt.plot_by_isotype_color) < len(const_names) + 1:
            raise Exception(f"\nUsing the PLOT_BY_ISOTYPE_COLOR argument, you specified {len(plot_opt.plot_by_isotype_color)} colors, but there are {len(const_names)} constant region\nnames, and one more color is needed for the \"undetermined\" case.  Please add more colors.\n")
    elif plot_opt.plot_by_isotype and len(const_names) > 12:
        raise Exception("\nCurrently PLOT_BY_ISOTYPE only works if there are at most 12 constant region names.  If this is a problem, please let us know and we will generalize it.\n")

    # Get origins.
    # NOTE(review): `origins` is built here but not read again in the visible body.
    origins = []
    for i in range(len(exacts)):
        for j in range(len(exacts[i])):
            ex = exact_clonotypes[exacts[i][j]]
            for k in range(len(ex.clones)):
                if ex.clones[k][0].origin_index is not None:
                    s = ctl.origin_info.origin_list[ex.clones[k][0].origin_index]
                    origins.append(s)
    origins = sorted(set(origins))

    # Determine if we are coloring cells by categorical variable value, and if so, assign colors.
    # NOTE(review): `by_cat_var` is a bool passed by value, so setup_cat_var cannot
    # change this local; only the dict and list arguments can be filled in place.
    # As written, build_clusters below always receives by_cat_var=False.
    by_cat_var = False
    barcode_to_cat_var_color = {}
    cat_var_labels = []
    setup_cat_var(plot_opt, exacts, exact_clonotypes, out_datas, by_cat_var, barcode_to_cat_var_color, cat_var_labels)

    # Build one cluster for each clonotype.
    clusters = build_clusters(ctl, plot_opt, refdata, exacts, exact_clonotypes, out_datas, const_names, by_cat_var, barcode_to_cat_var_color, cat_var_labels)
    radii = [cluster.radius for cluster in clusters]

    # Set group specification, if CLONOTYPE_GROUP_NAMES was specified.
    # Note that CLONOTYPE_GROUP_NAMES is probably broken now.
    # Defaults: every cluster in shading group 0, with one empty colour and name.
    group_id = [0] * len(radii)
    group_color = [""]
    group_name = [""]
    if ctl.gen_opt.clonotype_group_names is not None:
        # The file is read as comma-separated text. The first line is a header
        # that must contain the columns 'group_id' and 'new_group_name'.
        with open(ctl.gen_opt.clonotype_group_names) as f:
            first = True
            group_id_field = 0
            new_group_name_field = 0
            new_group_names = [None] * len(radii)
            for line in f:
                s = line.strip()
                if s.startswith('\ufeff'):
                    s = s[1:] # remove BOM
                fields = s.split(',')
                if first:
                    group_id_field = fields.index('group_id')
                    new_group_name_field = fields.index('new_group_name')
                    first = False
                else:
                    # NOTE(review): this rebinds `group_id` from the list defined
                    # above to an int. The value is converted to 0-based first, but
                    # the range check is still 1-based: file value 1 (index 0) is
                    # rejected, while index len(radii) is accepted and then
                    # overruns new_group_names.
                    group_id = int(fields[group_id_field]) - 1
                    if not (1 <= group_id <= len(radii)):
                        raise ValueError(f"The group_id {group_id} in your CLONOTYPE_GROUP_NAMES file is out of range.")
                    new_group_name = fields[new_group_name_field]
                    if new_group_names[group_id] is None:
                        new_group_names[group_id] = new_group_name
                    elif new_group_names[group_id] != new_group_name:
                        raise ValueError(f"The group_id {group_id} in your CLONOTYPE_GROUP_NAMES file is assigned the different new_group_names {new_group_names[group_id]} and {new_group_name}.")


    # NOTE(review): everything from here to `group_color = make_group_colors(...)`
    # sits outside the `if` above. `new_group_names` exists only when a group-names
    # file was given, so the default path raises NameError here; and when it does
    # run, the default group_id/group_color/group_name are replaced unconditionally.
    # NOTE(review): next_diff1_2, reverse_sort and position are not defined or
    # imported by name in this cell; confirm where they come from.

    # Reverse sort by total number of cells associated to a name. This defines group names.
    nx = []
    for i in range(len(new_group_names)):
        if new_group_names[i] is not None:
            nx.append((new_group_names[i], len(clusters[i].coords)))
    nx.sort()
    ny = []
    i = 0
    while i < len(nx):
        # Presumably j is the end of the run of entries sharing nx[i]'s name; n
        # sums the cell counts over that run.
        j = next_diff1_2(nx, i)
        n = sum(nx[k][1] for k in range(i, j))
        ny.append((n, nx[i][0]))
        i = j
    reverse_sort(ny)
    group_name = [ny[i][1] for i in range(len(ny))]

    # Define group ids and colors.
    # Clusters without a name get no entry, so group_id can be shorter than radii.
    group_id = []
    for i in range(len(new_group_names)):
        if new_group_names[i] is not None:
            p = position(group_name, new_group_names[i])
            group_id.append(p)
    group_color = make_group_colors(len(group_name))

    ngroups = len(group_color)  # THESE ARE SHADING GROUPS!

    ctl.perf_stats(t, "in preamble to plotting clonotypes")

    # Traverse the shading groups. In the default case, there is just one!!!!!!!!!!!!!!!!!!!!!!!!

    # NOTE(review): `Instant` is not imported by name in this cell (the first
    # timer above uses time.time()); this looks like a leftover Rust idiom.
    t = Instant.now()
    # NOTE(review): `not group_color[0]` is True when the first colour is empty,
    # so the default single group with colour "" turns shading ON; confirm the
    # intended polarity. An empty group_color raises IndexError here.
    using_shading = ngroups > 1 or not group_color[0]

    blacklist = []
    shades = []
    shade_colors = []
    shade_enclosures = []
    centers = [(0.0, 0.0) for i in range(len(radii))]
    for g in range(ngroups):
        # Gather the group. In the default case, ids = 0..(number of clusters).
        ids = []
        radiix = []
        for i in range(len(group_id)):
            if group_id[i] == g:
                ids.append(i)
                radiix.append(radii[i])

        # Find circle centers.
        centersx = []
        xshift = []
        if plot_opt.split_plot_by_origin or plot_opt.split_plot_by_dataset:
            # One packing per origin (or per dataset), laid out left to right.
            passes = ctl.origin_info.origin_list_len() if plot_opt.split_plot_by_origin else ctl.origin_info.n()
            xstart = 0.0
            for pass_ in range(passes):
                radiiy = []
                indices = []
                for i in range(len(radiix)):
                    li = clusters[i].barcodes[0][0]
                    # NOTE(review): bin_position is not defined or imported by name here.
                    p = bin_position(ctl.origin_info.origin_list, ctl.origin_info.origin_id[li]) if plot_opt.split_plot_by_origin else li
                    if pass_ != p:
                        continue
                    radiiy.append(radiix[i])
                    indices.append(i)
                centersy = pack_circles(radiiy, blacklist, plot_opt.plot_quad)
                # max() over an empty sequence raises ValueError, so a pass that
                # selects no circles fails here (and at `right` below).
                left = max(-centersy[j][0] + radiiy[j] for j in range(len(centersy)))
                xstart += left
                for j in range(len(centersy)):
                    centersx.append((centersy[j][0] + xstart, centersy[j][1]))
                    xshift.append(xstart)
                right = max(centersy[j][0] + radiiy[j] for j in range(len(centersy)))
                HSEP = 10.0
                xstart += right + HSEP
        else:
            centersx = pack_circles(radiix, blacklist, plot_opt.plot_quad)
        for i in range(len(ids)):
            centers[ids[i]] = centersx[i]



    # NOTE(review): from here on the code is indented at function level, i.e.
    # AFTER the `for g` loop. It therefore runs once, using g, ids, radiix,
    # centersx and xshift left over from the last iteration, and those names are
    # undefined if ngroups is 0. The per-group wording of the comments suggests
    # it was meant to be inside the loop; confirm.

    # Find polygon around the group.

    if using_shading:
        z = [(radiix[i], centersx[i][0], centersx[i][1]) for i in range(len(centersx))]
        d = 5.0 # distance of polygon from the circles
        n = 35 # number of vertices on polygon
        p = enclosing_polygon(z, d, n)
        shades.append(p.copy())
        # NOTE(review): .copy() is called on a group colour; Python str has no
        # such method, so this only works if colours are not plain strings.
        shade_colors.append(group_color[g].copy())

        # Build an enlarged polygon that includes the smoothed polygonal curve.

        POLYGON_ENLARGEMENT = 22.5 # must be larger than BOUNDING_CURVE_BOUND
        p.enlarge(POLYGON_ENLARGEMENT)
        p.precompute()
        shade_enclosures.append(p.copy())
        blacklist.append(p)

    # Reorganize constant-color clusters so that like-colored clusters are proximate,
    # We got this idea from Ganesh Phad, who showed us a picture!  The primary effect is on
    # single-cell clonotypes.
    #
    # We do the split_plot_by_origin/dataset case second.  It is a more complicated version of
    # the same algorithm.  The second part definitely does not work with grouping.

    if not plot_opt.split_plot_by_origin and not plot_opt.split_plot_by_dataset:
        ccc = [] # (cluster size, color, index)
        # NOTE(review): if `clusters` is a list this is a shallow copy, so the
        # attribute assignments on clusters2[...] below also change the objects
        # seen through `clusters`; verify against build_clusters' return type.
        clusters2 = clusters.copy()
        for i, id in enumerate(ids):
            c = clusters[id].colors.copy()
            c = sorted(list(set(c)))
            if len(c) == 1:
                # The third field stores i, the position within ids, not the cluster index.
                ccc.append((len(clusters[id].colors), c[0], i))
        ccc.sort()
        i = 0
        while i < len(ccc):
            # On a given iteration of the while loop, we process all the constant-color
            # clusters that have the same size.  First we do the clusters that contains
            # just one cell, and so forth.
            # NOTE(review): next_diff1_3 is not defined or imported by name here.
            j = next_diff1_3(ccc, i)
            j = j if j is not None else len(ccc)
            # NOTE(review): `.atan2` is called as a method on the y coordinate. If
            # the coordinates are plain floats this raises AttributeError; the
            # stdlib spelling is math.atan2(y, x).
            angle = [(centersx[ccc[k][2]][1].atan2(centersx[ccc[k][2]][0]), k-i) for k in range(i, j)]
            angle.sort()
            for k in range(i, j):
                # NOTE(review): new_id is an offset within this size bucket (the
                # k-i stored in `angle`) yet is used directly as an index into ids.
                # `id` is a position within ids yet indexes clusters directly.
                # Confirm both against the code this was ported from.
                new_id = angle[k-i][1]
                id = ccc[k][2]
                clusters2[ids[new_id]].colors = clusters[id].colors.copy()
                clusters2[ids[new_id]].clonotype_index = clusters[id].clonotype_index
                clusters2[ids[new_id]].barcodes = clusters[id].barcodes.copy()
            i = j
        clusters = clusters2
    else:
        clusters2 = clusters.copy()
        centersp = centersx.copy()
        for i in range(len(centersp)):
            # NOTE(review): in this branch centersx holds tuples (built in the
            # split-plot packing above), so item assignment raises TypeError.
            centersp[i][0] -= xshift[i]
        # NOTE(review): the else-arm yields the origin_info object itself, whereas
        # the packing loop above uses ctl.origin_info.n(); `passes` is not used
        # after this line and the function ends here.
        passes = len(ctl.origin_info.origin_list) if plot_opt.split_plot_by_origin else ctl.origin_info


ModuleNotFoundError: No module named 'io_utils'