In [None]:
import pandas as pd
import numpy as np
import os
from os import path
import matplotlib.pyplot as plt
import bokeh
from bokeh import palettes
from bokeh.palettes import Set1
from bokeh.palettes import Set2
from bokeh.palettes import Set3
from bokeh.palettes import Dark2
import matplotlib as mpl

### For mice

In [None]:
# --- Analysis configuration --------------------------------------------------
# The two genotypes being compared. Both names are substituted into every
# input and output file name below.
geno1 = 'DKD'
geno2 = 'WT'

# Cell type and section that select which set of input files is read.
cell_type = 'glom_podo'
section = 'cortex'

# Cell types whose plots draw unclustered points as their own group and add a
# 'not clustered' entry to the legend.
select = ['glom_endo','ren1','glom_podo','CDPC','CDIC','DCT']

# Directory the PDF figures are saved to. It was referenced by every savefig
# cell but never defined, so a fresh "Restart & Run All" failed with NameError.
# Defaults to the working directory, which is also where inputs are read from.
out_dir = '.'
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Location of the CSV holding the umap_1, umap_2 coordinates for this
# genotype pair / cell type / section.
in_path = '{}_{}_{}_{}_standard_embedding.csv'.format(geno1, geno2, cell_type, section)
# The first CSV column is consumed as the row index; the remaining columns
# become the coordinate array.
standard_embedding = np.array(
    pd.read_csv(in_path, index_col=0)
)

In [None]:
# Location of the CSV holding one cluster label per embedded point.
in_path = '{}_{}_{}_{}_labels.csv'.format(geno1, geno2, cell_type, section)
# Load the labels as a flat 1-D array. np.array(DataFrame) gives an (n, 1)
# column, and the later cells use `labels >= 0` as a row mask together with a
# column index (standard_embedding[mask, 0]), which needs a 1-D mask.
# Assumes the file holds a single label column after the index - TODO confirm.
labels = pd.read_csv(in_path, index_col=0).iloc[:, 0].to_numpy()

In [None]:
# Mask of points that belong to a cluster: a label below zero is treated as
# "not clustered".
clustered = labels >= 0
# Distinct cluster ids among the clustered points (note that, despite the
# variable name, id 0 is included) and the number of such clusters.
nonzero_clusters = np.unique(labels[clustered])
n_clusters = nonzero_clusters.size

#### Plot cluster ids in UMAP-space

In [None]:
# Scatter every point in UMAP space coloured by cluster id and save it as PDF.
out_path = os.path.join(
    out_dir,
    '{}_{}_{}_{}_UMAP_colored_by_clustid.pdf'.format(geno1, geno2, cell_type, section))

# rcParams are set before the figure/axes exist so they apply to this figure
# (axes.grid in particular only affects axes created afterwards).
plt.rcParams["font.family"] = "Arial"
plt.rcParams['font.size'] = 20
plt.rcParams["axes.grid"] = False

# Work on flattened labels so the masks are 1-D even when `labels` was loaded
# as an (n, 1) column; a 2-D mask cannot be combined with a column index.
point_labels = np.ravel(labels)
is_clustered = point_labels >= 0
cluster_ids = np.unique(point_labels[is_clustered])

# Colour is looked up by cluster id; the modulo keeps the lookup in range when
# there are more clusters than palette entries (colours then repeat).
cluster_palette = ['#1f77b4', '#ff7f0e','mediumseagreen','r','blueviolet','brown']
cluster_colors = [cluster_palette[int(cid) % len(cluster_palette)] for cid in cluster_ids]

fig, ax = plt.subplots(figsize=(5,5))

# Unclustered points go underneath in translucent grey. The colour is passed
# as a one-row 2-D sequence: a bare (r, g, b) tuple in `c` can be mistaken for
# three values to colour-map when there happen to be exactly three points.
ax.scatter(standard_embedding[~is_clustered, 0],
           standard_embedding[~is_clustered, 1],
           c=[(0.5, 0.5, 0.5)],
           s=15,
           alpha=0.5)
for cid, cluster_color in zip(cluster_ids, cluster_colors):
    in_cluster = point_labels == cid
    ax.scatter(standard_embedding[in_cluster, 0],
               standard_embedding[in_cluster, 1],
               c=cluster_color,
               ec='black',
               linewidth=0.5,
               s=15)

ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.set_xlabel('UMAP-1')
ax.set_ylabel('UMAP-2')

# Legend: one entry per cluster actually drawn (previously hard-coded to
# clusters 0 and 1), using the same colours as the scatter above.
colors = list(cluster_colors)
texts = ['cluster {:d}'.format(int(cid)) for cid in cluster_ids]
outlines = ['black'] * len(texts)
# Was `celltype`, an undefined name; the configured variable is `cell_type`.
if cell_type in select:
    colors.append('lightgrey')
    texts.append('not clustered')
    outlines.append('grey')

# Empty line plots serve purely as legend handles (marker only, no line).
patches = [
    ax.plot([], [], marker="o", ms=10, mec=outline, ls='none', linewidth=0.6,
            color=color, label="{:s}".format(text))[0]
    for color, text, outline in zip(colors, texts, outlines)
]
ax.legend(handles=patches, loc='center left', framealpha=1, frameon=False,
          bbox_to_anchor=(1, 0.5), title='Cluster id')

fig.savefig(out_path, dpi=300, bbox_inches='tight')

#### Plot genotypes in UMAP-space

In [None]:
# input_path is path to file with all data points to-be-clustered (aggregated
# structures or beads) for the specified section and cell type in the
# specified genotypes that were analyzed.
input_path = '{}_{}_{}_{}_combined_cts.pkl'.format(geno1, geno2, cell_type, section)
# NOTE(security): unpickling can execute arbitrary code - only load files that
# were produced by a trusted step of this pipeline.
combined_cts = pd.read_pickle(input_path)

# Count the clustered points of each genotype. The mask is flattened so this
# works whether `clustered` is 1-D or an (n, 1) column, and the counts use a
# vectorized .sum() instead of Python's built-in sum() over a Series.
clustered_mask = np.ravel(clustered)
pheno_clustered = combined_cts['pheno'].to_numpy()[clustered_mask]
geno1_ct = int((pheno_clustered == geno1).sum())
geno2_ct = int((pheno_clustered == geno2).sum())

In [None]:
# Scatter the points in UMAP space coloured by genotype and save it as PDF.
out_path = os.path.join(
    out_dir,
    '{}_{}_{}_{}_UMAP_colored_by_pheno.pdf'.format(geno1, geno2, cell_type, section))

# Flattened so the mask is 1-D even when `labels` was loaded as an (n, 1)
# column; a 2-D mask cannot be combined with a column index.
clustered = np.ravel(labels) >= 0

# rcParams are set before the figure/axes exist so they apply to this figure.
plt.rcParams["font.family"] = "Arial"
plt.rcParams['font.size'] = 20
plt.rcParams["axes.grid"] = False

fig, ax = plt.subplots(figsize=(5,5))

# Unclustered points in translucent grey. The colour is a one-row 2-D sequence
# because a bare (r, g, b) tuple in `c` is ambiguous with three values to
# colour-map.
ax.scatter(standard_embedding[~clustered, 0],
           standard_embedding[~clustered, 1],
           c=[(0.5, 0.5, 0.5)],
           s=15,
           alpha=0.5)

# NOTE(review): the split below assumes the clustered rows are ordered with
# all geno1 points first, followed by the remaining points - confirm against
# how combined_cts / the embedding were assembled.
embedding_clustered = standard_embedding[clustered]
ax.scatter(embedding_clustered[:geno1_ct, 0],
           embedding_clustered[:geno1_ct, 1],
           c='r',
           ec='black',
           linewidth=0.5,
           s=15)
ax.scatter(embedding_clustered[geno1_ct:, 0],
           embedding_clustered[geno1_ct:, 1],
           c='b',
           ec='black',
           linewidth=0.5,
           s=15)

ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.set_xlabel('UMAP-1')
ax.set_ylabel('UMAP-2')

# Legend labels depend on geno1 and geno2 and must be edited by hand for other
# genotype pairs (e.g. ['WT', 'UMOD_KI']). Order is: blue = points after the
# first geno1_ct, red = the first geno1_ct points. Raw strings keep the
# mathtext backslash without an invalid-escape warning.
colors = ['b', 'r']
texts = [r'BTBR $\it{wt/wt}$', r'BTBR $\it{ob/ob}$']
outlines = ['black', 'black']
# Was `celltype in select+select2`: `celltype` and `select2` are not defined
# anywhere in this notebook, so the same `cell_type in select` test as the
# batch cells is used. Previously the else branch also never set `texts` and
# silently reused the labels left over from the cluster-id plot.
if cell_type in select:
    colors.append('lightgrey')
    texts.append('not clustered')
    outlines.append('grey')

# Empty line plots serve purely as legend handles (marker only, no line).
patches = [
    ax.plot([], [], marker="o", ms=10, mec=outline, ls='none', linewidth=0.6,
            color=color, label="{:s}".format(text))[0]
    for color, text, outline in zip(colors, texts, outlines)
]
ax.legend(handles=patches, loc='center left', framealpha=1, frameon=False,
          bbox_to_anchor=(1, 0.5), title='Genotype')
fig.savefig(out_path, dpi=300, bbox_inches='tight')

#### Plot batches in UMAP-space

In [None]:
# batches is file with batch numbers
batches = pd.read_csv(
    '{}_{}_{}_{}_batches.csv'.format(geno1, geno2, cell_type, section),
    index_col=0)
# drop=True discards the old index in one step. The former reset_index()
# followed by drop(columns={'index'}) raised KeyError whenever the CSV's index
# column had a name, because the restored column is then not called 'index'.
batches = batches.reset_index(drop=True)

# One row per embedded point: coordinates, batch id and cluster label.
# A frame built from an array already has a fresh 0..n-1 index, so no index
# reset is needed here.
temp = pd.DataFrame(standard_embedding).rename(columns={0:'x',1:'y'})
# Take the batch id from the first data column explicitly instead of assigning
# a whole DataFrame. Assumes a single batch column - TODO confirm.
temp['batch'] = batches.iloc[:, 0]
temp['batch'] = temp['batch'].astype('str').astype('category')
# Flattened so this works whether `labels` is 1-D or an (n, 1) column.
temp['labels'] = np.ravel(labels)

# For the selected cell types, unclustered points are split off so the plot
# can draw them separately in grey.
if cell_type in select:
    clustered_rows = np.ravel(clustered)
    unclustered = temp[~clustered_rows]
    temp = temp[clustered_rows]

batch_ids = np.unique(temp['batch'])
n = len(batch_ids)

# bokeh's Set1 is only defined for 3 to 9 colours, so Set1[n] raised KeyError
# for one or two batches (and for more than nine). Pick the nearest available
# size and take exactly n colours, repeating them if there are more than nine
# batches.
palette = Set1[min(max(n, 3), 9)]
t = tuple(palette[i % len(palette)] for i in range(n))
# Batch id -> colour, used to colour the scatter points.
colors = dict(zip(batch_ids, t))

In [None]:
# Scatter the points in UMAP space coloured by batch, save the figure as PDF
# and display it.
out_path = os.path.join(
    out_dir,
    '{}_{}_{}_{}_UMAP_colored_by_batch.pdf'.format(geno1, geno2, cell_type, section))

fig = plt.figure(figsize=(5,5))
plt.rcParams["font.family"] = "Arial"
plt.rcParams['font.size'] = 20
ax = fig.gca()

# Unclustered points (only split off for the selected cell types) are drawn
# first, in translucent grey, so the batch-coloured points sit on top.
if cell_type in select:
    ax.scatter(unclustered.x, unclustered.y,
               c='grey', alpha=0.5, s=15, ec='black', linewidth=0.5)
batch_point_colors = temp['batch'].map(colors)
ax.scatter(temp.x, temp.y, c=batch_point_colors, s=15, ec='black', linewidth=0.5)

for side in ('right', 'top'):
    ax.spines[side].set_color('none')
ax.set_xlabel('UMAP-1')
ax.set_ylabel('UMAP-2')
plt.rcParams["axes.grid"] = False

# Legend: `t` holds the palette in the same order as the sorted batch ids, so
# entry k of one pairs with entry k of the other.
colors = t
texts = list(np.unique(temp['batch']))
patches = [
    ax.plot([], [], marker="o", ms=10, mec='black', ls='none', linewidth=0.6,
            color=colors[idx], label="{:s}".format(text))[0]
    for idx, text in enumerate(texts)
]
ax.legend(handles=patches, loc='center left', framealpha=1, frameon=False,
          bbox_to_anchor=(1, 0.5), title='Batch')

fig.savefig(out_path, dpi=300, bbox_inches='tight')
plt.show()