In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pathlib

from ALLCools.plot import *
from wmb import *

In [None]:
# RNA reference dataset to use. 'AIBS_SMART' and 'AIBS_TENX' are matched
# explicitly when the RNA annotation is loaded; any other value selects the
# Broad 10x annotation.
dataset = "AIBS_TENX"

In [None]:
# Methylation (mC) annotation is independent of the chosen RNA dataset.
mc_annot = cemba.get_mc_annot()

# Pick the RNA annotation that matches `dataset`. Anything that is not one of
# the two AIBS datasets falls through to the Broad 10x annotation.
if dataset not in ('AIBS_SMART', 'AIBS_TENX'):
    rna_annot = broad.get_tenx_annot()
elif dataset == 'AIBS_TENX':
    rna_annot = aibs.get_tenx_annot()
else:
    rna_annot = aibs.get_smart_annot()

In [None]:
def _group_prefix(group_name, depth):
    """Keep the first ``depth`` underscore-separated fields of a group name."""
    return '_'.join(str(group_name).split('_')[:depth])


# Mapping read from CSV (first column is the index), then truncated to
# progressively longer prefixes of the integration-group name.
# Note: the ``*_l4`` mappings keep the first THREE fields of the name.
mc_l4_to_inte_group = pd.read_csv('mc_l4_to_integration_group.csv', index_col=0).squeeze()
mc_l4_to_inte_l1 = mc_l4_to_inte_group.apply(lambda name: _group_prefix(name, 1))
mc_l4_to_inte_l2 = mc_l4_to_inte_group.apply(lambda name: _group_prefix(name, 2))
mc_l4_to_inte_l4 = mc_l4_to_inte_group.apply(lambda name: _group_prefix(name, 3))

# Same construction for the RNA side.
rna_l4_to_inte_group = pd.read_csv('rna_l4_to_integration_group.csv', index_col=0).squeeze()
rna_l4_to_inte_l1 = rna_l4_to_inte_group.apply(lambda name: _group_prefix(name, 1))
rna_l4_to_inte_l2 = rna_l4_to_inte_group.apply(lambda name: _group_prefix(name, 2))
rna_l4_to_inte_l4 = rna_l4_to_inte_group.apply(lambda name: _group_prefix(name, 3))

## Read Confusion Matrix

- Load the L1 to L4 confusion matrices
- Convert every confusion matrix to mC and RNA L4 cluster resolution

### L1 Confusion Matrix

In [None]:
# L1 integration overlap scores; later cells index rows by RNA cluster and
# columns by mC cluster.
l1_confusion_matrix = pd.read_hdf('L1_confusion_matrx.L4_clusters.hdf')

### L2 Confusion Matrix

In [None]:
# L2 integration overlap scores. The 'matrx' spelling in the file name is
# kept as-is; presumably it matches the file on disk (TODO confirm).
l2_confusion_matrix = pd.read_hdf('L2_confusion_matrx.L4_clusters.hdf')

### L4 Confusion Matrix
- Read all L4 overlap score matrices
- Replace the values of l2_confusion_matrix with the L4 values

In [None]:
# L4 integration overlap scores. The 'matrx' spelling in the file name is
# kept as-is; presumably it matches the file on disk (TODO confirm).
l4_confusion_matrix = pd.read_hdf('L4_confusion_matrx.L4_clusters.hdf')

## Plot Confusion Matrix

- Three confusion matrices showing the process from L1 to L4 integration

In [None]:
# Shared colour encoding for every overlap-score figure: scores are mapped
# linearly from [0, 1] onto the reversed magma colormap.
palette = 'magma_r'
norm = mpl.colors.Normalize(vmin=0, vmax=1)
# Work on a copy so the registered colormap is never modified.
cmap = plt.get_cmap(palette).copy()

# Mappable used to draw a colour bar that is not tied to a plotted artist.
scalar_mappable = mpl.cm.ScalarMappable(cmap=cmap, norm=norm)

In [None]:
# Narrow stand-alone figure whose single axes is filled by the colour bar.
fig, ax = plt.subplots(figsize=(0.3, 2), dpi=300)
fig.colorbar(mappable=scalar_mappable, label='Overlap Score', cax=ax)

### L1

In [None]:
# Order rows (RNA) and columns (mC) by their L1 integration group so that
# clusters of the same group sit next to each other in the matrix.
rna_order = rna_l4_to_inte_l1.sort_values().index
mc_order = mc_l4_to_inte_l1.sort_values().index
mat = l1_confusion_matrix.loc[rna_order, mc_order]

In [None]:
# Display the reordered L1 matrix as the cell output.
mat

In [None]:
# Long-form table: one row per (mC column, RNA row) pair of `mat`, keeping
# only the pairs with a positive overlap score (OS).
plot_data = (
    mat.unstack()
    .rename('OS')
    .rename_axis(['mC', 'RNA'])
    .reset_index()
    .loc[lambda frame: frame['OS'] > 0]
    .copy()
)

# Plot coordinates are the integer positions of each label in `mat`.
y_map = dict(zip(mat.index, range(len(mat.index))))
plot_data['y'] = plot_data['RNA'].map(y_map)

x_map = dict(zip(mat.columns, range(len(mat.columns))))
plot_data['x'] = plot_data['mC'].map(x_map)

In [None]:
# Upper bound on the number of non-zero matrix entries drawn in the figure.
max_points = 100000
# Cap the sample size at the number of available rows: `sample` raises a
# ValueError when asked for more rows than exist (without replacement).
# The fixed seed makes the saved figure identical between runs.
scatter_data = plot_data.sample(n=min(max_points, len(plot_data)),
                                random_state=0)

fig, ax = plt.subplots(figsize=(5, 5), dpi=300)
# Point size and colour both encode the overlap score on a fixed [0, 1] scale.
# `cmap` is intentionally not passed: seaborn colours the points from
# `palette`, and matplotlib ignores `cmap` when no `c` array is given.
sns.scatterplot(data=scatter_data,
                x='x',
                y='y',
                ax=ax,
                linewidth=0,
                size='OS',
                size_norm=(0, 1),
                sizes=(0, 1),
                hue='OS',
                hue_norm=(0, 1),
                palette=palette,
                legend=None)
ax.set(xticks=[],
       xlabel='mC Clusters',
       yticks=[],
       ylabel='RNA Clusters')

ax.set_title(f'L1 Integration Overlap Scores\n'
             f'{rna_l4_to_inte_l1.unique().size} Integration Groups',
             fontsize=10)
# File name no longer starts with a stray quote character, matching the
# name used for the L2 figure.
fig.savefig("L1_Integration.png")
pass

In [None]:
# Inspect the x-axis limits of the most recently created axes.
ax.get_xlim()

### L2

In [None]:
# Columns present in the L1 matrix but absent from the L2 matrix are copied
# over from L1 so the L2 matrix can be indexed by the full set of columns.
# A set difference (not a symmetric difference) is required here: a column
# that exists only in the L2 matrix cannot be looked up in the L1 matrix and
# would raise a KeyError.
columns_to_add = set(l1_confusion_matrix.columns) - set(l2_confusion_matrix.columns)
for column in columns_to_add:
    l2_confusion_matrix[column] = l1_confusion_matrix[column]

In [None]:
# Display which columns were copied from the L1 matrix.
columns_to_add

In [None]:
# Order rows (RNA) and columns (mC) by their L2 integration group so that
# clusters of the same group sit next to each other in the matrix.
rna_order = rna_l4_to_inte_l2.sort_values().index
mc_order = mc_l4_to_inte_l2.sort_values().index
mat = l2_confusion_matrix.loc[rna_order, mc_order]

In [None]:
# Display the reordered L2 matrix as the cell output.
mat

In [None]:
# Long-form table: one row per (mC column, RNA row) pair of `mat`, keeping
# only the pairs with a positive overlap score (OS).
plot_data = (
    mat.unstack()
    .rename('OS')
    .rename_axis(['mC', 'RNA'])
    .reset_index()
    .loc[lambda frame: frame['OS'] > 0]
    .copy()
)

# Plot coordinates are the integer positions of each label in `mat`.
y_map = dict(zip(mat.index, range(len(mat.index))))
plot_data['y'] = plot_data['RNA'].map(y_map)

x_map = dict(zip(mat.columns, range(len(mat.columns))))
plot_data['x'] = plot_data['mC'].map(x_map)

In [None]:
# Upper bound on the number of non-zero matrix entries drawn in the figure.
max_points = 100000
# Cap the sample size at the number of available rows: `sample` raises a
# ValueError when asked for more rows than exist (without replacement).
# The fixed seed makes the saved figure identical between runs.
scatter_data = plot_data.sample(n=min(max_points, len(plot_data)),
                                random_state=0)

fig, ax = plt.subplots(figsize=(5, 5), dpi=300)
# Point size and colour both encode the overlap score on a fixed [0, 1] scale.
# `cmap` is intentionally not passed: seaborn colours the points from
# `palette`, and matplotlib ignores `cmap` when no `c` array is given.
sns.scatterplot(data=scatter_data,
                x='x',
                y='y',
                ax=ax,
                linewidth=0,
                size='OS',
                size_norm=(0, 1),
                sizes=(0, 1),
                hue='OS',
                hue_norm=(0, 1),
                palette=palette,
                legend=None)
ax.set(xticks=[],
       xlabel='mC Clusters',
       yticks=[],
       ylabel='RNA Clusters')

ax.set_title(f'L2 Integration Overlap Scores\n'
             f'{rna_l4_to_inte_l2.unique().size} Integration Groups',
             fontsize=10)

fig.savefig("L2_Integration.png")

pass

### L4

In [None]:
# Columns present in the L1 matrix but absent from the L4 matrix are copied
# over from L1 so the L4 matrix can be indexed by the full set of columns.
# A set difference (not a symmetric difference) is required here: a column
# that exists only in the L4 matrix cannot be looked up in the L1 matrix and
# would raise a KeyError.
columns_to_add = set(l1_confusion_matrix.columns) - set(l4_confusion_matrix.columns)
for column in columns_to_add:
    l4_confusion_matrix[column] = l1_confusion_matrix[column]

In [None]:
# Order rows (RNA) and columns (mC) by their finest integration group so that
# clusters of the same group sit next to each other in the matrix.
rna_order = rna_l4_to_inte_l4.sort_values().index
mc_order = mc_l4_to_inte_l4.sort_values().index
mat = l4_confusion_matrix.loc[rna_order, mc_order]

In [None]:
# Long-form table: one row per (mC column, RNA row) pair of `mat`, keeping
# only the pairs with a positive overlap score (OS).
plot_data = (
    mat.unstack()
    .rename('OS')
    .rename_axis(['mC', 'RNA'])
    .reset_index()
    .loc[lambda frame: frame['OS'] > 0]
    .copy()
)

# Plot coordinates are the integer positions of each label in `mat`.
y_map = dict(zip(mat.index, range(len(mat.index))))
plot_data['y'] = plot_data['RNA'].map(y_map)

x_map = dict(zip(mat.columns, range(len(mat.columns))))
plot_data['x'] = plot_data['mC'].map(x_map)

In [None]:
# Upper bound on the number of non-zero matrix entries drawn in the figure.
max_points = 100000
# Cap the sample size at the number of available rows: `sample` raises a
# ValueError when asked for more rows than exist (without replacement).
# The fixed seed makes the saved figure identical between runs.
scatter_data = plot_data.sample(n=min(max_points, len(plot_data)),
                                random_state=0)

fig, ax = plt.subplots(figsize=(5, 5), dpi=300)
# Point size and colour both encode the overlap score on a fixed [0, 1] scale.
# `cmap` is intentionally not passed: seaborn colours the points from
# `palette`, and matplotlib ignores `cmap` when no `c` array is given.
sns.scatterplot(data=scatter_data,
                x='x',
                y='y',
                ax=ax,
                linewidth=0,
                size='OS',
                size_norm=(0, 1),
                sizes=(0, 1),
                hue='OS',
                hue_norm=(0, 1),
                palette=palette,
                legend=None)
ax.set(xticks=[],
       xlabel='mC Clusters',
       yticks=[],
       ylabel='RNA Clusters')

ax.set_title(f'L4 Integration Overlap Scores\n'
             f'{rna_l4_to_inte_l4.unique().size} Integration Groups',
             fontsize=10)

# File name no longer starts with a stray quote character, matching the
# name used for the L2 figure.
fig.savefig("L4_Integration.png")
pass