In [1]:
import pandas as pd
import json

In [2]:
# Cross-modality integration table: one row per mC cluster with the RNA and
# ATAC cluster names it was matched to.
integration_table_path = 'label_transfer_between_modality/CrossModalityIntegration.tsv'
integration_table = pd.read_csv(integration_table_path, sep='\t')

# Replace the file's header with fixed column names; the assignment requires
# the table to have exactly these five columns, in this order.
integration_table.columns = ['CellClass', 'MajorType', 'SubType', 'AIBS_RNA', 'ATAC']

## RNA Cluster level map

In [3]:
# Per-cell RNA table; later cells read its 'CellClass', 'MajorType' and
# 'SubType' columns.
# NOTE(review): `pd.read_msgpack` was deprecated in pandas 0.25 and removed in
# pandas 1.0, so this cell only runs on an older pandas. Re-saving the table
# in another format (e.g. parquet) would be needed to run on current pandas.
# NOTE(review): absolute, user-specific path; confirm the file location before
# re-running on another machine.
rna_data = pd.read_msgpack('/home/hanliu/project/allen/mouse_scrna/raw/Cortex_HPF/Cortex_HPF.74967.cell_tidy_data.msg')

It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Lookup from any RNA cluster name (CellClass, MajorType or SubType) to the
# list of RNA SubTypes that belong to it.
rna_map_to_subtype_list = {}

# Coarser levels go in first; a later `update` overwrites any key that is
# reused at a finer level.
for level in ('CellClass', 'MajorType'):
    members_by_cluster = (
        rna_data
        .groupby(level)['SubType']
        .apply(lambda subtypes: subtypes.unique().tolist())
        .to_dict()
    )
    rna_map_to_subtype_list.update(members_by_cluster)

# Every SubType maps to a one-element list holding itself.
rna_map_to_subtype_list.update(
    {subtype: [subtype] for subtype in rna_data['SubType'].unique()}
)

# The empty name expands to no SubTypes, so a lookup of '' yields an empty list.
rna_map_to_subtype_list[''] = []

## mC Cluster to AIBS RNA (HPF & Cortex Only)

- Keys are mC (methylation) cluster names at the selected level (CellClass, MajorType, or SubType)
- Values are always lists of RNA cluster names at the SubType level

In [5]:
def get_cluster_map(cluster_col):
    """Map each mC cluster in ``cluster_col`` to its matched RNA SubTypes.

    Reads the notebook-level ``integration_table`` (column ``AIBS_RNA`` holds
    comma-separated RNA cluster names) and ``rna_map_to_subtype_list`` (RNA
    cluster name -> list of RNA SubTypes).

    Parameters
    ----------
    cluster_col : str
        Column of ``integration_table`` to group by, e.g. 'CellClass',
        'MajorType' or 'SubType'.

    Returns
    -------
    dict
        ``{mC cluster name: [RNA SubType, ...]}``. Each list is in first-seen
        order and contains no duplicates; a cluster with no ``AIBS_RNA`` value
        maps to an empty list.

    Raises
    ------
    KeyError
        If a name listed in ``AIBS_RNA`` is not a key of
        ``rna_map_to_subtype_list``.
    """
    final_map = {}
    for mc_cluster, rows in integration_table.groupby(cluster_col):
        subtypes = []
        seen = set()
        for entry in rows['AIBS_RNA'].dropna().unique():
            for rna_name in str(entry).split(','):
                # Tolerate "a, b" style lists and stray/trailing commas.
                rna_name = rna_name.strip()
                if not rna_name:
                    continue
                try:
                    members = rna_map_to_subtype_list[rna_name]
                except KeyError:
                    raise KeyError(
                        f'RNA cluster {rna_name!r} (listed for {mc_cluster!r}) '
                        f'is not in rna_map_to_subtype_list'
                    ) from None
                # Different rows, or a coarse name plus one of its own
                # SubTypes, can expand to the same SubType; keep it once.
                for subtype in members:
                    if subtype not in seen:
                        seen.add(subtype)
                        subtypes.append(subtype)
        final_map[mc_cluster] = subtypes

    return final_map

In [6]:
# Export the mC -> RNA map at the CellClass level and report its size.
cluster_col = 'CellClass'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-RNA.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped RNA entries.
for mc_cluster, rna_subtypes in cluster_map.items():
    print(mc_cluster, len(rna_subtypes))

Exc 142
Inh 87
NonN 15


In [7]:
# Export the mC -> RNA map at the MajorType level and report its size.
cluster_col = 'MajorType'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-RNA.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped RNA entries.
for mc_cluster, rna_subtypes in cluster_map.items():
    print(mc_cluster, len(rna_subtypes))

ANP 0
ASC 1
CA1 15
CA3 4
CA3-St18 2
CGE-Lamp5 8
CGE-Vip 33
CLA 7
CT-L6 17
Chd7 0
D1L-Fstl4 0
D1L-PAL 0
DG 3
DG-po 1
EC 2
EP 0
Foxp2 0
Gfra1 2
IG-CA2 2
IT-L23 13
IT-L4 4
IT-L5 30
IT-L6 10
L6b 11
LSX-Inh 0
MGC 1
MGE-Pvalb 8
MGE-Sst 33
MSN-D1 0
MSN-D2 0
NP-L6 6
ODC 3
OLF 0
OLF-Exc 0
OPC 3
PAL-Inh 0
PC 1
PT-L5 15
Unc5c 5
VLMC 3
VLMC-Pia 1


In [8]:
# Export the mC -> RNA map at the SubType level and report its size.
cluster_col = 'SubType'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-RNA.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped RNA entries.
for mc_cluster, rna_subtypes in cluster_map.items():
    print(mc_cluster, len(rna_subtypes))

ANP anp-dg 0
ANP anp-olf-cnu 0
ASC cortex-olf 1
ASC mid 1
ASC str-hpf 1
CA1 Ak5 3
CA1 Chrm3 4
CA1 Kif26a 2
CA1 Lingo2 0
CA1 Ptprg 6
CA3 Cadm2 3
CA3 Efnb2 1
CA3-St18 Epha5 2
CA3-St18 Nuak1 2
CA3-St18 Tead1 2
CGE-Lamp5 Dock5 1
CGE-Lamp5 Grid1 2
CGE-Lamp5 Grk5 1
CGE-Lamp5 Nrxn3 1
CGE-Lamp5 Sorcs1 3
CGE-Vip Ccser1 4
CGE-Vip Clstn2 2
CGE-Vip Fstl4 4
CGE-Vip Galnt17 4
CGE-Vip Grm8 3
CGE-Vip Ntng1 2
CGE-Vip Ptprm 10
CGE-Vip Robo1 4
CLA Bcl11a 7
CLA Cdh8 7
CLA Nrp2 7
CT-L6 Hcrtr2 17
CT-L6 Il1rap 17
CT-L6 Map4 17
CT-L6 Megf9 17
Chd7 Kcnc2 0
Chd7 Megf11 0
Chd7 Trpc7 0
D1L-Fstl4 Cadm1 0
D1L-Fstl4 Crim1 0
D1L-Fstl4 Grm3 0
D1L-Fstl4 Sipa1l2 0
D1L-Fstl4 Trps1 0
D1L-PAL Flrt2 0
D1L-PAL Plcxd3 0
DG dg-all 3
DG-po Bcl11a 1
DG-po Calb2 1
DG-po Kctd8 1
EC Abhd2 2
EC Sema3g 2
EP Adcy8 0
EP Rgs8 0
EP Tspan5 0
Foxp2 Dchs2 0
Foxp2 Homer2 0
Foxp2 Inpp4b 0
Foxp2 Trpc7 0
Gfra1 Gfra1 2
IG-CA2 Chrm3 1
IG-CA2 Peak1 1
IG-CA2 Xpr1 1
IT-L23 Cux1 13
IT-L23 Foxp1 13
IT-L23 Ptprt 13
IT-L23 Tenm2 13
IT-L4 Astn2 4
IT-L4 S

## mC Cluster to ATAC

In [9]:
def get_cluster_map(cluster_col):
    """Map each mC cluster in ``cluster_col`` to its matched ATAC clusters.

    Reads the notebook-level ``integration_table``, whose ``ATAC`` column holds
    comma-separated ATAC cluster names.

    Note: this reuses the name of the mC -> RNA helper defined earlier in the
    notebook and replaces it from this cell onward, so re-running the RNA
    export cells after this one would call this ATAC version instead.

    Parameters
    ----------
    cluster_col : str
        Column of ``integration_table`` to group by, e.g. 'CellClass',
        'MajorType' or 'SubType'.

    Returns
    -------
    dict
        ``{mC cluster name: [ATAC cluster name, ...]}``. Each list is in
        first-seen order and contains no duplicates; a cluster with no
        ``ATAC`` value maps to an empty list.
    """
    cluster_map = {}
    for mc_cluster, rows in integration_table.groupby(cluster_col):
        atac_names = []
        seen = set()
        for entry in rows['ATAC'].dropna().unique():
            for atac_name in str(entry).split(','):
                # Tolerate "a, b" style lists and stray/trailing commas.
                atac_name = atac_name.strip()
                # Two rows with different name lists can still share a name;
                # keep each ATAC cluster once.
                if atac_name and atac_name not in seen:
                    seen.add(atac_name)
                    atac_names.append(atac_name)
        cluster_map[mc_cluster] = atac_names
    return cluster_map

In [10]:
# Export the mC -> ATAC map at the CellClass level and report its size.
cluster_col = 'CellClass'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-ATAC.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped ATAC entries.
for mc_cluster, atac_clusters in cluster_map.items():
    print(mc_cluster, len(atac_clusters))

Exc 30
Inh 41
NonN 21


In [11]:
# Export the mC -> ATAC map at the MajorType level and report its size.
cluster_col = 'MajorType'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-ATAC.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped ATAC entries.
for mc_cluster, atac_clusters in cluster_map.items():
    print(mc_cluster, len(atac_clusters))

ANP 4
ASC 3
CA1 5
CA3 1
CA3-St18 1
CGE-Lamp5 3
CGE-Vip 3
CLA 1
CT-L6 3
Chd7 2
D1L-Fstl4 3
D1L-PAL 1
DG 1
DG-po 2
EC 1
EP 1
Foxp2 1
Gfra1 1
IG-CA2 2
IT-L23 1
IT-L4 1
IT-L5 1
IT-L6 1
L6b 1
LSX-Inh 1
MGC 1
MGE-Pvalb 5
MGE-Sst 6
MSN-D1 1
MSN-D2 1
NP-L6 3
ODC 3
OLF 8
OLF-Exc 5
OPC 2
PAL-Inh 6
PC 1
PT-L5 2
Unc5c 1
VLMC 4
VLMC-Pia 2


In [12]:
# Export the mC -> ATAC map at the SubType level and report its size.
cluster_col = 'SubType'
cluster_map = get_cluster_map(cluster_col)

json_path = f'label_transfer_between_modality/mC-ATAC.{cluster_col}.json'
with open(json_path, 'w') as json_file:
    json.dump(cluster_map, json_file)

# One line per mC cluster: its name and the number of mapped ATAC entries.
for mc_cluster, atac_clusters in cluster_map.items():
    print(mc_cluster, len(atac_clusters))

ANP anp-dg 2
ANP anp-olf-cnu 2
ASC cortex-olf 3
ASC mid 3
ASC str-hpf 3
CA1 Ak5 1
CA1 Chrm3 1
CA1 Kif26a 1
CA1 Lingo2 2
CA1 Ptprg 1
CA3 Cadm2 1
CA3 Efnb2 1
CA3-St18 Epha5 1
CA3-St18 Nuak1 1
CA3-St18 Tead1 1
CGE-Lamp5 Dock5 2
CGE-Lamp5 Grid1 2
CGE-Lamp5 Grk5 2
CGE-Lamp5 Nrxn3 1
CGE-Lamp5 Sorcs1 1
CGE-Vip Ccser1 1
CGE-Vip Clstn2 1
CGE-Vip Fstl4 1
CGE-Vip Galnt17 1
CGE-Vip Grm8 1
CGE-Vip Ntng1 1
CGE-Vip Ptprm 1
CGE-Vip Robo1 1
CLA Bcl11a 1
CLA Cdh8 1
CLA Nrp2 1
CT-L6 Hcrtr2 1
CT-L6 Il1rap 1
CT-L6 Map4 1
CT-L6 Megf9 2
Chd7 Kcnc2 1
Chd7 Megf11 1
Chd7 Trpc7 1
D1L-Fstl4 Cadm1 2
D1L-Fstl4 Crim1 2
D1L-Fstl4 Grm3 2
D1L-Fstl4 Sipa1l2 1
D1L-Fstl4 Trps1 1
D1L-PAL Flrt2 1
D1L-PAL Plcxd3 1
DG dg-all 1
DG-po Bcl11a 1
DG-po Calb2 1
DG-po Kctd8 1
EC Abhd2 1
EC Sema3g 1
EP Adcy8 1
EP Rgs8 1
EP Tspan5 1
Foxp2 Dchs2 1
Foxp2 Homer2 1
Foxp2 Inpp4b 1
Foxp2 Trpc7 1
Gfra1 Gfra1 1
IG-CA2 Chrm3 1
IG-CA2 Peak1 1
IG-CA2 Xpr1 1
IT-L23 Cux1 1
IT-L23 Foxp1 1
IT-L23 Ptprt 1
IT-L23 Tenm2 1
IT-L4 Astn2 1
IT-L4 Shc3 1
IT-