In [1]:
from io import StringIO
import os

import scanpy
import pandas as pd

# Mouse data (Tabula Muris Senis) from AWS

In [2]:
! aws s3 ls s3://czb-tabula-muris-senis/Data-objects/

2019-10-12 14:14:29          0 
2020-01-26 12:48:37 12915199084 tabula-muris-senis-bbknn-processed-official-annotations.h5ad
2019-12-05 10:38:08 4056529818 tabula-muris-senis-droplet-official-raw-obj.h5ad
2019-12-05 10:47:56  434573994 tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad
2019-12-05 10:47:56  286898922 tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad
2019-12-05 10:47:56  335138122 tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad
2019-12-05 10:47:56  776671418 tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad
2019-12-05 10:47:56  103216122 tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad
2019-12-05 10:48:03  826884650 tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad
2019-12-05 10:48:14  308116386 tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad
2019-12-05 10:48:17  679944698 tabula-muris-senis-droplet-processed-

## Sync raw counts object from all tissues

In [3]:
# ! aws s3 cp s3://czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-official-raw-obj.h5ad \
#     /home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/

## Sync processed Lung object with annotations from AWS

In [4]:
# ! aws s3 cp s3://czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad \
#     /home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad

## Load into Scanpy

### Mouse Raw object with all tissues

In [3]:
%%time

h5ad = '/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-official-raw-obj.h5ad'
mouse = scanpy.read_h5ad(h5ad)
mouse

CPU times: user 1.39 s, sys: 1min 15s, total: 1min 16s
Wall time: 1min 20s


AnnData object with n_obs × n_vars = 245389 × 20138 
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation'
    var: 'n_cells'

## Prepare for contatenation, add 1:1 orthologs

### Add "species" column to observations

In [4]:
mouse.obs['species'] = 'Mouse'
mouse.obs['species_latin'] = 'Mus musculus'
mouse.obs.head()

Unnamed: 0_level_0,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,n_genes,sex,subtissue,tissue,tissue_free_annotation,species,species_latin
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AAACCTGCAGGGTACA-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,keratinocyte,,filiform,droplet,24-M-60,2107.0,male,,Tongue,Tongue,Mouse,Mus musculus
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,droplet,24-M-60,3481.0,male,,Tongue,Tongue,Mouse,Mus musculus
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,droplet,24-M-60,2599.0,male,,Tongue,Tongue,Mouse,Mus musculus
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3468.0,male,,Tongue,Tongue,Mouse,Mus musculus
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3189.0,male,,Tongue,Tongue,Mouse,Mus musculus


In [5]:
mouse.obs['channel'] = mouse.obs['cell'].map(lambda x: '_'.join(x.split('_')[:-1]))
mouse.obs.head()

Unnamed: 0_level_0,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,n_genes,sex,subtissue,tissue,tissue_free_annotation,species,species_latin,channel
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AAACCTGCAGGGTACA-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,keratinocyte,,filiform,droplet,24-M-60,2107.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,droplet,24-M-60,3481.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,droplet,24-M-60,2599.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3468.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3189.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60


In [6]:
startswith_10x = [x for x in mouse.obs['channel'].unique() if x.startswith('10X')]
len(startswith_10x)

77

In [7]:
mouse.obs['channel'].nunique()

166

In [8]:
mouse.obs.groupby(['age', 'mouse.id']).size()

age  mouse.id
1m   1-M-62       9809
     1-M-63      16171
3m   3-F-56      15218
     3-F-57       8475
     3-M-5/6      4929
     3-M-7/8      5296
     3-M-8        6265
     3-M-8/9       460
     3-M-9        4959
18m  18-F-50     12062
     18-F-51      9779
     18-M-52     17857
     18-M-53      4947
21m  21-F-54     16844
     21-F-55     18984
24m  24-M-58     12166
     24-M-59     12107
     24-M-60      9656
     24-M-61      3731
30m  30-M-2      25405
     30-M-3       9757
     30-M-4       7863
     30-M-5      12649
dtype: int64

In [9]:
mouse.obs['channel_tissue'] = mouse.obs['channel'].astype(str) + "__" + mouse.obs['tissue'].astype(str)
mouse.obs['channel_tissue'].nunique()

166

In [10]:
sorted(mouse.obs['channel_tissue'].unique())

['10X_P1_10__Spleen',
 '10X_P1_11__Heart_and_Aorta',
 '10X_P1_12__Heart_and_Aorta',
 '10X_P1_13__Marrow',
 '10X_P1_14__Lung',
 '10X_P1_16__Liver',
 '10X_P1_1__Kidney',
 '10X_P1_2__Spleen',
 '10X_P1_3__Heart_and_Aorta',
 '10X_P1_4__Heart_and_Aorta',
 '10X_P1_5__Marrow',
 '10X_P1_6__Lung',
 '10X_P1_7__Pancreas',
 '10X_P1_8__Large_Intestine',
 '10X_P1_9__Kidney',
 '10X_P2_0__Kidney',
 '10X_P2_10__Liver',
 '10X_P2_11__Liver',
 '10X_P2_13__Limb_Muscle',
 '10X_P2_1__Spleen',
 '10X_P2_2__Pancreas',
 '10X_P2_3__Liver',
 '10X_P2_4__Limb_Muscle',
 '10X_P2_5__Lung',
 '10X_P2_7__Large_Intestine',
 '10X_P3_0__Lung',
 '10X_P3_1__Pancreas',
 '10X_P3_2__Fat',
 '10X_P3_3__Fat',
 '10X_P3_4__Large_Intestine',
 '10X_P3_5__Marrow',
 '10X_P3_6__Spleen',
 '10X_P3_7__Kidney',
 '10X_P4_0__Tongue',
 '10X_P4_1__Tongue',
 '10X_P4_2__Liver',
 '10X_P4_3__Bladder',
 '10X_P4_4__Bladder',
 '10X_P4_5__Kidney',
 '10X_P4_6__Kidney',
 '10X_P4_7__Spleen',
 '10X_P5_0__Liver',
 '10X_P5_10__Marrow',
 '10X_P5_11__Spleen',
 '10

## Add sequencing run

### Run folder to 10x run pool name

In [11]:
s = '''Run folder	10x pool#
170606_A00111_0041_BH27FCDMXX	Pool 1 MACA10x
170727_A00111_0044_BH25HKDMXX	Pool 2 MACA10x
170808_A00111_0045_AH2HMCDMXX	Pool 3 MACA10x
170810_A00111_0047_AH2HT7DMXX	Pool 4 MACA10x
170817_A00111_0048_AH2H72DMXX	Pool 5 MACA10x
170827_A00111_0049_AH2H7CDMXX	Pool 6 MACA10x
170827_A00111_0050_BH2H5YDMXX	Pool 7 MACA10x
171005_A00111_0072_AH3YLGDMXX	MACA 10x pool 8 [#17-32]
171005_A00111_0073_BH3YHNDMXX	MACA 10x pool 9 [#33-48]
171019_A00111_0077_BH3YKNDMXX	MACA 10x pool 10 [#49-64]
171024_A00111_0078_AH3YHCDMXX	MACA 10x pool 11 [#65-81]
171024_A00111_0079_BH3YKJDMXX	MACA 10x pool 12 [#82-88, 105-112]
171103_A00111_0081_AH52KGDMXX	MACA 10x pool 13 [#113-128]
171103_A00111_0082_BH523JDMXX	MACA 10x pool 14 + 4 BC samples [#129-144 +1-4]
170919_A00111_0061_BH3FYJDMXX	Pool 15 - 10x 3m/mouselemur
171001_A00111_0071_BH3TTFDMXX	Pool 16 - MACA mouse lemur (Stumpy)
'''
seq_run_to_pool = pd.read_csv(StringIO(s), sep='\t')
seq_run_to_pool

Unnamed: 0,Run folder,10x pool#
0,170606_A00111_0041_BH27FCDMXX,Pool 1 MACA10x
1,170727_A00111_0044_BH25HKDMXX,Pool 2 MACA10x
2,170808_A00111_0045_AH2HMCDMXX,Pool 3 MACA10x
3,170810_A00111_0047_AH2HT7DMXX,Pool 4 MACA10x
4,170817_A00111_0048_AH2H72DMXX,Pool 5 MACA10x
5,170827_A00111_0049_AH2H7CDMXX,Pool 6 MACA10x
6,170827_A00111_0050_BH2H5YDMXX,Pool 7 MACA10x
7,171005_A00111_0072_AH3YLGDMXX,MACA 10x pool 8 [#17-32]
8,171005_A00111_0073_BH3YHNDMXX,MACA 10x pool 9 [#33-48]
9,171019_A00111_0077_BH3YKNDMXX,MACA 10x pool 10 [#49-64]


### Dictionary of pools

In [12]:
pools = {'pool1': '''30-m2-1	KIDNEY	A3
30-m2-2	SPLEEN	B3
30-m2-3	HEART (ALL MINUS AORTA)	A4
30-m2-4	HEART (LV+RV ONLY)	B4
30-m2-5	BM (NON-STC)	A5
30-m2-6	LUNG	B5
30-m2-7	PANCREASE	A6
30-m2-8	COLON - "PROM Tm"	B6
30-m2-9	KIDNEY	A7
30-m2-10	SPLEEN	B7
30-m2-11	HEART (ALL 4:4:4:1:1)	A8
30-m2-12	HEART (LV+RV ONLY)	B8
30-m2-13	BM (NON-STC)	A9
30-m2-14	LUNG	B9
30-m2-15	CORTEX MICROGLIA	A10
30-m2-16	LIVER	B10''',
         
        'pool2': '''30-M-4 #1	KIDNEY	A5
30-M-4 #2	SPLEEN	B5
30-M-4 #3	PANCREASE	C5
30-M-4 #4	LIVER HEPATOCYTES	D5
30-M-4 #5	MUSCLE	E5
30-M-4 #6	LUNG	F5
30-M-4 #7	CORTEX	G5
30-M-4 #8	COLON PROXIMAL	H5
30-M-4 #9	BRAIN: CB,HIP,STR	B1
30-M-4 #10	BRAIN: CB,HIP,STR	C1
30-M-5 #1	LIVER_HEP	A6
30-M-5 #2	LIVER_NPC	B6
30-M-5 #3	BRAIN_STR	C6
30-M-5 #4	MUSCLE	D6
30-M-5 #5	BRAIN1_HIP	E6
30-M-5 #6	BRAIN1_CTX	F6''',
         
         'pool3': '''30-M-5 #7	LUNG	G6
30-M-5 #8	PANCREASE	H6
30-M-5 #9	BAT,GAT,MAT	A7
30-M-5 #10	SCAT	B7
30-M-5 #11	COLON:P+D	C7
30-M-5 #12	BM	D7
30-M-5 #13	SPLEEN	E7
30-M-5 #14	KIDNEY	F7
30-M-5 #15	BRAIN2_CTX	G7
30-M-5 #16	BRAIN2_HIP,CB,STR	H7
Mouse Lemur	BLOOD	D1
Mouse Lemur	BRAIN	E1
Mouse Lemur	LUNG	F1''',
         
         'pool4': '''3-M-8 #1	TONGUE	A8
3-M-9 #2	TONGUE	B8
3-M-8/9 #3	HEPATOCYTES	C8
3-M-8 #4	BLADDER	D8
3-M-9 #5	BLADDER	E8
3-M-8 #6	KIDNEY	F8
3-M-9 #7	KIDNEY	G8
3-M-8 #8	SPLEEN	H8
4-M-12-P1 #1	BM (T AND B CELLS)	A9
4-M-12-P1 #2	KIDNEY 	B9
4-M-13-P1 #3	KIDNEY 	C9
4-M-13-P1 #4	LIVER 	D9
4-M-12-P1 #5	LUNG 	E9
4-M-13-P1 #6	LUNG	F9
4-M-12-P1 #7	SPLEEN	G9
4-M-13-P1 #8	SPLEEN	H9''',
         
         'pool5': '''1-M-62 #1	HEPATOCYTES-62	A1
1-M-63 #2	HEPATOCYTES-63	B1
1-M-62 #3	TONGUE-62	C1
1-M-63 #4	TONGUE-63	D1
1-M-63 #5	BLADDER-63	E1
1-M-62 #6	MUSCLE-62	F1
1-M-63 #7	MUSCLE-63	G1
1-M-63 #8	LIVER-NPC-63	H1
1-M-62 #9	KIDNEY-62	A2
1-M-63 #10	KIDNEY-63	B2
1-M-62 #11	BM-62	C2
1-M-63 #12	SPLEEN-63	D2
1-M-63 #13	HEART-63	E2
1-M-62 #14	LUNG-62	F2
1-M-63 #15	LUNG-63	G2
1-M-63 #16	BM-63	H2''',
         
#          'pool6': 'repeat mistake of 4',
         
         'pool7': '''3-F-56 #1	HEPATOCYTES-56		A7
3-F-57 #2	HEPATOCYTES-57		B7
3-F-56 #3	BM-56		C7
3-F-57 #4	BM-57		D7
3-F-56 #5	HEART-56		E7
3-F-57 #6	KIDNEY-57		F7
3-F-56 #7	SPLEEN-56		G7
3-F-56 #8	BLADDER-56		H7
3-F-56 #9	LUNG-56		A3
3-F-57 #10	LUNG-57		B3
3-F-56 #11	TONGUE-56		C3
3-F-56 #12	THYMUS "FW"-56		D3
3-F-56 #13	MAMMARY-56		E3
3-F-57 #14	MAMMARY-57		F3
3-F-56 #15	MUSCLE-56		G3
3-F-57 #16	MUSCLE-57		H3''',
         
         'pool8': '''18-F-50 #1	KIDNEY-50		A10
18-F-51 #2	KIDNEY-51		B10
18-F-50 #3	SPLEEN-50		C10
18-F-51 #4	SPLEEN-51		D10
18-F-50 #5	LUNG-50		E10
18-F-51 #6	LUNG-51		F10
18-F-50 #7	PANCREASE-EXOCRINE-50		G10
18-F-50 #8	PANCREASE-ENDOCRINE-50		H10
18-F-51 #9	LIVER_NPC-51		A11
18-F-51 #10	LIVER_HEPATOCYTES-51		B11
18-F-50 #11	BM_50		C11
18-F-51 #12	BM_51		D11
18-F-50 #13	MUSCLE_50 (pre-sort)		E11
18-F-51 #14	MUSCLE_51 (pre-sort)		F11
18-F-50 #15	BREAST_50		G11
18-F-51 #16	BREAST_51		H11''',
         
         'pool9': '''18-F-50 #17	THYMUS_50		A12
18-F-51 #18	THYMUS_51		B12
18-F-50 #19	TONGUE-50		C12
18-F-51 #20	TONGUE-51		D12
18-F-50 #21	AORTA-50		E12
18-F-50 #22	GAT-50		F12
18-F-50 #23	MAT-50		G12
18-F-50 #24	SCAT-50		H12
18-M-52 #1	KIDNEY-52		A1
18-M-52 #2	SPLEEN-52		B1
18-M-53 #3	HEPATOCYTES-53		C1
18-M-52 #4	HEART-4CHAMBERS-52		D1
18-M-53 #5	HEART-4CHAMBERS-53		E1
18-M-52 #6	PANCREASE-ENDOCRINE-52		F1
18-M-52 #7	PANCREASE-EXOCRINE-52		G1
18-M-52 #8	THYMUS-52		H1''',
         
         'pool10': '''18-M-52 #9	CORTEX-52		A2
18-M-52 #10	AORTA-52		B2
18-M-52 #11	BLADDER-52 (unstain)		C2
18-M-52 #12	TONGUE-52  (unstain)		D2
18-M-52 #13	BM-52		E2
18-M-53 #14	BM-53		F2
18-M-52 #15	MUSCLE-52		G2
18-M-53 #16	MUSCLE-53		H2
18-M-52 #17	LUNG-52		A3
18-M-53 #18	LUNG-53		B3
18-M-52 #19	SKIN-52		C3
18-M-52 #20	SCAT-52		D3
18-M-53 #21	SCAT-53		E3
18-M-52 #22	MAT-52		F3
18-M-52 #23	BAT-52		G3
18-M-52 #24	GAT-52		H3''',
         
         'pool11': '''21-F-54  #1	KIDNEY-54		A4
21-F-55  #2	KIDNEY-55		B4
21-F-54  #3	SPLEEN-54		C4
21-F-54  #4	HEPATOCYTES-54		D4
21-F-54 #5	HEART-4CHAMBERS-54		E4
21-F-55 #6	HEART-4CHAMBERS-55		F4
21-F-54 #7	LUNG-54		G4
21-F-55 #8	LUNG-55		H4
21-F-54 #9	SCAT-54		A5
21-F-55 #10	SCAT-55		B5
21-F-54 #11	SKIN-54		C5
21-F-55 #12	SKIN-55		D5
21-F-?- #13	PANCREASE-EXOCRINE		E5
21-F-54 #14	NPC-54		F5
21-F-54 #15	BM-54		G5
21-F-55 #16	BM-55		H5
21-F-54 #17	BREAST-54		A6''',
         
         'pool12': '''21-F-55 #18	BREAST-55		B6
21-F-54 #19	THYMUS-54		C6
21-F-55 #20	THYMUS-55		D6
21-F-54 #21	MUSCLE-54		E6
21-F-55 #22	MUSCLE-55		F6
21-F-55 #23	SPLEEN-55		G6
21-F-?- #24	PANCREASE-ENDOCRINE		H6
24-M-58 #1	KIDNEY-58		A4
24-M-59 #2	KIDNEY-59		B4
24-M-58 #3	SPLEEN-58		C4
24-M-59 #4	SPLEEN-59		D4
24-M-58 #5	HEART-58		E4
24-M-59 #6	HEART-59		F4
24-M-58 #7	HEPATOCYTES-58		G4
24-M-59 #8	HEPATOCYTES-59		H4''',
         
         'pool13': '''24-M-58 #9	BM-58		A5
24-M-59 #10	BM-59		B5
24-M-58 #11	TONGUE-58		C5
24-M-59 #12	TONGUE-59		D5
24-M-58 #13	BLADDER-58		E5
24-M-59 #14	BLADDER-59		F5
24-M-58 #15	MUSCLE-58		G5
24-M-59 #16	MUSCLE-59		H5
24-M-58 #17	THYMUS-58		A6
24-M-59 #18	THYMUS-59		B6
24-M-58 #19	BAT-58 (CRAP)		C6
24-M-58 #20	SCAT-58(CRAP)		D6
24-M-58 #21	MAT-58		E6
24-M-59 #22	MAT-59		F6
24-M-58 #23	GAT-58		G6
24-M-59 #24	GAT-59		H6''',
         
         'pool14': '''24-M-60 #1	KIDNEY-60		A7
24-M-61 #2	KIDNEY-61		B7
24-M-60 #3	SPLEEN-60		C7
24-M-61 #4	SPLEEN-61		D7
24-M-60 #5	HEART-60		E7
24-M-61 #6	HEART-61		F7
24-M-60 #7	LUNG-60		G7
24-M-61 #8	LUNG-61		H7
24-M-60 #9	THYMUS-60		A8
24-M-60 #10	PANCREASE-EXO-60		B8
24-M-60 #11	TONGUE-60		C8
24-M-60 #12	BLADDER-60		D8
24-M-60 #13	MUSCLE-60		E8
24-M-61 #14	MUSCLE-61		F8
24-M-60 #15	BM-60		G8
24-M-61 #16	BM-61		H8''',
         
        'pool15': '''Mouse Lemur (Stumpy)	BM	A10
Mouse Lemur (Stumpy)	PB	B10
Mouse Lemur (Stumpy)	SMALL INTESTINE	C10
Mouse Lemur (Stumpy)	SKIN	D10
Mouse Lemur (Stumpy)	SCAT	E10
Mouse Lemur (Stumpy)	GAT	F10
Mouse Lemur (Stumpy)	BAT	G10
Mouse Lemur (Stumpy)	MAT	H10
		
HUMAN PATIENT	HEPATOCYTES 8/8	A9
HUMAN PATIENT	HEPATOCYTES 8/8	B9
Mouse Lemur (Stumpy)	BONE (CELLS 1DAY OLD) 9/7	A11
Mouse Lemur (Stumpy)	BONE (CELLS 1DAY OLD) 9/7	B11
MACA 3MONTH	LUNG (immune, endo depleted) #1 9/8	E11
MACA 3MONTH	LUNG (immune, endo depleted) #2 9/8	F11
MACA 3MONTH	TRACHEA #1 9/7	C11
MACA 3MONTH	TRACHEA #1 9/8	G11''',
         
        }


dfs = []

for name, s in pools.items():
    print(f'name: {name}')
    df = pd.read_csv(StringIO(s), sep='\t', header=None)
    # Some have empty columns
    df = df.dropna(how='all', axis=1)
    
    # Standardize columns
    df.columns = ['sample_name', 'tissue_cells', 'sample_index']
    df['pool_name'] = name
    try:
        df['pool_number'] = int(name.split('pool')[-1])
    except ValueError:
        df['pool_number'] = 'A'
    df['index_in_pool'] = range(len(df))
    dfs.append(df)
pool_df = pd.concat(dfs, ignore_index=True)
print(pool_df.shape)
pool_df.head()

name: pool1
name: pool2
name: pool3
name: pool4
name: pool5
name: pool7
name: pool8
name: pool9
name: pool10
name: pool11
name: pool12
name: pool13
name: pool14
name: pool15
(222, 6)


Unnamed: 0,sample_name,tissue_cells,sample_index,pool_name,pool_number,index_in_pool
0,30-m2-1,KIDNEY,A3,pool1,1,0
1,30-m2-2,SPLEEN,B3,pool1,1,1
2,30-m2-3,HEART (ALL MINUS AORTA),A4,pool1,1,2
3,30-m2-4,HEART (LV+RV ONLY),B4,pool1,1,3
4,30-m2-5,BM (NON-STC),A5,pool1,1,4


In [13]:
pool_df.tail()

Unnamed: 0,sample_name,tissue_cells,sample_index,pool_name,pool_number,index_in_pool
217,Mouse Lemur (Stumpy),BONE (CELLS 1DAY OLD) 9/7,B11,pool15,15,12
218,MACA 3MONTH,"LUNG (immune, endo depleted) #1 9/8",E11,pool15,15,13
219,MACA 3MONTH,"LUNG (immune, endo depleted) #2 9/8",F11,pool15,15,14
220,MACA 3MONTH,TRACHEA #1 9/7,C11,pool15,15,15
221,MACA 3MONTH,TRACHEA #1 9/8,G11,pool15,15,16


In [14]:
pool_df['pool_number'].value_counts()

15    17
11    17
14    16
13    16
10    16
9     16
8     16
7     16
5     16
4     16
2     16
1     16
12    15
3     13
Name: pool_number, dtype: int64

### Make `10x_PX_X` for pools 1-8

In [15]:
def make_10x_pool_id(row):
    if row.pool_number <= 7 or row.pool_number == 15:
        if row.pool_number == 15:
            pool_number = 8
        else:
            pool_number = row.pool_number
        
        i = row.index_in_pool if row.pool_number > 1 else row.index_in_pool + 1
        channel = f'10X_P{pool_number}_{i}'
    else:
        split = row.sample_name.split('-')
        months = split[0]
        sex = split[1]
        tissue = row.tissue_cells.replace('  ', ' ').replace('(', '').replace(')', '').replace('-', '_').replace(' ', '_')
        if tissue == 'PANCREASE_ENDOCRINE':
            tissue = 'PANCREAS_ENDOCRINE'
        
        channel = f'MACA_{months}m_{sex}_{tissue}'
    return channel

pool_df['channel'] = pool_df.apply(make_10x_pool_id, axis=1)
pool_df['channel'].sample(10)

151           MACA_21m_F_SKIN_54
80                      10X_P7_3
144    MACA_21m_F_HEPATOCYTES_54
62                      10X_P5_1
115            MACA_18m_F_MAT_50
174             MACA_24m_M_BM_59
118         MACA_18m_M_SPLEEN_52
132         MACA_18m_M_MUSCLE_53
17                      10X_P2_1
35                      10X_P3_3
Name: channel, dtype: object

In [16]:
pool_df.query('tissue_cells == "PANCREASE-ENDOCRINE"')

Unnamed: 0,sample_name,tissue_cells,sample_index,pool_name,pool_number,index_in_pool,channel
164,21-F-?- #24,PANCREASE-ENDOCRINE,H6,pool12,12,6,MACA_21m_F_PANCREAS_ENDOCRINE


In [17]:
set(startswith_10x).difference(pool_df['channel'].unique())

set()

In [18]:
pool_df['channel'].isin(mouse.obs['channel']).sum()

166

In [19]:
len(set(mouse.obs['channel'].unique()).intersection(pool_df['channel']))

166

In [20]:
set(mouse.obs['channel'].unique()).difference(pool_df['channel'])

set()

In [21]:
len(set(mouse.obs['channel'].unique()).difference(pool_df['channel']))

0

In [22]:
pool_df.pool_name.unique()

array(['pool1', 'pool2', 'pool3', 'pool4', 'pool5', 'pool7', 'pool8',
       'pool9', 'pool10', 'pool11', 'pool12', 'pool13', 'pool14',
       'pool15'], dtype=object)

### Join pool df with sequencing run df

regex: https://regex101.com/r/ODKX4f/1

In [23]:
seq_run_to_pool['pool_name'] = seq_run_to_pool['10x pool#'].str.extract('([pP]ool \d+)')[0].str.replace(' ', '').str.lower()
seq_run_to_pool.head()

Unnamed: 0,Run folder,10x pool#,pool_name
0,170606_A00111_0041_BH27FCDMXX,Pool 1 MACA10x,pool1
1,170727_A00111_0044_BH25HKDMXX,Pool 2 MACA10x,pool2
2,170808_A00111_0045_AH2HMCDMXX,Pool 3 MACA10x,pool3
3,170810_A00111_0047_AH2HT7DMXX,Pool 4 MACA10x,pool4
4,170817_A00111_0048_AH2H72DMXX,Pool 5 MACA10x,pool5


In [24]:
pool_to_seq_run = seq_run_to_pool.set_index('pool_name')['Run folder']
pool_to_seq_run.name = 'sequencing_run'
pool_to_seq_run

pool_name
pool1     170606_A00111_0041_BH27FCDMXX
pool2     170727_A00111_0044_BH25HKDMXX
pool3     170808_A00111_0045_AH2HMCDMXX
pool4     170810_A00111_0047_AH2HT7DMXX
pool5     170817_A00111_0048_AH2H72DMXX
pool6     170827_A00111_0049_AH2H7CDMXX
pool7     170827_A00111_0050_BH2H5YDMXX
pool8     171005_A00111_0072_AH3YLGDMXX
pool9     171005_A00111_0073_BH3YHNDMXX
pool10    171019_A00111_0077_BH3YKNDMXX
pool11    171024_A00111_0078_AH3YHCDMXX
pool12    171024_A00111_0079_BH3YKJDMXX
pool13    171103_A00111_0081_AH52KGDMXX
pool14    171103_A00111_0082_BH523JDMXX
pool15    170919_A00111_0061_BH3FYJDMXX
pool16    171001_A00111_0071_BH3TTFDMXX
Name: sequencing_run, dtype: object

In [25]:
pool_df_with_seq = pool_df.join(pool_to_seq_run, on='pool_name')
print(pool_df_with_seq.shape)
pool_df_with_seq.head()

(222, 8)


Unnamed: 0,sample_name,tissue_cells,sample_index,pool_name,pool_number,index_in_pool,channel,sequencing_run
0,30-m2-1,KIDNEY,A3,pool1,1,0,10X_P1_1,170606_A00111_0041_BH27FCDMXX
1,30-m2-2,SPLEEN,B3,pool1,1,1,10X_P1_2,170606_A00111_0041_BH27FCDMXX
2,30-m2-3,HEART (ALL MINUS AORTA),A4,pool1,1,2,10X_P1_3,170606_A00111_0041_BH27FCDMXX
3,30-m2-4,HEART (LV+RV ONLY),B4,pool1,1,3,10X_P1_4,170606_A00111_0041_BH27FCDMXX
4,30-m2-5,BM (NON-STC),A5,pool1,1,4,10X_P1_5,170606_A00111_0041_BH27FCDMXX


In [26]:
pool_df_with_seq.sequencing_run.isnull().sum()

0

### Make channel to sequencing run

In [27]:
channel_to_seq_run = pool_df_with_seq.set_index('channel')['sequencing_run']
print(channel_to_seq_run.shape)
channel_to_seq_run.head()

(222,)


channel
10X_P1_1    170606_A00111_0041_BH27FCDMXX
10X_P1_2    170606_A00111_0041_BH27FCDMXX
10X_P1_3    170606_A00111_0041_BH27FCDMXX
10X_P1_4    170606_A00111_0041_BH27FCDMXX
10X_P1_5    170606_A00111_0041_BH27FCDMXX
Name: sequencing_run, dtype: object

In [28]:
mouse_obs_seq_run = mouse.obs.join(channel_to_seq_run, on='channel')
print(mouse_obs_seq_run.shape)
mouse_obs_seq_run.head()

(245389, 17)


Unnamed: 0_level_0,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,n_genes,sex,subtissue,tissue,tissue_free_annotation,species,species_latin,channel,channel_tissue,sequencing_run
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
AAACCTGCAGGGTACA-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,keratinocyte,,filiform,droplet,24-M-60,2107.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,droplet,24-M-60,3481.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,droplet,24-M-60,2599.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3468.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3189.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX


In [29]:
mouse_obs_seq_run['cell_barcode'] = mouse_obs_seq_run.cell.str.split('_').str[-1]
mouse_obs_seq_run.head()

Unnamed: 0_level_0,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,n_genes,sex,subtissue,tissue,tissue_free_annotation,species,species_latin,channel,channel_tissue,sequencing_run,cell_barcode
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AAACCTGCAGGGTACA-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,keratinocyte,,filiform,droplet,24-M-60,2107.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGCAGGGTACA
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,droplet,24-M-60,3481.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGCAGTAAGCG
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,droplet,24-M-60,2599.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGTCATTATCC
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3468.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACGGGGTACAGTGG
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3189.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACGGGGTCTTCTCG


In [30]:
mouse_seq_run_to_barcode_counts = mouse_obs_seq_run.groupby(['sequencing_run', 'cell_barcode']).size()
mouse_seq_run_to_barcode_counts.name = 'n_barcodes'
print(mouse_seq_run_to_barcode_counts.shape)
mouse_seq_run_to_barcode_counts.head()

(242080,)


sequencing_run                 cell_barcode    
170606_A00111_0041_BH27FCDMXX  AAACCTGAGAAACGCC    1
                               AAACCTGAGACAAGCC    1
                               AAACCTGAGACTGGGT    1
                               AAACCTGAGAGGACGG    2
                               AAACCTGAGATCTGCT    1
Name: n_barcodes, dtype: int64

In [31]:
mouse_seq_run_to_barcode_counts.value_counts().sort_index()

1     238915
2       3111
3         37
4          3
5          2
6          1
7          2
8          1
9          1
10         2
11         1
12         1
13         3
Name: n_barcodes, dtype: int64

In [32]:
mouse_obs_seq_run_barcode_counts = mouse_obs_seq_run.join(mouse_seq_run_to_barcode_counts, on=['sequencing_run', 'cell_barcode'])
print(mouse_obs_seq_run_barcode_counts.shape)
mouse_obs_seq_run_barcode_counts.head()

(245389, 19)


Unnamed: 0_level_0,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,n_genes,sex,subtissue,tissue,tissue_free_annotation,species,species_latin,channel,channel_tissue,sequencing_run,cell_barcode,n_barcodes
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
AAACCTGCAGGGTACA-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,keratinocyte,,filiform,droplet,24-M-60,2107.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGCAGGGTACA,2
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,droplet,24-M-60,3481.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGCAGTAAGCG,1
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,droplet,24-M-60,2599.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACCTGTCATTATCC,1
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3468.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACGGGGTACAGTGG,1
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,droplet,24-M-60,3189.0,male,,Tongue,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60__Tongue,171103_A00111_0082_BH523JDMXX,AAACGGGGTCTTCTCG,1


## Make mouse adata with barcode counts

In [33]:
mouse_with_barcode_counts = mouse.copy()
mouse_with_barcode_counts.obs = mouse_obs_seq_run_barcode_counts
mouse_with_barcode_counts

AnnData object with n_obs × n_vars = 245389 × 20138 
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation', 'species', 'species_latin', 'channel', 'channel_tissue', 'sequencing_run', 'cell_barcode', 'n_barcodes'
    var: 'n_cells'

### Write mouse adata with barcode counts

In [34]:
h5ad = '/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-official-raw-obj--seq-run-barcode-counts.h5ad'
mouse_with_barcode_counts.write(h5ad)

... storing 'species' as categorical
... storing 'species_latin' as categorical
... storing 'channel' as categorical
... storing 'channel_tissue' as categorical
... storing 'sequencing_run' as categorical
... storing 'cell_barcode' as categorical


## Make mouse adata with no duplicates

In [35]:
mouse_no_duplicates = mouse_with_barcode_counts[mouse_with_barcode_counts.obs.n_barcodes == 1]
mouse_no_duplicates

View of AnnData object with n_obs × n_vars = 238915 × 20138 
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation', 'species', 'species_latin', 'channel', 'channel_tissue', 'sequencing_run', 'cell_barcode', 'n_barcodes'
    var: 'n_cells'

In [36]:
mouse_no_duplicates.obs.n_barcodes.describe()

count    238915.0
mean          1.0
std           0.0
min           1.0
25%           1.0
50%           1.0
75%           1.0
max           1.0
Name: n_barcodes, dtype: float64

### Write data with no duplicates

In [37]:
h5ad = "/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-official-raw-obj--no-duplicate-barcodes-per-seq-run.h5ad"
mouse_no_duplicates.write(h5ad)



## Get channel to tissue mapping

In [38]:
base_folder = "/home/olga/data_lg/tabula-muris-senis/10x/"


channel_to_tissue = mouse_no_duplicates.obs[
    ["channel", "tissue", "age"]
].drop_duplicates()
channel_to_tissue["age_long"] = channel_to_tissue.age.map(
    lambda x: x.replace("m", "_month")
)
channel_to_tissue["tgz_path"] = channel_to_tissue.apply(
    lambda x: os.path.join(
        base_folder, x["age_long"], x["channel"], x["channel"] + ".tgz"
    ),
    axis=1
)

channel_to_tissue = channel_to_tissue.set_index("channel")


channel_to_tissue

Unnamed: 0_level_0,tissue,age,age_long,tgz_path
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MACA_24m_M_TONGUE_60,Tongue,24m,24_month,/home/olga/data_lg/tabula-muris-senis/10x/24_m...
MACA_18m_F_AORTA_50,Heart_and_Aorta,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...
MACA_18m_F_BM_50,Marrow,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...
MACA_18m_F_BM_51,Marrow,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...
MACA_18m_F_BREAST_50,Mammary_Gland,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...
...,...,...,...,...
10X_P7_9,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...
10X_P8_12,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...
10X_P8_13,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...
10X_P8_14,Trachea,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...


In [39]:
channel_to_tissue.to_csv('/home/olga/googledrive/ImmuneEvolution/data/tabula_muris_senis_channel_to_tissue.csv')

In [40]:
per_tissue_folder = os.path.join(base_folder, 'per-tissue-softlinks')
! mkdir $per_tissue_folder

mkdir: cannot create directory '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks': File exists


In [41]:
channel_to_tissue['per_tissue_location'] = channel_to_tissue.apply(
    lambda x: os.path.join(per_tissue_folder, x['tissue'], x['age'], x.name + '.tgz'),
    axis=1
)
channel_to_tissue

Unnamed: 0_level_0,tissue,age,age_long,tgz_path,per_tissue_location
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MACA_24m_M_TONGUE_60,Tongue,24m,24_month,/home/olga/data_lg/tabula-muris-senis/10x/24_m...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
MACA_18m_F_AORTA_50,Heart_and_Aorta,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
MACA_18m_F_BM_50,Marrow,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
MACA_18m_F_BM_51,Marrow,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
MACA_18m_F_BREAST_50,Mammary_Gland,18m,18_month,/home/olga/data_lg/tabula-muris-senis/10x/18_m...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
...,...,...,...,...,...
10X_P7_9,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
10X_P8_12,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
10X_P8_13,Lung,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...,/home/olga/data_lg/tabula-muris-senis/10x/per-...
10X_P8_14,Trachea,3m,3_month,/home/olga/data_lg/tabula-muris-senis/10x/3_mo...,/home/olga/data_lg/tabula-muris-senis/10x/per-...


In [42]:
for i, row in channel_to_tissue.iterrows():
    oldfile = row['tgz_path']
    newplace = row['per_tissue_location']
    new_folder = os.path.dirname(newplace)
    if not os.path.exists(new_folder):
        ! mkdir -p $new_folder
    ! ln -s $oldfile $newplace

ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Tongue/24m/MACA_24m_M_TONGUE_60.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Heart_and_Aorta/18m/MACA_18m_F_AORTA_50.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Marrow/18m/MACA_18m_F_BM_50.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Marrow/18m/MACA_18m_F_BM_51.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Mammary_Gland/18m/MACA_18m_F_BREAST_50.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-softlinks/Mammary_Gland/18m/MACA_18m_F_BREAST_51.tgz': File exists
ln: failed to create symbolic link '/home/olga/data_lg/tabula-muris-senis/10x/per-tissue-sof

# Make minimal obs

## START HERE -- Read adata with no duplicates

In [2]:
h5ad = "/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/tabula-muris-senis-droplet-official-raw-obj--no-duplicate-barcodes-per-seq-run.h5ad"
mouse_no_duplicates = scanpy.read(h5ad)
mouse_no_duplicates

AnnData object with n_obs × n_vars = 238915 × 20138 
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation', 'species', 'species_latin', 'channel', 'channel_tissue', 'sequencing_run', 'cell_barcode', 'n_barcodes'
    var: 'n_cells'

## Add "individual" prefixed with "mouse"

In [3]:
id_to_individual = {mouse_id: f'mouse_{i+1}_{mouse_id}' for i, ((age, mouse_id), df) in enumerate(mouse_no_duplicates.obs.groupby(['age', 'mouse.id']))}
id_to_individual

{'1-M-62': 'mouse_1_1-M-62',
 '1-M-63': 'mouse_2_1-M-63',
 '3-F-56': 'mouse_3_3-F-56',
 '3-F-57': 'mouse_4_3-F-57',
 '3-M-5/6': 'mouse_5_3-M-5/6',
 '3-M-7/8': 'mouse_6_3-M-7/8',
 '3-M-8': 'mouse_7_3-M-8',
 '3-M-8/9': 'mouse_8_3-M-8/9',
 '3-M-9': 'mouse_9_3-M-9',
 '18-F-50': 'mouse_10_18-F-50',
 '18-F-51': 'mouse_11_18-F-51',
 '18-M-52': 'mouse_12_18-M-52',
 '18-M-53': 'mouse_13_18-M-53',
 '21-F-54': 'mouse_14_21-F-54',
 '21-F-55': 'mouse_15_21-F-55',
 '24-M-58': 'mouse_16_24-M-58',
 '24-M-59': 'mouse_17_24-M-59',
 '24-M-60': 'mouse_18_24-M-60',
 '24-M-61': 'mouse_19_24-M-61',
 '30-M-2': 'mouse_20_30-M-2',
 '30-M-3': 'mouse_21_30-M-3',
 '30-M-4': 'mouse_22_30-M-4',
 '30-M-5': 'mouse_23_30-M-5'}

In [4]:
list(id_to_individual.values())

['mouse_1_1-M-62',
 'mouse_2_1-M-63',
 'mouse_3_3-F-56',
 'mouse_4_3-F-57',
 'mouse_5_3-M-5/6',
 'mouse_6_3-M-7/8',
 'mouse_7_3-M-8',
 'mouse_8_3-M-8/9',
 'mouse_9_3-M-9',
 'mouse_10_18-F-50',
 'mouse_11_18-F-51',
 'mouse_12_18-M-52',
 'mouse_13_18-M-53',
 'mouse_14_21-F-54',
 'mouse_15_21-F-55',
 'mouse_16_24-M-58',
 'mouse_17_24-M-59',
 'mouse_18_24-M-60',
 'mouse_19_24-M-61',
 'mouse_20_30-M-2',
 'mouse_21_30-M-3',
 'mouse_22_30-M-4',
 'mouse_23_30-M-5']

In [5]:
mouse_no_duplicates.obs['individual'] = mouse_no_duplicates.obs['mouse.id'].map(id_to_individual)
mouse_no_duplicates.obs['individual'].value_counts()

mouse_20_30-M-2     24369
mouse_15_21-F-55    18359
mouse_12_18-M-52    17441
mouse_14_21-F-54    16285
mouse_2_1-M-63      15653
mouse_3_3-F-56      14787
mouse_23_30-M-5     12484
mouse_16_24-M-58    11883
mouse_17_24-M-59    11838
mouse_10_18-F-50    11808
mouse_11_18-F-51     9571
mouse_1_1-M-62       9500
mouse_18_24-M-60     9497
mouse_21_30-M-3      9286
mouse_4_3-F-57       8233
mouse_22_30-M-4      7768
mouse_7_3-M-8        6189
mouse_6_3-M-7/8      5235
mouse_9_3-M-9        4897
mouse_5_3-M-5/6      4880
mouse_13_18-M-53     4837
mouse_19_24-M-61     3665
mouse_8_3-M-8/9       450
Name: individual, dtype: int64

## Subset to only minimal obs

In [6]:
obs_minimal = mouse_no_duplicates.obs.rename(
    columns={
        "age": "age",
        "cell": "cell_id",
        "cell_barcode": "cell_barcode",
        "cell_ontology_class": "cell_ontology_class",
        "cell_ontology_id": "cell_ontology_id",
        "channel": "channel",
        "channel_tissue": None,
        "free_annotation": "free_annotation",
        "method": None,
        "mouse.id": None,
        "individual": "individual",
        "n_barcodes": "n_counts",
        "n_genes": "n_genes",
        "sequencing_run": "sequencing_run",
        "sex": "sex",
        "species": "species",
        "species_latin": "species_latin",
        "subtissue": "subtissue",
        "tissue": "tissue",
        "tissue_free_annotation": None,
    }
)
obs_minimal = obs_minimal.loc[:, obs_minimal.columns.notnull()]
obs_minimal.head()

Unnamed: 0_level_0,age,cell_id,cell_ontology_class,cell_ontology_id,free_annotation,n_genes,sex,subtissue,tissue,species,species_latin,channel,sequencing_run,cell_barcode,n_counts,individual
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,3481.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACCTGCAGTAAGCG,1,mouse_18_24-M-60
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,2599.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACCTGTCATTATCC,1,mouse_18_24-M-60
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,3468.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACGGGGTACAGTGG,1,mouse_18_24-M-60
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,3189.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACGGGGTCTTCTCG,1,mouse_18_24-M-60
AAAGATGAGCTATGCT-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAAGATGAGCTATGCT,keratinocyte,,suprabasal,3419.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAAGATGAGCTATGCT,1,mouse_18_24-M-60


## Add unified cell type groups

In [7]:
import unified_annotations

muscle_grouping = unified_annotations.get_celltype_converter('Muscle', ("Mouse", "Tabula Muris Senis", "cell_ontology_class"))
print(muscle_grouping.shape)
muscle_grouping

(9, 4)


Unnamed: 0_level_0,narrow_group,broad_group,compartment_group,tissue
"(Mouse, Tabula Muris Senis, cell_ontology_class)",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
B cell,B cell,B cell,lymphoid,Muscle
T cell,T cell,T cell,lymphoid,Muscle
macrophage,macrophage,macrophage,myeloid,Muscle
endothelial cell,endothelial cell,endothelial cell,endothelial,Muscle
skeletal muscle satellite cell,skeletal muscle satellite stem cell,skeletal muscle satellite stem cell,stromal,Muscle
skeletal muscle cell,skeletal muscle cell,skeletal muscle cell,stromal,Muscle
smooth muscle cell,pericyte cell_smooth muscle cell,pericyte cell_smooth muscle cell,stromal,Muscle
mesenchymal stem cell,mesenchymal cell,mesenchymal cell,stromal,Muscle
Schwann cell,Schwann cell,Schwann cell,neural,Muscle



### Join minimal obs with muscle

In [8]:
obs_muscle_joined = obs_minimal.merge(
    muscle_grouping.drop('tissue', axis=1), how="left", left_on="cell_ontology_class", right_index=True
)
print(obs_muscle_joined.shape)
obs_muscle_joined.head()

(238915, 19)


Unnamed: 0_level_0,age,cell_id,cell_ontology_class,cell_ontology_id,free_annotation,n_genes,sex,subtissue,tissue,species,species_latin,channel,sequencing_run,cell_barcode,n_counts,individual,narrow_group,broad_group,compartment_group
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
AAACCTGCAGTAAGCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,keratinocyte,,suprabasal,3481.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACCTGCAGTAAGCG,1,mouse_18_24-M-60,,,
AAACCTGTCATTATCC-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,keratinocyte,,suprabasal,2599.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACCTGTCATTATCC,1,mouse_18_24-M-60,,,
AAACGGGGTACAGTGG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTACAGTGG,keratinocyte,,suprabasal differentiating,3468.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACGGGGTACAGTGG,1,mouse_18_24-M-60,,,
AAACGGGGTCTTCTCG-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAACGGGGTCTTCTCG,keratinocyte,,suprabasal differentiating,3189.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAACGGGGTCTTCTCG,1,mouse_18_24-M-60,,,
AAAGATGAGCTATGCT-1-0-0-0,24m,MACA_24m_M_TONGUE_60_AAAGATGAGCTATGCT,keratinocyte,,suprabasal,3419.0,male,,Tongue,Mouse,Mus musculus,MACA_24m_M_TONGUE_60,171103_A00111_0082_BH523JDMXX,AAAGATGAGCTATGCT,1,mouse_18_24-M-60,,,


## Get lung to cell grouping

In [9]:
lung_grouping = unified_annotations.get_celltype_converter('Lung', ("Mouse", "Tabula Muris Senis", "free_annotation"))
lung_grouping = lung_grouping.drop('tissue', axis=1)
lung_grouping.loc['B', :] = lung_grouping.loc['B cell']
print(lung_grouping.shape)
lung_grouping.tail(20)

(40, 3)


Unnamed: 0_level_0,narrow_group,broad_group,compartment_group
"(Mouse, Tabula Muris Senis, free_annotation)",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ly6g5b+ T,LY6G5B+ T cell,T cell,lymphoid
Proliferating NK,proliferating natural killer cell_T cell,proliferating natural killer cell_T cell,lymphoid
Proliferating T,proliferating natural killer cell_T cell,proliferating natural killer cell_T cell,lymphoid
Natural Killer,natural killer cell,natural killer cell,lymphoid
Natural Killer T,natural killer T cell,natural killer T cell,lymphoid
Myeloid Dendritic Type 1,conventional dendritic cell,dendritic cell,myeloid
Myeloid Dendritic Type 2,conventional dendritic cell,dendritic cell,myeloid
Plasmacytoid Dendritic,plasmacytoid dendritic cell,dendritic cell,myeloid
Ccr7+ Dendritic,CCR7+ dendritic cell,dendritic cell,myeloid
Proliferating Dendritic,proliferating dendritic cell,dendritic cell,myeloid


## Concatenate grouping

In [10]:
obs_muscle_joined_lung = obs_muscle_joined.query('tissue == "Lung"')
obs_muscle_joined_lung.free_annotation.cat.remove_unused_categories(inplace=True)
obs_muscle_joined_lung.free_annotation.value_counts()

Classical Monocyte                   5269
Proliferating Classical Monocyte     2393
Capillary                            1743
Intermediate Monocyte                1686
Alveolar Fibroblast                  1499
Alveolar Macrophage                  1217
Natural Killer                       1141
Interstitial Macrophage              1115
B                                    1052
Nonclassical Monocyte                 971
CD8+ T                                854
Neutrophil                            543
CD4+ T                                534
Adventitial Fibroblast                517
Capillary Aerocyte                    512
Natural Killer T                      406
Zbtb32+ B                             405
Vein                                  306
Myofibroblast                         217
Myeloid Dendritic Type 1              161
Ly6g5b+ T                             154
Basophil                              128
Alveolar Epithelial Type 2            122
Regulatory T                      

In [11]:
sorted(obs_muscle_joined_lung.free_annotation.unique())

['Adventitial Fibroblast',
 'Airway Smooth Muscle',
 'Alveolar Epithelial Type 2',
 'Alveolar Fibroblast',
 'Alveolar Macrophage',
 'Artery',
 'B',
 'Basophil',
 'CD4+ T',
 'CD8+ T',
 'Capillary',
 'Capillary Aerocyte',
 'Ccr7+ Dendritic',
 'Ciliated',
 'Classical Monocyte',
 'Club',
 'Intermediate Monocyte',
 'Interstitial Macrophage',
 'Ly6g5b+ T',
 'Lympatic',
 'Myeloid Dendritic Type 1',
 'Myeloid Dendritic Type 2',
 'Myofibroblast',
 'Natural Killer',
 'Natural Killer T',
 'Neuroendocrine',
 'Neutrophil',
 'Nonclassical Monocyte',
 'Pericyte',
 'Plasma',
 'Plasmacytoid Dendritic',
 'Proliferating Alveolar Macrophage',
 'Proliferating Classical Monocyte',
 'Proliferating Dendritic',
 'Proliferating NK',
 'Proliferating T',
 'Regulatory T',
 'Vein',
 'Zbtb32+ B']

In [12]:
lung_grouping

Unnamed: 0_level_0,narrow_group,broad_group,compartment_group
"(Mouse, Tabula Muris Senis, free_annotation)",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adventitial Fibroblast,adventitial fibroblast,fibroblast,stromal
Alveolar Fibroblast,adventitial fibroblast,fibroblast,stromal
Airway Smooth Muscle,airway associated smooth muscle cell,airway associated smooth muscle cell,stromal
Myofibroblast,myofibroblast cell,myofibroblast cell,stromal
Pericyte,pericyte cell,pericyte cell,stromal
Capillary,capillary cell,capillary cell,endothelial
Capillary Aerocyte,capillary aerocyte cell,capillary cell,endothelial
Vein,vein cell,vein cell,endothelial
Artery,artery cell,artery cell,endothelial
Lympatic,lymphatic cell,lymphatic cell,endothelial


### Set narrow group, broad group as strings

In [13]:
obs_muscle_joined[lung_grouping.columns] = obs_muscle_joined[lung_grouping.columns].astype(str)

In [14]:
for free_annotation, df in obs_muscle_joined_lung.groupby("free_annotation"):
    for group_name, group_value in lung_grouping.loc[free_annotation].items():
        obs_muscle_joined.loc[df.index, group_name] = obs_muscle_joined.loc[
            df.index, group_name
        ].replace({"nan": group_value}).astype(str)

### Make sure narrow group and broad group actually got replaced

In [15]:
obs_muscle_joined.tissue.value_counts()

Marrow             39125
Spleen             34853
Limb_Muscle        28176
Lung               23802
Kidney             21038
Tongue             20271
Mammary_Gland      11954
Thymus              9082
Bladder             8752
Heart_and_Aorta     8253
Trachea             7901
Liver               7052
Fat                 6534
Pancreas            5969
Skin                4308
Large_Intestine     1845
Name: tissue, dtype: int64

In [16]:
obs_muscle_joined.query('tissue == "Limb_Muscle"').narrow_group.value_counts()

mesenchymal cell                       12712
endothelial cell                        6738
macrophage                              2415
skeletal muscle satellite stem cell     2018
B cell                                  1472
T cell                                  1234
pericyte cell_smooth muscle cell        1129
Schwann cell                             270
skeletal muscle cell                     188
Name: narrow_group, dtype: int64

In [17]:
obs_muscle_joined.query('tissue == "Lung"').broad_group.value_counts()

monocyte                                    10319
macrophage                                   2434
capillary cell                               2255
fibroblast                                   2016
T cell                                       1752
B cell                                       1457
natural killer cell                          1141
neutrophil                                    543
natural killer T cell                         406
dendritic cell                                354
vein cell                                     306
myofibroblast cell                            217
basophil                                      128
alveolar epithelial cell type 2               122
artery cell                                    95
pericyte cell                                  59
ciliated cell                                  55
plasma cell                                    47
lymphatic cell                                 40
proliferating natural killer cell_T cell       28


In [18]:
obs_muscle_joined.query('tissue == "Lung"').compartment_group.value_counts()

myeloid        13778
lymphoid        4831
endothelial     2696
stromal         2305
epithelial       188
neural             4
Name: compartment_group, dtype: int64

## Add compartment group for Bladder

In [19]:
bladder_compartment = {'bladder urothelial cell': 'epithelial',
 'bladder cell': 'stromal',
 'endothelial cell': 'endothelial',
 'leukocyte': 'immune'}

obs_muscle_joined.query("tissue == 'Bladder'").cell_ontology_class.unique()

array(['bladder urothelial cell', 'bladder cell', 'endothelial cell',
       'leukocyte'], dtype=object)

In [20]:
dict.fromkeys(['bladder urothelial cell', 'bladder cell', 'endothelial cell', 'leukocyte'])

{'bladder urothelial cell': None,
 'bladder cell': None,
 'endothelial cell': None,
 'leukocyte': None}

## Add new obs

In [21]:
mouse_no_duplicates_new_obs = mouse_no_duplicates.copy()
mouse_no_duplicates_new_obs.obs = obs_muscle_joined

# Write mouse adata with minimal adata and new grouping

In [22]:
h5ad = os.path.join(
    "/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/",
    "tabula-muris-senis-droplet-official-raw-obj--no-duplicate-barcodes-per-seq-run--minimal-obs-unified-celltypes.h5ad",
) 
mouse_no_duplicates_new_obs.write(h5ad)

... storing 'cell_ontology_class' as categorical
... storing 'narrow_group' as categorical
... storing 'broad_group' as categorical
... storing 'compartment_group' as categorical


In [23]:
mouse_no_duplicates_new_obs.obs.individual.value_counts()

mouse_20_30-M-2     24369
mouse_15_21-F-55    18359
mouse_12_18-M-52    17441
mouse_14_21-F-54    16285
mouse_2_1-M-63      15653
mouse_3_3-F-56      14787
mouse_23_30-M-5     12484
mouse_16_24-M-58    11883
mouse_17_24-M-59    11838
mouse_10_18-F-50    11808
mouse_11_18-F-51     9571
mouse_1_1-M-62       9500
mouse_18_24-M-60     9497
mouse_21_30-M-3      9286
mouse_4_3-F-57       8233
mouse_22_30-M-4      7768
mouse_7_3-M-8        6189
mouse_6_3-M-7/8      5235
mouse_9_3-M-9        4897
mouse_5_3-M-5/6      4880
mouse_13_18-M-53     4837
mouse_19_24-M-61     3665
mouse_8_3-M-8/9       450
Name: individual, dtype: int64