### Convert loom file to DataFrame
This file is for converting loom files downloaded from http://mousebrain.org/ to DataFrame for easy work in downstream analysis. In here, I only downloaded hippocampus data. This file is converted for automated annotation analysis.  

In [1]:
import loompy
import pandas as pd
import numpy as np
import re

In [78]:
# File loading
# raw file
ds = loompy.connect("l1_hippocampus.loom")

# aggregate file
ds_agg = loompy.connect("l1_hippocampus.agg.loom")

### raw file

In [170]:
# Check the number of CellID
len(ds.ca.CellID)

AttributeError: '<class 'loompy.attribute_manager.AttributeManager'>' object has no attribute 'CellID'

In [34]:
# Check the number of genes
len(ds.ra.Gene)

27998

In [37]:
# Check the counts
ds[0:27998, 0:29519]

array([[6., 5., 6., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [2., 2., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [40]:
# Create DataFrame
ds.df = pd.DataFrame(data = ds[0:27998, 0:29519], index = ds.ra.Gene, columns= ds.ca.CellID)

In [43]:
# Check shape
ds.df.shape

(27998, 29519)

In [42]:
# Save DataFrame
ds.df.to_csv("/home/skim/Documents/Heppner_Microglia/Automation/l1_hippocampus.txt", sep= "\t", header=True, index=True)

In [50]:
# create meta data
ds.df.meta = pd.DataFrame({'ID': ds.ca.CellID, 'Cluster': ds.ca.Class})

  """Entry point for launching an IPython kernel.


In [52]:
# Save meta data
ds.df.meta.to_csv("/home/skim/Documents/Heppner_Microglia/Automation/l1_hippocampus_meta.txt", sep= "\t", header=True, index=True)

### aggregate file

In [36]:
Class = ds_agg.ca.Class 

In [37]:
AutoAnnotation = ds_agg.ca.AutoAnnotation

In [162]:
MarkerGenes = ds_agg.ca.MarkerGenes

In [163]:
# Check marker genes
MarkerGenes[1]

'Il23a Tmem163 Cnksr3 Adamts4 Gm15440 Fcnaos'

In [40]:
### Combine two array (Class , AutoAnnotation)
Class_AutoAnnotation = Class + AutoAnnotation

In [173]:
# Check ID
Class_AutoAnnotation[0:5]

array(['Oligos@OL,COP', 'Oligos@OL,OLIG',
       'Excluded@IEG,@OL,@VGLUT1,@XIN,DG-IGC,HC-CA1,HC-CA2,OLIG',
       'Excluded@OL,@VGLUT1,DG-GC,OLIG',
       'Oligos@NBL,@NIPC,@OL,@VGLUT1,DG-GC,HC-CA2,OLIG'], dtype=object)

In [10]:
# Check length of genes
len(ds.ra.Gene)

27998

In [45]:
# Create DataFrame
ds_df = pd.DataFrame(data = ds[0:27998, 0:74], index = ds.ra.Gene, columns= Class_AutoAnnotation)

In [46]:
# Check DataFrame
ds_df.shape

(27998, 74)

In [64]:
# Filtered out ID containing Excluded 
ds_df_filtered = ds_df.filter(regex= 'Oligos|Neurons|Immune|Astrocytes|Ependymal|Vascular')

Index(['Oligos@OL,COP', 'Oligos@OL,OLIG',
       'Excluded@IEG,@OL,@VGLUT1,@XIN,DG-IGC,HC-CA1,HC-CA2,OLIG',
       'Excluded@OL,@VGLUT1,DG-GC,OLIG',
       'Oligos@NBL,@NIPC,@OL,@VGLUT1,DG-GC,HC-CA2,OLIG', 'Oligos@OL,OLIG',
       'Excluded@AEP,@OL,AC-NFB,OLIG', 'Excluded@BMono,@IEG,@OL,MGL,OLIG',
       'Excluded@GABA,@IEG,@OL,CNC,HC-Cck-Cxcl14,HC-Vip-Calb2,OLIG',
       'Excluded@GABA,@IEG,@OL,CB-MLI,CNC,DG-IGC,HC-Cacna2d1-Lhx6,HC-Sst-Npy,OLIG,Rgl2c',
       'Neurons@GABA,DG-IGC,HC-Sst-Npy', 'Neurons@GABA,DG-IGC,HC-Sst-Npy',
       'Neurons@GABA,CB-MLI,HC-Pvalb-Tac1,HC-Sst-Npy',
       'Neurons@AEP,@GABA,@XIN,AC-NFB,CB-MLI,HC-Cacna2d1-Lhx6,HC-Sst-Npy,Rgl2',
       'Neurons@VGLUT1', 'Neurons@GABA,HC-Cck-Cxcl14,HC-Vip-Calb2',
       'Neurons@GABA,HC-Ntng1-Chrm2,HC-Sst-Npy,PERI',
       'Neurons@GABA,HC-Cacna2d1-Ndnf,HC-Ntng1-Chrm2,HC-Sst-Npy',
       'Neurons@GABA,HC-Cacna2d1-Ndnf,HC-Sst-Npy',
       'Neurons@GABA,@POMC,HC-Sst-Npy', 'Neurons@GABA,HC-Sst-Npy',
       'Neurons@GABA,@POMC

In [63]:
ds_df_filtered.shape

(27998, 60)

In [66]:
# Save aggregated DataFrame
ds_df_filtered.to_csv("/home/skim/Documents/Heppner_Microglia/Automation/l1_hippocampus_aggregate_DF.txt", sep= "\t", header=True, index=True)

### marker genes

In [164]:
# Marker genes remove whitespace and add comma
for i in range(0,74):
    MarkerGenes[i] = re.sub("\s+", ",", MarkerGenes[i].strip())
    #MarkerGenes[i] = re.sub(r'\"', "'", MarkerGenes[i].strip())

array(['Bmp4,Enpp6,Lims2,Bcas1,Bfsp2,Pak4',
       'Il23a,Tmem163,Cnksr3,Adamts4,Gm15440,Fcnaos',
       'Nov,Itgb4,Ermn,Arsg,Slain1,Myrf',
       'Ugt8a,Ccnjl,Fa2h,Hapln2,S1pr5,Ttyh2',
       'Gsn,Zpr1,Trim13,Ppp1r14a,Gjb1,Trim59',
       'Mal,Adssl1,Ermn,Ppp1r14a,Mog,Nkx6-2',
       'Ndrg1,Sdc4,Plekhh1,Gjc2,Tmem88b,Myrf',
       'Lag3,Prim2,Laptm5,Tmem119,C1qb,Vav1',
       'Opalin,Brinp3,Gab1,Ninj2,Pdlim2,Nipal4',
       'Opalin,Coro6,Lpar1,Mag,Gamt,Erbb3',
       'Chodl,Nos1,Sfrp1,Adgrg6,Tacr1,Npy',
       'Sst,Ccna1,Gpx3,Crhbp,Lypd6,Rbp4',
       'Pvalb,Tac1,Pthlh,Cox6a2,Tcap,Nek7',
       '2900052N01Rik,1700019G17Rik,Adhfe1,Rorb,Phka1,Slc9a3r1',
       'Lypd1,Id2,Gm8730,C1ql3,Icam5,Ramp1',
       'Vip,Penk,Cbln2,Asic4,Tac2,Adra1b',
       'Chrm2,Cyp26b1,Fst,Nell1,Rgs4,Rgs2',
       'Ntng1,Tmem132c,Stk32b,Gucy1a3,Rgs8,Pdzrn4',
       'Lamp5,Cplx3,Hapln1,Sv2c,Pnoc,Lhx6',
       'Ndnf,Pde3a,Rgs12,Cxcl14,Igf1,Igfbp5',
       'Col19a1,Gm16586,Rgs12,Trp53i11,Cpne5,Npas1',
       'Syt6,

In [165]:
# Create dictionary for CellID and markger genes
dicts = {}
for i in range(0,74):
    dicts[Class_AutoAnnotation[i]] = [MarkerGenes[i]]

{'Oligos@OL,COP': ['Bmp4,Enpp6,Lims2,Bcas1,Bfsp2,Pak4'],
 'Oligos@OL,OLIG': ['Mal,Adssl1,Ermn,Ppp1r14a,Mog,Nkx6-2'],
 'Excluded@IEG,@OL,@VGLUT1,@XIN,DG-IGC,HC-CA1,HC-CA2,OLIG': ['Nov,Itgb4,Ermn,Arsg,Slain1,Myrf'],
 'Excluded@OL,@VGLUT1,DG-GC,OLIG': ['Ugt8a,Ccnjl,Fa2h,Hapln2,S1pr5,Ttyh2'],
 'Oligos@NBL,@NIPC,@OL,@VGLUT1,DG-GC,HC-CA2,OLIG': ['Gsn,Zpr1,Trim13,Ppp1r14a,Gjb1,Trim59'],
 'Excluded@AEP,@OL,AC-NFB,OLIG': ['Ndrg1,Sdc4,Plekhh1,Gjc2,Tmem88b,Myrf'],
 'Excluded@BMono,@IEG,@OL,MGL,OLIG': ['Lag3,Prim2,Laptm5,Tmem119,C1qb,Vav1'],
 'Excluded@GABA,@IEG,@OL,CNC,HC-Cck-Cxcl14,HC-Vip-Calb2,OLIG': ['Opalin,Brinp3,Gab1,Ninj2,Pdlim2,Nipal4'],
 'Excluded@GABA,@IEG,@OL,CB-MLI,CNC,DG-IGC,HC-Cacna2d1-Lhx6,HC-Sst-Npy,OLIG,Rgl2c': ['Opalin,Coro6,Lpar1,Mag,Gamt,Erbb3'],
 'Neurons@GABA,DG-IGC,HC-Sst-Npy': ['Sst,Ccna1,Gpx3,Crhbp,Lypd6,Rbp4'],
 'Neurons@GABA,CB-MLI,HC-Pvalb-Tac1,HC-Sst-Npy': ['Pvalb,Tac1,Pthlh,Cox6a2,Tcap,Nek7'],
 'Neurons@AEP,@GABA,@XIN,AC-NFB,CB-MLI,HC-Cacna2d1-Lhx6,HC-Sst-Npy,Rgl2': 

In [166]:
# Create DataFrame
ds_df_markers = pd.DataFrame.from_dict(dicts)

In [167]:
# Check
ds_df_markers

Unnamed: 0,"Oligos@OL,COP","Oligos@OL,OLIG","Excluded@IEG,@OL,@VGLUT1,@XIN,DG-IGC,HC-CA1,HC-CA2,OLIG","Excluded@OL,@VGLUT1,DG-GC,OLIG","Oligos@NBL,@NIPC,@OL,@VGLUT1,DG-GC,HC-CA2,OLIG","Excluded@AEP,@OL,AC-NFB,OLIG","Excluded@BMono,@IEG,@OL,MGL,OLIG","Excluded@GABA,@IEG,@OL,CNC,HC-Cck-Cxcl14,HC-Vip-Calb2,OLIG","Excluded@GABA,@IEG,@OL,CB-MLI,CNC,DG-IGC,HC-Cacna2d1-Lhx6,HC-Sst-Npy,OLIG,Rgl2c","Neurons@GABA,DG-IGC,HC-Sst-Npy",...,"Neurons@VGLUT1,DG-GC",NeuronsDG-GC,"Neurons@NBL,@VGLUT1,DG-GC,DG-IGC,NblastM","Vascular@VGLUT1,DG-GC,VEC","Neurons@GNRH,@HCRT,@IEG,@NBL,@NIPC,@VGLUT1,@VGLUT2,@VGLUT3,HC-CA2","Excluded@AEP,AC-NFB,VEC",VascularVEC,"Vascular@IEG,VLMC","VascularERY,VEC",BloodERY
0,"Bmp4,Enpp6,Lims2,Bcas1,Bfsp2,Pak4","Mal,Adssl1,Ermn,Ppp1r14a,Mog,Nkx6-2","Nov,Itgb4,Ermn,Arsg,Slain1,Myrf","Ugt8a,Ccnjl,Fa2h,Hapln2,S1pr5,Ttyh2","Gsn,Zpr1,Trim13,Ppp1r14a,Gjb1,Trim59","Ndrg1,Sdc4,Plekhh1,Gjc2,Tmem88b,Myrf","Lag3,Prim2,Laptm5,Tmem119,C1qb,Vav1","Opalin,Brinp3,Gab1,Ninj2,Pdlim2,Nipal4","Opalin,Coro6,Lpar1,Mag,Gamt,Erbb3","Sst,Ccna1,Gpx3,Crhbp,Lypd6,Rbp4",...,"C1ql2,Rprm,Rasl10a,Bdnf,Rab40b,Grp","C1ql2,Fem1b,Cald1,C1ql3,Rab40b,Grp","Tmem114,Gm17750,Bhlhe22,Islr2,Fxyd7,Ppp1r14c","Adgrl4,Slc4a1ap,Ly6c1,Lsr,Ptprb,Adgrf5","Gpr17,Gpr17,Gpr17,Gpr17,Gpr17,Gpr17","Flt1,Gpr4,Ctla2a,Kdr,Cdh5,Ly6a","Higd1b,Kcnj8,Ndufa4l2,Rgs5,Vtn,Ly6a","Ptgds,Slc6a13,Clec3b,Col1a1,Pcolce,Gjb2","Hbb-bt,Fam46c,Hba-a1,Ube2l6,Isg20,Ly6a","Hba-a2,Alas2,Hbb-bs,Ube2l6,Fech,Mkrn1"


In [168]:
# Filtered out ID containing Excluded
ds_df_markers_filtered = ds_df_markers.filter(regex= 'Oligos|Neurons|Immune|Astrocytes|Ependymal|Vascular')

In [169]:
# Save DataFrame
ds_df_markers_filtered.to_csv("/home/skim/Documents/Heppner_Microglia/Automation/l1_hippocampus_aggregate_Markers.txt", sep= " ", header=True, index=True)