## Parse gene sets
Convert gene set collections to GMT format and/or map their human gene symbols to mouse Ensembl gene IDs (EIDs).

In [1]:
import pandas as pd

In [2]:
# Directory with the gene-list input files. The trailing slash is required because
# file names are appended to this prefix by plain string concatenation.
path_gs = "/storage/groups/ml01/workspace/karin.hrovatin//data/pancreas/gene_lists/"

In [3]:
# Orthologue table (file name indicates mouse -> human). The two columns used for the
# mapping are renamed to the short names the conversion code relies on:
# `EID_m` and `gene_symbol_h`.
orthologues = pd.read_table(
    f"{path_gs}orthologues_ORGmus_musculus_ORG2homo_sapiens_V103.tsv"
).rename(
    columns={
        'Gene stable ID': 'EID_m',
        'Human gene name': 'gene_symbol_h',
    }
)

## Panglao file to gmt

In [3]:
# PanglaoDB marker table (tab-separated); grouped below by its 'cell type' column.
panglao = pd.read_table(f"{path_gs}PanglaoDB_markers_27_Mar_2020.tsv")

In [51]:
# Write one GMT line per Panglao cell type: gene set name, description, genes.
# Gene symbols are written exactly as listed in the table (no species filtering and
# no orthologue mapping); the description field is the placeholder '/'.
# The context manager guarantees the file is flushed and closed even if a row fails.
with open('PanglaoDB_markers_27_Mar_2020.gmt', 'w') as file:
    for ct, data in panglao.groupby('cell type'):
        gs_name = 'panglao_' + ct.replace(' ', '_')
        genes = list(data['official gene symbol'].unique())
        file.write('\t'.join([gs_name, '/'] + genes) + '\n')

Mouse Ensembl ID GMT: map human gene names to mouse Ensembl IDs. Genes without a one-to-one orthologue mapping are also included, i.e. every mouse ID matching a human gene name is kept.

In [50]:
# Write one GMT line per Panglao cell type with genes given as mouse Ensembl IDs:
# gene set name, description ('/'), mouse EIDs.
# Marker symbols are matched against the human gene names in `orthologues`; every
# matching mouse ID is kept, so a set can gain genes (one-to-many orthologues) or
# lose them (symbols with no match). The printed counts show both sizes per set.
# The context manager guarantees the file is flushed and closed even if a row fails.
with open('PanglaoDB_markers_27_Mar_2020_mouseEID.gmt', 'w') as file:
    for ct, data in panglao.groupby('cell type'):
        genes_human = list(data['official gene symbol'].unique())
        eid_mouse = orthologues.query('gene_symbol_h in @genes_human').EID_m.unique()
        print("%-40s%-12s%-6i%-12s%-6i" % (
            ct, 'N human:', len(genes_human), 'N mouse:', len(eid_mouse)))
        file.write(
            '\t'.join(['panglao_' + ct.replace(' ', '_'), '/'] + list(eid_mouse)) + '\n')

Acinar cells                            N human:    52    N mouse:    73    
Adipocyte progenitor cells              N human:    23    N mouse:    22    
Adipocytes                              N human:    120   N mouse:    118   
Adrenergic neurons                      N human:    8     N mouse:    8     
Airway epithelial cells                 N human:    4     N mouse:    4     
Airway goblet cells                     N human:    30    N mouse:    27    
Airway smooth muscle cells              N human:    4     N mouse:    4     
Alpha cells                             N human:    48    N mouse:    48    
Alveolar macrophages                    N human:    33    N mouse:    23    
Anterior pituitary gland cells          N human:    36    N mouse:    56    
Astrocytes                              N human:    63    N mouse:    61    
B cells                                 N human:    110   N mouse:    94    
B cells memory                          N human:    66    N mouse:    72    

Proximal tubule cells                   N human:    83    N mouse:    75    
Pulmonary alveolar type I cells         N human:    36    N mouse:    33    
Pulmonary alveolar type II cells        N human:    47    N mouse:    36    
Pulmonary vascular smooth muscle cells  N human:    2     N mouse:    2     
Purkinje fiber cells                    N human:    5     N mouse:    4     
Purkinje neurons                        N human:    60    N mouse:    58    
Pyramidal cells                         N human:    35    N mouse:    34    
Radial glia cells                       N human:    14    N mouse:    14    
Red pulp macrophages                    N human:    12    N mouse:    12    
Reticulocytes                           N human:    8     N mouse:    8     
Retinal ganglion cells                  N human:    70    N mouse:    64    
Retinal progenitor cells                N human:    13    N mouse:    13    
Salivary mucous cells                   N human:    14    N mouse:    6     

## Reactome to mouse EID

In [6]:
# Convert the Reactome GMT from human gene symbols to mouse Ensembl IDs.
# Each input line is tab-separated: gene set name, description, member gene symbols.
# The name and description are copied unchanged; the genes are replaced by every
# mouse ID in `orthologues` whose human gene name matches. Counts before/after
# mapping are printed per gene set.
with open('c2.cp.reactome.v4.0.symbols.gmt', 'r') as gmt_in:
    gene_sets = gmt_in.readlines()

with open('c2.cp.reactome.v4.0_mouseEID.gmt', 'w') as file:
    for gs_info in gene_sets:
        # Split on tabs only: GMT fields are tab-delimited, and splitting on arbitrary
        # whitespace would break a description containing spaces or shift the columns
        # when the description is empty.
        gs_info = gs_info.rstrip('\r\n').split('\t')
        if len(gs_info) < 2:
            # Blank or malformed line without name + description; nothing to convert.
            continue
        gs_name = gs_info[0]
        notes = gs_info[1]
        # Drop empty fields produced by trailing/duplicated tabs.
        genes_human = [gene for gene in gs_info[2:] if gene]
        eid_mouse = orthologues.query('gene_symbol_h in @genes_human').EID_m.unique()
        # Extra space after the name keeps names longer than 40 characters from
        # running into the 'N human:' label.
        print("%-40s %-12s%-6i%-12s%-6i" % (
            gs_name, 'N human:', len(genes_human), 'N mouse:', len(eid_mouse)))
        file.write('\t'.join([gs_name, notes] + list(eid_mouse)) + '\n')

REACTOME_GLYCOGEN_BREAKDOWN_GLYCOGENOLYSISN human:    18    N mouse:    15    
REACTOME_TRANSLATION                    N human:    222   N mouse:    123   
REACTOME_PYRIMIDINE_CATABOLISM          N human:    12    N mouse:    11    
REACTOME_RNA_POL_III_TRANSCRIPTION_INITIATION_FROM_TYPE_2_PROMOTERN human:    23    N mouse:    22    
REACTOME_INHIBITION_OF_THE_PROTEOLYTIC_ACTIVITY_OF_APC_C_REQUIRED_FOR_THE_ONSET_OF_ANAPHASE_BY_MITOTIC_SPINDLE_CHECKPOINT_COMPONENTSN human:    24    N mouse:    17    
REACTOME_PYRUVATE_METABOLISM_AND_CITRIC_ACID_TCA_CYCLEN human:    48    N mouse:    42    
REACTOME_EXTRINSIC_PATHWAY_FOR_APOPTOSISN human:    13    N mouse:    14    
REACTOME_PTM_GAMMA_CARBOXYLATION_HYPUSINE_FORMATION_AND_ARYLSULFATASE_ACTIVATIONN human:    27    N mouse:    21    
REACTOME_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINSN human:    40    N mouse:    38    
REACTOME_RNA_POL_I_TRANSCRIPTION_TERMINATIONN human:    22    N mouse:    18    
REACTOME_FORMATION_OF_THE_TERNARY_COMPLEX_AN

REACTOME_CHONDROITIN_SULFATE_BIOSYNTHESISN human:    21    N mouse:    19    
REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATIONN human:    10    N mouse:    10    
REACTOME_SIGNALING_BY_FGFR1_MUTANTS     N human:    30    N mouse:    28    
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_PLASMA_MEMBRANEN human:    31    N mouse:    31    
REACTOME_THE_ACTIVATION_OF_ARYLSULFATASESN human:    12    N mouse:    7     
REACTOME_TRANSCRIPTIONAL_ACTIVITY_OF_SMAD2_SMAD3_SMAD4_HETEROTRIMERN human:    38    N mouse:    36    
REACTOME_HYALURONAN_METABOLISM          N human:    14    N mouse:    14    
REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITYN human:    20    N mouse:    18    
REACTOME_KERATAN_SULFATE_BIOSYNTHESIS   N human:    26    N mouse:    23    
REACTOME_REVERSIBLE_HYDRATION_OF_CARBON_DIOXIDEN human:    12    N mouse:    12    
REACTOME_SIGNALING_BY_FGFR1_FUSION_MUTANTSN human:    19    N mouse:    17    
REACTOME_ALPHA_LINOLENIC_ACID_ALA_METABOLISMN human:    12    N mouse:    12   

REACTOME_NUCLEAR_RECEPTOR_TRANSCRIPTION_PATHWAYN human:    49    N mouse:    47    
REACTOME_INSULIN_SYNTHESIS_AND_PROCESSINGN human:    21    N mouse:    20    
REACTOME_CYCLIN_E_ASSOCIATED_EVENTS_DURING_G1_S_TRANSITION_N human:    65    N mouse:    64    
REACTOME_HOMOLOGOUS_RECOMBINATION_REPAIR_OF_REPLICATION_INDEPENDENT_DOUBLE_STRAND_BREAKSN human:    17    N mouse:    14    
REACTOME_G1_PHASE                       N human:    38    N mouse:    33    
REACTOME_ELEVATION_OF_CYTOSOLIC_CA2_LEVELSN human:    10    N mouse:    8     
REACTOME_P53_DEPENDENT_G1_DNA_DAMAGE_RESPONSEN human:    57    N mouse:    53    
REACTOME_TRANSCRIPTION_COUPLED_NER_TC_NERN human:    45    N mouse:    42    
REACTOME_RNA_POL_II_TRANSCRIPTION_PRE_INITIATION_AND_PROMOTER_OPENINGN human:    41    N mouse:    37    
REACTOME_GLUCAGON_SIGNALING_IN_METABOLIC_REGULATIONN human:    34    N mouse:    31    
REACTOME_MRNA_PROCESSING                N human:    161   N mouse:    148   
REACTOME_SIGNALING_BY_PDGF    

REACTOME_MITOTIC_G1_G1_S_PHASES         N human:    137   N mouse:    129   
REACTOME_PI_3K_CASCADE                  N human:    56    N mouse:    54    
REACTOME_DOWNSTREAM_SIGNALING_OF_ACTIVATED_FGFRN human:    100   N mouse:    95    
REACTOME_REGULATION_OF_MITOTIC_CELL_CYCLEN human:    85    N mouse:    77    
REACTOME_TAK1_ACTIVATES_NFKB_BY_PHOSPHORYLATION_AND_ACTIVATION_OF_IKKS_COMPLEXN human:    23    N mouse:    20    
REACTOME_REGULATION_OF_AMPK_ACTIVITY_VIA_LKB1N human:    15    N mouse:    14    
REACTOME_MITOTIC_M_M_G1_PHASES          N human:    172   N mouse:    160   
REACTOME_MYOGENESIS                     N human:    28    N mouse:    26    
REACTOME_MAP_KINASE_ACTIVATION_IN_TLR_CASCADEN human:    50    N mouse:    49    
REACTOME_PHOSPHOLIPASE_C_MEDIATED_CASCADEN human:    54    N mouse:    51    
REACTOME_ACTIVATION_OF_KAINATE_RECEPTORS_UPON_GLUTAMATE_BINDINGN human:    31    N mouse:    30    
REACTOME_IONOTROPIC_ACTIVITY_OF_KAINATE_RECEPTORSN human:    11    N mous

REACTOME_ADVANCED_GLYCOSYLATION_ENDPRODUCT_RECEPTOR_SIGNALINGN human:    13    N mouse:    10    
REACTOME_GABA_RECEPTOR_ACTIVATION       N human:    52    N mouse:    51    
REACTOME_KINESINS                       N human:    24    N mouse:    24    
REACTOME_N_GLYCAN_ANTENNAE_ELONGATION_IN_THE_MEDIAL_TRANS_GOLGIN human:    18    N mouse:    18    
REACTOME_REGULATION_OF_IFNA_SIGNALING   N human:    24    N mouse:    26    
REACTOME_INTERFERON_SIGNALING           N human:    159   N mouse:    155   
REACTOME_NEGATIVE_REGULATORS_OF_RIG_I_MDA5_SIGNALINGN human:    31    N mouse:    28    
REACTOME_NFKB_AND_MAP_KINASES_ACTIVATION_MEDIATED_BY_TLR4_SIGNALING_REPERTOIREN human:    72    N mouse:    66    
REACTOME_TRANSFERRIN_ENDOCYTOSIS_AND_RECYCLINGN human:    25    N mouse:    25    
REACTOME_DSCAM_INTERACTIONS             N human:    11    N mouse:    11    
REACTOME_ION_CHANNEL_TRANSPORT          N human:    55    N mouse:    53    
REACTOME_DESTABILIZATION_OF_MRNA_BY_AUF1_HNRNP_D0N hu

REACTOME_G2_M_DNA_DAMAGE_CHECKPOINT     N human:    12    N mouse:    9     
REACTOME_S_PHASE                        N human:    109   N mouse:    108   
REACTOME_SCFSKP2_MEDIATED_DEGRADATION_OF_P27_P21N human:    56    N mouse:    54    
REACTOME_TRYPTOPHAN_CATABOLISM          N human:    11    N mouse:    9     
REACTOME_DNA_STRAND_ELONGATION          N human:    30    N mouse:    30    
REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_SINGLE_NUCLEOTIDE_REPLACEMENT_PATHWAYN human:    12    N mouse:    12    
REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATIONN human:    22    N mouse:    22    
REACTOME_CHOLESTEROL_BIOSYNTHESIS       N human:    24    N mouse:    22    
REACTOME_APOBEC3G_MEDIATED_RESISTANCE_TO_HIV1_INFECTIONN human:    12    N mouse:    3     
REACTOME_GAP_JUNCTION_TRAFFICKING       N human:    27    N mouse:    24    
REACTOME_FGFR2C_LIGAND_BINDING_AND_ACTIVATIONN human:    12    N mouse:    12    
REACTOME_METABOLISM_OF_PORPHYRINS       N human:    14    N mouse:    14    
REACTOME