Skip to content

Commit

Permalink
Rename GeneMatrixTransposed to GeneSets, add minimal tests
Browse files Browse the repository at this point in the history
  • Loading branch information
krassowski committed Mar 8, 2020
1 parent 1653ac1 commit fb8e265
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 8 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ Pandas API for Gene Set Enrichment Analysis in Python (GSEApy, cudaGSEA, GSEA)
from pandas import read_table
from gsea_api.expression_set import ExpressionSet
from gsea_api.gsea import GSEADesktop
from gsea_api.molecular_signatures_db import GeneMatrixTransposed
from gsea_api.molecular_signatures_db import GeneSets

reactome_pathways = GeneMatrixTransposed.from_gmt('ReactomePathways.gmt')
reactome_pathways = GeneSets.from_gmt('ReactomePathways.gmt')

gsea = GSEADesktop()

Expand Down
2 changes: 1 addition & 1 deletion gsea_api/gsea/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from IPython.core.display import display
from pandas import read_table, DataFrame

from ..molecular_signatures_db import GeneMatrixTransposed
from ..molecular_signatures_db import GeneSets
from .base import GSEA
from ..paths import tmp_dir, third_party_dir

Expand Down
17 changes: 12 additions & 5 deletions gsea_api/molecular_signatures_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def from_gmt_line(cls, line):
return cls(name, ids)


class GeneMatrixTransposed:
class GeneSets:

def __init__(self, gene_sets, name=''):
self.gene_sets = gene_sets
Expand All @@ -31,7 +31,7 @@ def from_gmt(cls, path, name=''):
}, name=name)

def trim(self, min_genes, max_genes: int):
return GeneMatrixTransposed({
return GeneSets({
gene_set
for gene_set in self.gene_sets
if min_genes <= len(gene_set.genes) <= max_genes
Expand All @@ -54,7 +54,7 @@ def to_gmt(self, path):
self._to_gmt(path)

def subset(self, genes: Set[str]):
return GeneMatrixTransposed({
return GeneSets({
GeneSet(name=gene_set.name, genes=gene_set.genes & genes)
for gene_set in self.gene_sets
})
Expand All @@ -68,6 +68,13 @@ def all_identifiers(self):

return all_identifiers

def __len__(self):
return len(self.gene_sets)


# for backwards compatibility
GeneMatrixTransposed = GeneSets


class MolecularSignaturesDatabase:
def __init__(self, path, version='6.2'):
Expand All @@ -90,7 +97,7 @@ def resolve(self, gene_sets, id_type):
else:
raise ValueError(f'Unknown library: {path}!')

def load(self, gene_sets, id_type) -> GeneMatrixTransposed:
def load(self, gene_sets, id_type) -> GeneSets:
path = self.resolve(gene_sets=gene_sets, id_type=id_type)

return GeneMatrixTransposed.from_gmt(path, name=gene_sets)
return GeneSets.from_gmt(path, name=gene_sets)
3 changes: 3 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Legal

A subset of Gene Ontology gene sets is used in tests. Gene Ontology is licensed under [Creative Commons Attribution 4.0 Unported License](https://creativecommons.org/licenses/by/4.0/legalcode).
4 changes: 4 additions & 0 deletions tests/gene_ontology_pathways.gmt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
GO:0061038 uterus morphogenesis ASH1L KDM5B STRA6 WNT7A WNT9B NIPBL
GO:0048265 response to pain TAC1 COMT TSPO SLC6A2 TRPA1 P2RX3 THBS4 TACR1 DBH PRKCG GCH1 NMUR2 P2RX4 EDNRB THBS1 CACNA1A CRH CACNA1B CAPN2 UCN RET SCN9A VWA1 LPAR5 GJA4 P2RX2 RELN TRPV1 NTRK1 PIRT
GO:0061366 behavioral response to chemical pain P2RX3 NTRK1
GO:0061368 behavioral response to formalin induced pain P2RX3 NTRK1
9 changes: 9 additions & 0 deletions tests/test_gene_sets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from gsea_api.molecular_signatures_db import GeneSets


def test_from_gmt():
    """Check that GeneSets loads the bundled GMT fixture and trims it by size.

    The fixture ``gene_ontology_pathways.gmt`` lives in the same directory
    as this test module and lists four gene sets.
    """
    # Resolve the fixture relative to this module instead of the current
    # working directory, so the test passes regardless of where pytest is
    # invoked from (repository root, the tests directory, an IDE runner...).
    from pathlib import Path

    gmt_path = Path(__file__).resolve().parent / 'gene_ontology_pathways.gmt'
    pathways = GeneSets.from_gmt(str(gmt_path))

    # len() on the collection must agree with the underlying container.
    assert len(pathways) == len(pathways.gene_sets)
    assert len(pathways.gene_sets) == 4

    # Two of the fixture's sets list only two genes each, so they are
    # expected to be dropped by min_genes=3, leaving the other two.
    assert len(pathways.trim(min_genes=3, max_genes=100)) == 2

0 comments on commit fb8e265

Please sign in to comment.