Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added protein coding biotype tests #166

Merged
merged 2 commits into from
Sep 19, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyensembl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from .transcript import Transcript

__version__ = '1.0.0'
__version__ = '1.0.1'

def cached_release(release, species="human"):
"""
Expand Down
32 changes: 20 additions & 12 deletions test/test_gene_objects.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
from __future__ import absolute_import

from nose.tools import eq_

from .common import test_ensembl_releases
from .data import TP53_gene_id

@test_ensembl_releases()
def test_TP53_gene_object_by_id(ensembl):
def test_TP53_gene_object_by_id(genome):
# when we look up TP53 by its gene ID, we should get the
# correct gene back
gene = ensembl.gene_by_id(TP53_gene_id)
gene = genome.gene_by_id(TP53_gene_id)
assert gene.name == "TP53", \
"Incorrect gene name %s for gene ID %s in %s" % (
gene.name, gene.id, ensembl)
gene.name, gene.id, genome)
assert gene.contig == "17", \
"Incorrect gene contig %s for gene ID %s in %s" % (
gene.contig, gene.id, ensembl)
gene.contig, gene.id, genome)

@test_ensembl_releases()
def test_TP53_gene_object_by_name(ensembl):
genes = ensembl.genes_by_name("TP53")
def test_TP53_gene_object_by_name(genome):
genes = genome.genes_by_name("TP53")
# we should only have one TP53 gene (there aren't any copies)
assert len(genes) == 1, \
"Expected only one gene with name TP53, got %s" % (genes,)
Expand All @@ -26,17 +28,23 @@ def test_TP53_gene_object_by_name(ensembl):
"Expected gene to have ID %s, got %s" % (TP53_gene_id, genes[0].id)

@test_ensembl_releases()
def test_equal_genes(ensembl):
gene1 = ensembl.genes_by_name("TP53")[0]
def test_equal_genes(genome):
gene1 = genome.genes_by_name("TP53")[0]
# get an identical gene
gene2 = ensembl.gene_by_id(gene1.id)
gene2 = genome.gene_by_id(gene1.id)

assert hash(gene1) == hash(gene2)
assert gene1 == gene2

@test_ensembl_releases()
def test_not_equal_genes(release):
gene1 = release.genes_by_name("MUC1")[0]
gene2 = release.genes_by_name("BRCA1")[0]
def test_not_equal_genes(genome):
gene1 = genome.genes_by_name("MUC1")[0]
gene2 = genome.genes_by_name("BRCA1")[0]
assert hash(gene1) != hash(gene2)
assert gene1 != gene2

@test_ensembl_releases()
def test_BRCA1_protein_coding_biotype(genome):
gene = genome.genes_by_name("BRCA1")[0]
assert gene.is_protein_coding
eq_(gene.biotype, "protein_coding")
22 changes: 14 additions & 8 deletions test/test_transcript_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def test_transcript_exons():
# TODO: Add gene_id patching to gtf_parsing, add ensembl54 to the list
# below
@test_ensembl_releases(75, 77)
def test_sequence_parts(ensembl):
def test_sequence_parts(genome):
# Ensure that the UTRs and coding sequence can be
# combined to make the full transcript.
transcript = ensembl.transcript_by_id(FOXP3_001_transcript_id)
transcript = genome.transcript_by_id(FOXP3_001_transcript_id)

# The combined lengths of the upstream untranslated region,
# coding sequence, and downstream untranslated region
Expand Down Expand Up @@ -149,17 +149,17 @@ def test_transcript_cds_CTNNIP1_004():
eq_(cds, CTNNBIP1_004_CDS)

@test_ensembl_releases()
def test_equal_transcripts(ensembl):
t1 = ensembl.transcripts_by_name("TP53-001")[0]
def test_equal_transcripts(genome):
t1 = genome.transcripts_by_name("TP53-001")[0]
# get an identical transcript
t2 = ensembl.transcript_by_id(t1.id)
t2 = genome.transcript_by_id(t1.id)
eq_(t1, t2)
eq_(hash(t1), hash(t2))

@test_ensembl_releases()
def test_not_equal_transcripts(release):
t1 = release.transcripts_by_name("MUC1-001")[0]
t2 = release.transcripts_by_name("BRCA1-001")[0]
def test_not_equal_transcripts(genome):
t1 = genome.transcripts_by_name("MUC1-001")[0]
t2 = genome.transcripts_by_name("BRCA1-001")[0]
assert_not_equal(t1, t2)

def test_protein_id():
Expand All @@ -174,3 +174,9 @@ def test_transcript_gene_should_match_parent_gene():
gene = ensembl77.gene_by_id(TP53_gene_id)
for transcript in gene.transcripts:
eq_(transcript.gene, gene)

@test_ensembl_releases()
def test_BRCA1_001_has_protein_coding_biotype(genome):
transcript = genome.transcripts_by_name("BRCA1-001")[0]
assert transcript.is_protein_coding
eq_(transcript.biotype, "protein_coding")