diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py
index c019683..0c1fc3b 100644
--- a/pyensembl/__init__.py
+++ b/pyensembl/__init__.py
@@ -35,7 +35,7 @@
 )
 from .transcript import Transcript
 
-__version__ = '0.9.7'
+__version__ = '1.0.0'
 
 def cached_release(release, species="human"):
     """
diff --git a/pyensembl/biotypes.py b/pyensembl/biotypes.py
deleted file mode 100644
index 3c21c3f..0000000
--- a/pyensembl/biotypes.py
+++ /dev/null
@@ -1,345 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Ensembl/GENCODE biotype classifications, for more information:
-http://useast.ensembl.org/Help/Faq?id=468
-
-Definitions for GENCODE biotypes from:
-http://www.gencodegenes.org/gencode_biotypes.html
-
-(not all of these are necessarily used in Ensembl)
-
-IG_C_gene
-IG_D_gene
-IG_J_gene
-IG_V_gene
-TR_C_gene
-TR_J_gene
-TR_V_gene
-TR_D_gene
-----------------
-Immunoglobulin (Ig) variable chain and T-cell receptor (TcR) genes imported
-or annotated according to the IMGT. IG_C_pseudogene
-
-IG_J_pseudogene
-IG_V_pseudogene
-TR_V_pseudogene
-TR_J_pseudogene
-----------------
-Inactivated immunoglobulin gene.
-
-Mt_rRNA
-Mt_tRNA
-miRNA
-misc_RNA
-rRNA
-snRNA
-snoRNA
-----------------
-Non-coding RNA predicted using sequences from RFAM and miRBase
-
-Mt_tRNA_pseudogene
-tRNA_pseudogene
-snoRNA_pseudogene
-snRNA_pseudogene
-scRNA_pseudogene
-rRNA_pseudogene
-misc_RNA_pseudogene
-miRNA_pseudogene
-Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
-
-TEC
-----------------
-To be Experimentally Confirmed. This is used for non-spliced EST clusters that
-have polyA features. This category has been specifically created for the ENCODE
-project to highlight regions that could indicate the presence of protein coding
-genes that require experimental validation, either by 5' RACE or RT-PCR to
-extend the transcripts, or by confirming expression of the putatively-encoded
-peptide with specific antibodies.
-
-nonsense_mediated_decay
-----------------
-If the coding sequence (following the appropriate reference) of a transcript
-finishes >50bp from a downstream splice site then it is tagged as NMD.
-If the variant does not cover the full reference coding sequence then it is
-annotated as NMD if NMD is unavoidable i.e. no matter what the exon structure
-of the missing portion is the transcript will be subject to NMD.
-
-non_stop_decay
-----------------
-Transcripts that have polyA features (including signal) without a prior stop
-codon in the CDS, i.e. a non-genomic polyA tail attached directly to the CDS
-without 3' UTR. These transcripts are subject to degradation.
-
-
-retained_intron
-----------------
-Alternatively spliced transcript believed to contain intronic sequence relative
-to other, coding, variants. protein_coding Contains an open reading frame (ORF).
-
-processed_transcript
-----------------
-Doesn't contain an ORF.
-
-non_coding
-----------------
-Transcript which is known from the literature to not be protein coding.
-
-ambiguous_orf
-----------------
-Transcript believed to be protein coding, but with more than one possible
-open reading frame.
-
-sense_intronic
-----------------
-Long non-coding transcript in introns of a coding gene that does not overlap any
-exons.
-
-sense_overlapping
-----------------
-Long non-coding transcript that contains a coding gene in its intron on the same
-strand.
-
-antisense
-----------------
-Has transcripts that overlap the genomic span (i.e. exon or introns) of a
-protein-coding locus on the opposite strand.
-
-known_ncrna
-----------------
-
-pseudogene
-----------------
-Have homology to proteins but generally suffer from a disrupted coding sequence
-and an active homologous gene can be found at another locus. Sometimes these
-entries have an intact coding sequence or an open but truncated ORF, in which
-case there is other evidence used (for example genomic polyA stretches at
-the 3' end) to classify them as a pseudogene. Can be further classified as one
-of the following.
-
-processed_pseudogene
-----------------
-Pseudogene that lack introns and is thought to arise from reverse transcription
-of mRNA followed by reinsertion of DNA into the genome.
-
-polymorphic_pseudogene
-----------------
-Pseudogene owing to a SNP/DIP but in other individuals/haplotypes/strains the
-gene is translated.
-
-retrotransposed
-----------------
-Pseudogene owing to a reverse transcribed and re-inserted sequence.
-
-transcribed_processed_pseudogene
-transcribed_unprocessed_pseudogene
-transcribed_unitary_pseudogene
-----------------
-Pseudogene where protein homology or genomic structure indicates a pseudogene,
-but the presence of locus-specific transcripts indicates expression.
-
-translated_unprocessed_pseudogene
-----------------
-Pseudogene that has mass spec data suggesting that it is also translated.
-
-unitary_pseudogene
-----------------
-A species specific unprocessed pseudogene without a parent gene, as it has an
-active orthologue in another species.
-
-unprocessed_pseudogene
-----------------
-Pseudogene that can contain introns since produced by gene duplication.
-
-artifact
-----------------
-Used to tag mistakes in the public databases (Ensembl/SwissProt/Trembl)
-
-lincRNA
-----------------
-Long, intervening noncoding (linc) RNAs, that can be found in evolutionarily
-conserved, intergenic regions.
-
-LRG_gene
-----------------
-Gene in a "Locus Reference Genomic" region known to have disease-related
-sequence variations.
-
-3prime_overlapping_ncrna
-----------------
-Transcripts where ditag and/or published experimental data strongly supports the
-existence of short non-coding transcripts transcribed from the 3'UTR.
-
-disrupted_domain
-----------------
-Otherwise viable coding region omitted from this alternatively spliced
-transcript because the splice variation affects a region coding for a
-protein domain.
-"""
-
-TCR_biotypes = {
-    'TR_C_gene',
-    'TR_D_gene',
-    'TR_gene',
-    'TR_J_gene',
-    'TR_V_gene'
-}
-
-IG_biotypes = {
-    'IG_C_gene',
-    'IG_D_gene',
-    'IG_gene',
-    'IG_J_gene',
-    'IG_LV_gene',
-    'IG_M_gene',
-    'IG_V_gene',
-    'IG_Z_gene',
-}
-
-non_immune_protein_coding = {
-    # premature stop codon will cause degradation
-    'nonsense_mediated_decay',
-    # TODO: find out what this is
-    'nontranslating_CDS',
-    # no stop codon within transcript, should get degraded at translation
-    'non_stop_decay',
-    # ordinary protein coding transcript
-    'protein_coding',
-
-    # Gene in a "Locus Reference Genomic" region known to have disease-related
-    # sequence variations.
-    'LRG_gene',
-}
-
-protein_coding = set.union(
-    TCR_biotypes,
-    IG_biotypes,
-    non_immune_protein_coding
-)
-
-
-coding_pseudogenes = {
-    'IG_C_pseudogene',
-    'IG_J_pseudogene',
-    'IG_pseudogene',
-    'IG_V_pseudogene',
-    # Found in ftp://ftp.ensembl.org/pub/release-81/gtf/mus_musculus
-    'IG_D_pseudogene',
-    'processed_pseudogene',
-    'pseudogene',
-    'transcribed_processed_pseudogene',
-    'transcribed_unitary_pseudogene',
-    'transcribed_unprocessed_pseudogene',
-    'translated_processed_pseudogene',
-    'TR_J_pseudogene',
-    'TR_pseudogene',
-    'TR_V_pseudogene',
-    'unitary_pseudogene',
-    'unprocessed_pseudogene',
-    # to be experimentally confirmed
-    'TEC',
-    # TODO: should this be here or considered protein_coding?
-    'translated_unprocessed_pseudogene',
-    # pseudogene owing to a reverse transcribed and re-inserted sequence.
-    "retrotransposed",
-    # usually a non-coding pseudogene but can be translated in some individuals
-    # depending on common genetic variation
-    'polymorphic_pseudogene',
-    # Otherwise viable coding region omitted from this alternatively spliced
-    # transcript because the splice variation affects a region coding for a
-    # protein domain.
-    # TODO: why is disrupted_domain a pseudogene rather than
-    # a translated protein we expect to be dysfunctional?
-    'disrupted_domain',
-}
-
-long_noncoding = {
-    'lincRNA',
-    'ncrna_host',
-    '3prime_overlapping_ncrna',
-    # why is an ambiguous ORF noncoding? Isn't rather coding but we can't
-    # yet determine the sequence?
-    'ambiguous_orf',
-    'antisense',
-    'antisense_RNA',
-    'non_coding',
-    'processed_transcript',
-    'retained_intron',
-    'sense_intronic',
-    'sense_overlapping',
-    'known_ncrna',
-    # unspliced lncRNAs that are several kb in size.
-    'macro_lncRNA',
-    # seems to have been added around Ensembl 81
-    'bidirectional_promoter_lncrna',
-}
-
-mitochondrial = {
-    'Mt_rRNA',
-    'Mt_tRNA',
-    'Mt_tRNA_pseudogene',
-}
-
-# short RNAs homologous to functional but which themselves are expected to
-# be inert
-short_noncoding_pseudogene = {
-    'miRNA_pseudogene',
-    'misc_RNA_pseudogene',
-    'ncRNA_pseudogene',
-    'rRNA_pseudogene',
-    'scRNA_pseudogene',
-    'snoRNA_pseudogene',
-    'snRNA_pseudogene',
-    'tRNA_pseudogene'
-}
-
-# short RNAs expected to have some effect or function in the nucleus
-short_noncoding_functional = {
-    'miRNA',
-    'misc_RNA',
-    'ncRNA',
-    'rRNA',
-    'scRNA',
-    'snlRNA',
-    'snoRNA',
-    'snRNA',
-    'tRNA',
-    'sRNA',
-    # Small Cajal body-specific RNA
-    'scaRNA',
-    # Vault RNA (http://en.wikipedia.org/wiki/Vault_RNA)
-    'vaultRNA',
-    'ribozyme',
-}
-
-short_noncoding = set.union(
-    short_noncoding_functional,
-    short_noncoding_pseudogene,
-    mitochondrial)
-
-valid_biotypes = set.union(
-    protein_coding,
-    coding_pseudogenes,
-    long_noncoding,
-    short_noncoding,
-    # used to tag mistakes in the annotation database
-    {"artifact"},
-)
-
-def is_valid_biotype(biotype):
-    return biotype in valid_biotypes
-
-def is_coding_biotype(biotype):
-    return biotype in protein_coding
diff --git a/pyensembl/gene.py b/pyensembl/gene.py
index b2d6c41..5e6842b 100644
--- a/pyensembl/gene.py
+++ b/pyensembl/gene.py
@@ -16,7 +16,6 @@
 
 from memoized_property import memoized_property
 
-from .biotypes import is_valid_biotype
 from .locus_with_genome import LocusWithGenome
 
 class Gene(LocusWithGenome):
@@ -30,22 +29,36 @@ def __init__(
             end,
             strand,
             biotype,
-            genome,
-            require_valid_biotype=True):
-        LocusWithGenome.__init__(self, contig, start, end, strand, genome)
-        self.id = gene_id
-        self.name = gene_name
-        self.biotype = biotype
-        self.require_valid_biotype = require_valid_biotype
-        if require_valid_biotype and not is_valid_biotype(biotype):
-            raise ValueError(
-                "Invalid gene_biotype %s for gene with ID = %s" % (
-                    biotype, gene_id))
+            genome):
+        LocusWithGenome.__init__(
+            self,
+            contig=contig,
+            start=start,
+            end=end,
+            strand=strand,
+            biotype=biotype,
+            genome=genome)
+        self.gene_id = gene_id
+        self.gene_name = gene_name
+
+    @property
+    def id(self):
+        """
+        Alias for gene_id necessary for backwards compatibility.
+        """
+        return self.gene_id
+
+    @property
+    def name(self):
+        """
+        Alias for gene_name necessary for backwards compatibility.
+        """
+        return self.gene_name
 
     def __str__(self):
-        return "Gene(id=%s, name=%s, biotype=%s, location=%s:%d-%d)" % (
-            self.id,
-            self.name,
+        return "Gene(gene_id=%s, gene_name=%s, biotype=%s, location=%s:%d-%d)" % (
+            self.gene_id,
+            self.gene_name,
             self.biotype,
             self.contig,
             self.start,
@@ -62,10 +75,8 @@ def __hash__(self):
 
     def to_dict(self):
         state_dict = LocusWithGenome.to_dict(self)
-        state_dict["gene_id"] = self.id
-        state_dict["gene_name"] = self.name
-        state_dict["biotype"] = self.biotype
-        state_dict["require_valid_biotype"] = self.require_valid_biotype
+        state_dict["gene_id"] = self.gene_id
+        state_dict["gene_name"] = self.gene_name
         return state_dict
 
     @memoized_property
diff --git a/pyensembl/genome.py b/pyensembl/genome.py
index 6c07b56..7cdbb8f 100644
--- a/pyensembl/genome.py
+++ b/pyensembl/genome.py
@@ -649,8 +649,7 @@ def gene_by_id(self, gene_id):
                 end=end,
                 strand=strand,
                 biotype=gene_biotype,
-                genome=self,
-                require_valid_biotype=("gene_biotype" in field_names))
+                genome=self)
 
         return self._genes[gene_id]
 
@@ -828,8 +827,7 @@ def transcript_by_id(self, transcript_id):
                 strand=strand,
                 biotype=transcript_biotype,
                 gene_id=gene_id,
-                genome=self,
-                require_valid_biotype=("transcript_biotype" in field_names))
+                genome=self)
 
         return self._transcripts[transcript_id]
 
diff --git a/pyensembl/locus_with_genome.py b/pyensembl/locus_with_genome.py
index c614019..aef67d7 100644
--- a/pyensembl/locus_with_genome.py
+++ b/pyensembl/locus_with_genome.py
@@ -19,10 +19,11 @@ class LocusWithGenome(Locus):
     Common base class for Gene and Transcript to avoid copying
     their shared logic.
     """
-    def __init__(self, contig, start, end, strand, genome):
+    def __init__(self, contig, start, end, strand, biotype, genome):
         Locus.__init__(self, contig, start, end, strand)
         self.genome = genome
         self.db = self.genome.db
+        self.biotype = biotype
 
     def to_dict(self):
         return dict(
@@ -30,4 +31,22 @@ def to_dict(self):
             start=self.start,
             end=self.end,
             strand=self.strand,
+            biotype=self.biotype,
             genome=self.genome)
+
+    @property
+    def is_protein_coding(self):
+        """
+        We're not counting immunoglobulin-like genes from the T-cell receptor or
+        or antibodies since they occur in fragments that must be recombined.
+        It might be worth consider counting non-sense mediated decay and
+        non-stop decay since variants in these could potentially make a
+        functional protein. To read more about the biotypes used in Ensembl:
+            http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html
+            http://www.gencodegenes.org/gencode_biotypes.html
+
+        For now let's stick with the simple category of 'protein_coding', which
+        means that there is an open reading frame in this gene/transcript
+        whose successful transcription has been observed.
+        """
+        return self.biotype == "protein_coding"
diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py
index 82da315..197acfc 100644
--- a/pyensembl/transcript.py
+++ b/pyensembl/transcript.py
@@ -16,7 +16,6 @@
 
 from memoized_property import memoized_property
 
-from .biotypes import is_valid_biotype
 from .common import memoize
 from .locus_with_genome import LocusWithGenome
 
@@ -39,28 +38,42 @@ def __init__(
             strand,
             biotype,
             gene_id,
-            genome,
-            require_valid_biotype=True):
-        LocusWithGenome.__init__(self, contig, start, end, strand, genome)
-        self.id = transcript_id
-        self.name = transcript_name
-        self.biotype = biotype
+            genome):
+        LocusWithGenome.__init__(
+            self,
+            contig=contig,
+            start=start,
+            end=end,
+            strand=strand,
+            biotype=biotype,
+            genome=genome)
+        self.transcript_id = transcript_id
+        self.transcript_name = transcript_name
         self.gene_id = gene_id
-        self.require_valid_biotype = require_valid_biotype
-        if require_valid_biotype and not is_valid_biotype(biotype):
-            raise ValueError(
-                "Invalid biotype '%s' for transcript with ID=%s, name=%s" % (
-                    biotype, transcript_id, transcript_name))
+
+    @property
+    def id(self):
+        """
+        Alias for transcript_id necessary for backward compatibility.
+        """
+        return self.transcript_id
+
+    @property
+    def name(self):
+        """
+        Alias for transcript_name necessary for backward compatibility.
+        """
+        return self.transcript_name
 
     def __str__(self):
         return (
-            "Transcript(id=%s,"
+            "Transcript(transcript_id=%s,"
             " name=%s,"
             " gene_id=%s,"
             " gene_name=%s,"
             " biotype=%s,"
             " location=%s:%d-%d)") % (
-                self.id,
+                self.transcript_id,
                 self.name,
                 self.gene.id,
                 self.gene.name,
@@ -86,11 +99,9 @@ def __hash__(self):
 
     def to_dict(self):
         state_dict = LocusWithGenome.to_dict(self)
-        state_dict["transcript_id"] = self.id
+        state_dict["transcript_id"] = self.transcript_id
         state_dict["transcript_name"] = self.name
-        state_dict["biotype"] = self.biotype
         state_dict["gene_id"] = self.gene_id
-        state_dict["require_valid_biotype"] = self.require_valid_biotype
         return state_dict
 
     @property