diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1da7b9f02..5d1227408 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,7 +35,7 @@ Bugfixes and improvements:
- [#134](https://github.com/lucventurini/mikado/issues/134): when checking for potential Alternative Splicing Events (ASEs), now Mikado will check whether the CDS phases are in frame with each other. Moreover **Mikado will now calculate the CDS overlap percentage based on the primary transcript CDS length**, not the minimum CDS length between primary and candidate. Please note that the change **regarding the frame** also affects the monosublocus stage. Mikado still considers only the primary ORFs for the overlap.
- [#138](https://github.com/lucventurini/mikado/issues/138): Solved a bug which led Mikado to recalculate the phases for each model during picking, potentially creating mistakes for models truncated at the 5' end.
-- [#142](https://github.com/lucventurini/mikado/issues/142): padded transcripts will add terminal exons so that
+- [#142](https://github.com/lucventurini/mikado/issues/142): padded transcripts will add terminal *exons* rather than just extending their terminal ends. This should prevent the creation of faux retained introns. Please see the issue for more details.

# Version 1.2.4
diff --git a/Mikado/configuration/configuration_blueprint.json b/Mikado/configuration/configuration_blueprint.json
index 6cd116717..80d9b8963 100644
--- a/Mikado/configuration/configuration_blueprint.json
+++ b/Mikado/configuration/configuration_blueprint.json
@@ -240,7 +240,9 @@
"- out_fasta: output transcript FASTA file",
"- gff: array of input predictions for this step.",
"- log: output log. Default: prepare.log",
- "- labels: labels to be associated with the input GFFs. Default: None."
+ "- labels: labels to be associated with the input GFFs. Default: None.",
+ "- reference: these files are treated as reference-like, i.e. these transcripts will never get discarded",
+ " during the preparation step."
],
"type": "object",
"required": ["out", "out_fasta", "gff"],
@@ -252,6 +254,7 @@
"gff": { "type": "array", "default": []},
"labels": {"type": "array", "default": []},
"strand_specific_assemblies": {"type": "array", "default": []},
+ "reference": {"type": "array", "default": []},
"source_score":{
"type": "object",
"default": {},
diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py
index 06db3a040..6dd280836 100644
--- a/Mikado/loci/locus.py
+++ b/Mikado/loci/locus.py
@@ -924,7 +924,7 @@ def expand_transcript(transcript, new_starting_exons, new_final_exons, fai, logg
transcript.start, transcript.end, len(genome_seq))
logger.error(error)
raise InvalidTranscript(error)
- seq = TranscriptChecker(transcript, genome_seq).cdna
+ seq = TranscriptChecker(transcript, genome_seq, is_reference=True).cdna
assert len(seq) == transcript.cdna_length, (len(seq), transcript.cdna_length, transcript.exons)
assert len(seq) == backup.cdna_length + upstream + downstream, (
len(seq), backup.cdna_length + upstream + downstream,
diff --git a/Mikado/preparation/annotation_parser.py b/Mikado/preparation/annotation_parser.py
index fd6e2a86b..ac3dbca2b 100644
--- a/Mikado/preparation/annotation_parser.py
+++ b/Mikado/preparation/annotation_parser.py
@@ -65,11 +65,11 @@ def run(self):
while True:
results = self.submission_queue.get()
try:
- label, handle, strand_specific, shelf_name = results
+ label, handle, strand_specific, is_reference, shelf_name = results
except ValueError as exc:
raise ValueError("{}.\tValues: {}".format(exc, ", ".join([str(_) for _ in results])))
if handle == "EXIT":
- self.submission_queue.put(("EXIT", "EXIT", "EXIT", "EXIT"))
+ self.submission_queue.put(("EXIT", "EXIT", "EXIT", "EXIT", "EXIT"))
break
counter += 1
self.logger.debug("Received %s (label: %s; SS: %s, shelf_name: %s)",
@@ -87,6 +87,7 @@ def run(self):
self.logger,
min_length=self.min_length,
strip_cds=self.__strip_cds,
+ is_reference=is_reference,
strand_specific=strand_specific)
elif gff_handle.__annot_type__ == "gtf":
new_ids = load_from_gtf(shelf_name,
@@ -95,6 +96,7 @@ def run(self):
found_ids,
self.logger,
min_length=self.min_length,
+ is_reference=is_reference,
strip_cds=self.__strip_cds,
strand_specific=strand_specific)
elif gff_handle.__annot_type__ == "bed12":
@@ -103,6 +105,7 @@ def run(self):
label,
found_ids,
self.logger,
+ is_reference=is_reference,
min_length=self.min_length,
strip_cds=self.__strip_cds,
strand_specific=strand_specific)
@@ -182,6 +185,7 @@ def load_into_storage(shelf_name, exon_lines, min_length, logger, strip_cds=True
"CREATE TABLE dump (chrom text, start integer, end integer, strand text, tid text, features blob)")
cursor.execute("CREATE INDEX idx ON dump (tid)")
+ logger.debug("Created tables for shelf %s", shelf_name)

for tid in exon_lines:
if "features" not in exon_lines[tid]:
@@ -248,9 +252,13 @@ def load_into_storage(shelf_name, exon_lines, min_length, logger, strip_cds=True
# Discard transcript under a certain size
if tlength < min_length:
- logger.info("Discarding %s because its size (%d) is under the minimum of %d",
- tid, tlength, min_length)
- continue
+ if exon_lines[tid]["is_reference"] is True:
+ logger.info("%s retained even if it is too short (%d) as it is a reference transcript.",
+ tid, tlength)
+ else:
+ logger.info("Discarding %s because its size (%d) is under the minimum of %d",
+ tid, tlength, min_length)
+ continue

values = json.dumps(exon_lines[tid])
@@ -274,6 +282,7 @@ def load_from_gff(shelf_name,
found_ids,
logger,
min_length=0,
+ is_reference=False,
strip_cds=False,
strand_specific=False): """ @@ -292,11 +301,16 @@ def load_from_gff(shelf_name, :type strip_cds: bool :param strand_specific: whether the assembly is strand-specific or not. :type strand_specific: bool + :param is_reference: boolean. If set to True, the transcript will always be retained. + :type is_reference: bool :return: """ exon_lines = dict() + strip_cds = strip_cds and (not is_reference) + strand_specific = strand_specific or is_reference + transcript2genes = dict() new_ids = set() @@ -342,6 +356,7 @@ def load_from_gff(shelf_name, exon_lines[row.id]["features"][row.feature].append((row.start, row.end, row.phase)) exon_lines[row.id]["strand_specific"] = strand_specific + exon_lines[row.id]["is_reference"] = is_reference continue elif row.is_exon is True: if not row.is_cds or (row.is_cds is True and strip_cds is False): @@ -366,7 +381,6 @@ def load_from_gff(shelf_name, row.transcript = ["{0}_{1}".format(label, tid) for tid in row.transcript] parents = row.transcript[:] - to_delete = set() for tid in parents: if tid in found_ids: @@ -386,6 +400,7 @@ def load_from_gff(shelf_name, exon_lines[tid]["tid"] = tid exon_lines[tid]["parent"] = transcript2genes[tid] exon_lines[tid]["strand_specific"] = strand_specific + exon_lines[tid]["is_reference"] = is_reference elif tid not in exon_lines and tid not in transcript2genes: continue else: @@ -405,7 +420,9 @@ def load_from_gff(shelf_name, continue gff_handle.close() - load_into_storage(shelf_name, exon_lines, logger=logger, min_length=min_length, strip_cds=strip_cds) + logger.info("Starting to load %s", shelf_name) + load_into_storage(shelf_name, exon_lines, + logger=logger, min_length=min_length, strip_cds=strip_cds) return new_ids @@ -416,6 +433,7 @@ def load_from_gtf(shelf_name, found_ids, logger, min_length=0, + is_reference=False, strip_cds=False, strand_specific=False): """ @@ -434,11 +452,16 @@ def load_from_gtf(shelf_name, :type strip_cds: bool :param strand_specific: whether the assembly is strand-specific or not. :type strand_specific: bool + :param is_reference: boolean. If set to True, the transcript will always be retained. 
+ :type is_reference: bool :return: """ exon_lines = dict() + strip_cds = strip_cds and (not is_reference) + strand_specific = strand_specific or is_reference + # Reduce memory footprint [intern(_) for _ in ["chrom", "features", "strand", "attributes", "tid", "parent", "attributes"]] @@ -470,6 +493,7 @@ def load_from_gtf(shelf_name, exon_lines[row.transcript]["tid"] = row.id exon_lines[row.transcript]["parent"] = "{}.gene".format(row.id) exon_lines[row.transcript]["strand_specific"] = strand_specific + exon_lines[row.transcript]["is_reference"] = is_reference if "exon_number" in exon_lines[row.transcript]["attributes"]: del exon_lines[row.id]["attributes"]["exon_number"] continue @@ -494,6 +518,7 @@ def load_from_gtf(shelf_name, exon_lines[row.id]["tid"] = row.transcript exon_lines[row.id]["parent"] = "{}.gene".format(row.id) exon_lines[row.id]["strand_specific"] = strand_specific + exon_lines[row.id]["is_reference"] = is_reference else: if row.transcript in to_ignore: continue @@ -509,7 +534,10 @@ def load_from_gtf(shelf_name, exon_lines[row.transcript]["features"][row.feature].append((row.start, row.end, row.phase)) new_ids.add(row.transcript) gff_handle.close() - load_into_storage(shelf_name, exon_lines, logger=logger, min_length=min_length, strip_cds=strip_cds) + logger.info("Starting to load %s", shelf_name) + load_into_storage(shelf_name, + exon_lines, + logger=logger, min_length=min_length, strip_cds=strip_cds) return new_ids @@ -520,6 +548,7 @@ def load_from_bed12(shelf_name, found_ids, logger, min_length=0, + is_reference=False, strip_cds=False, strand_specific=False): """ @@ -538,11 +567,16 @@ def load_from_bed12(shelf_name, :type strip_cds: bool :param strand_specific: whether the assembly is strand-specific or not. :type strand_specific: bool + :param is_reference: boolean. If set to True, the transcript will always be retained. + :type is_reference: bool :return: """ exon_lines = dict() + strip_cds = strip_cds and (not is_reference) + strand_specific = strand_specific or is_reference + # Reduce memory footprint [intern(_) for _ in ["chrom", "features", "strand", "attributes", "tid", "parent", "attributes"]] @@ -573,6 +607,7 @@ def load_from_bed12(shelf_name, exon_lines[transcript.id]["tid"] = transcript.id exon_lines[transcript.id]["parent"] = "{}.gene".format(transcript.id) exon_lines[transcript.id]["strand_specific"] = strand_specific + exon_lines[transcript.id]["is_reference"] = is_reference exon_lines[transcript.id]["features"]["exon"] = [ (exon[0], exon[1]) for exon in transcript.exons ] @@ -585,6 +620,7 @@ def load_from_bed12(shelf_name, ] new_ids.add(transcript.id) gff_handle.close() - load_into_storage(shelf_name, exon_lines, logger=logger, min_length=min_length, strip_cds=strip_cds) + load_into_storage(shelf_name, exon_lines, + logger=logger, min_length=min_length, strip_cds=strip_cds) return new_ids \ No newline at end of file diff --git a/Mikado/preparation/checking.py b/Mikado/preparation/checking.py index d675b4824..8023c89bf 100644 --- a/Mikado/preparation/checking.py +++ b/Mikado/preparation/checking.py @@ -21,6 +21,7 @@ def create_transcript(lines, start, end, lenient=False, + is_reference=False, strand_specific=False, canonical_splices=(("GT", "AG"), ("GC", "AG"), @@ -51,6 +52,9 @@ def create_transcript(lines, :param logger: optional logger to use during processing. + :param is_reference: boolean. If set, the transcript's strand will not be checked. 
+ + :rtype: (None|TranscriptChecker) """ @@ -94,6 +98,7 @@ def create_transcript(lines, strand_specific=strand_specific, canonical_splices=canonical_splices, force_keep_cds=force_keep_cds, + is_reference=is_reference, logger=logger) logger.debug("Finished adding exon lines to %s", lines["tid"]) transcript_object.finalize() @@ -128,7 +133,6 @@ def __init__(self, gtf_out, tmpdir, lenient=False, - # strand_specific=False, canonical_splices=(("GT", "AG"), ("GC", "AG"), ("AT", "AC")), @@ -167,7 +171,7 @@ def __init__(self, def run(self): checker = functools.partial(create_transcript, - lenient=self.lenient, + # lenient=self.lenient, # strand_specific=self.strand_specific, canonical_splices=self.canonical, logger=self.logger) @@ -190,10 +194,15 @@ def run(self): counter)) break self.logger.debug("Checking %s", lines["tid"]) + if "is_reference" not in lines: + raise KeyError(lines) + transcript = checker(lines, str(self.fasta[lines["chrom"]][start-1:end]), start, end, + lenient=self.lenient, + is_reference=lines["is_reference"], strand_specific=lines["strand_specific"]) if transcript is None: diff --git a/Mikado/preparation/prepare.py b/Mikado/preparation/prepare.py index e34bc8f9d..b270f9880 100644 --- a/Mikado/preparation/prepare.py +++ b/Mikado/preparation/prepare.py @@ -2,7 +2,7 @@ import tempfile import gc from .checking import create_transcript, CheckingProcess -from .annotation_parser import AnnotationParser, load_from_gtf, load_from_gff +from .annotation_parser import AnnotationParser, load_from_gtf, load_from_gff, load_from_bed12 from ..parsers import to_gff import operator import collections @@ -62,11 +62,15 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False): transcripts = collections.defaultdict(dict) for shelf_name in shelf_stacks: shelf_score = shelf_stacks[shelf_name]["score"] - for values in shelf_stacks[shelf_name]["cursor"].execute("SELECT chrom, start, end, strand, tid FROM dump"): - chrom, start, end, strand, tid = values - if (start, end) not in transcripts[chrom]: - transcripts[chrom][(start, end)] = list() - transcripts[chrom][(start, end)].append((tid, shelf_name, shelf_score)) + is_reference = shelf_stacks[shelf_name]["is_reference"] + try: + for values in shelf_stacks[shelf_name]["cursor"].execute("SELECT chrom, start, end, strand, tid FROM dump"): + chrom, start, end, strand, tid = values + if (start, end) not in transcripts[chrom]: + transcripts[chrom][(start, end)] = list() + transcripts[chrom][(start, end)].append((tid, shelf_name, shelf_score, is_reference)) + except sqlite3.OperationalError: + raise sqlite3.OperationalError("dump not found in {}".format(shelf_name)) for chrom in sorted(transcripts.keys()): @@ -80,14 +84,14 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False): if len(tids) > 1: exons = collections.defaultdict(list) di_features = dict() - for tid, shelf, score in tids: + for tid, shelf, score, is_reference in tids: strand, features = next(shelf_stacks[shelf]["cursor"].execute( "select strand, features from dump where tid = ?", (tid,))) features = json.loads(features) exon_set = tuple(sorted([(exon[0], exon[1], strand) for exon in features["features"]["exon"]], key=operator.itemgetter(0, 1))) - exons[exon_set].append((tid, shelf, score)) + exons[exon_set].append((tid, shelf, score, is_reference)) di_features[tid] = features tids = [] logger.debug("%d exon chains for pos %s", @@ -95,26 +99,29 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False): for tid_list in exons.values(): if len(tid_list) > 1 and 
keep_redundant is False: cds = collections.defaultdict(list) - for tid, shelf, score in tid_list: + for tid, shelf, score, is_reference in tid_list: cds_set = tuple(sorted([(exon[0], exon[1]) for exon in di_features[tid]["features"].get("CDS", [])])) - cds[cds_set].append((tid, shelf, score)) + cds[cds_set].append((tid, shelf, score, is_reference)) # Now checking the CDS for cds_list in cds.values(): if len(cds_list) > 1: logger.debug("The following transcripts are redundant: %s", ",".join([_[0] for _ in cds_list])) # TODO: we have to select things so that we prioritise transcripts correctly - max_score = max([_[2] for _ in cds_list]) - cds_list = [_ for _ in cds_list if _[2] == max_score] - to_keep = random.choice(cds_list) + # First step: select things that are reference + to_keep = [_ for _ in cds_list if _[3] is True] + if len(to_keep) == 0: + max_score = max([_[2] for _ in cds_list]) + cds_list = [_ for _ in cds_list if _[2] == max_score] + to_keep = [random.choice(cds_list)] + for tid in cds_list: - if tid != to_keep: + if tid not in to_keep: logger.info("Discarding %s as redundant", tid[0]) else: logger.info("Keeping %s amongst redundant transcripts", tid[0]) - - tids.append(to_keep) + tids.extend(to_keep) else: tids.extend(tid_list) @@ -151,8 +158,6 @@ def perform_check(keys, shelve_stacks, args, logger): # with all necessary arguments aside for the lines partial_checker = functools.partial( create_transcript, - lenient=args.json_conf["prepare"]["lenient"], - # strand_specific=args.json_conf["prepare"]["strand_specific"], canonical_splices=args.json_conf["prepare"]["canonical"], logger=logger, force_keep_cds= not args.json_conf["prepare"]["strip_cds"]) @@ -169,6 +174,8 @@ def perform_check(keys, shelve_stacks, args, logger): tobj, str(args.json_conf["reference"]["genome"][chrom][key[0]-1:key[1]]), key[0], key[1], + lenient=args.json_conf["prepare"]["lenient"], + is_reference=tobj["is_reference"], strand_specific=tobj["strand_specific"]) if transcript_object is None: continue @@ -195,7 +202,6 @@ def perform_check(keys, shelve_stacks, args, logger): os.path.basename(args.json_conf["prepare"]["files"]["out"].name), args.tempdir.name, lenient=args.json_conf["prepare"]["lenient"], - # strand_specific=args.json_conf["prepare"]["strand_specific"], canonical_splices=args.json_conf["prepare"]["canonical"], log_level=args.level) for _ in range(args.json_conf["prepare"]["procs"])] @@ -234,6 +240,111 @@ def perform_check(keys, shelve_stacks, args, logger): return +def _load_exon_lines_single_thread(args, shelve_names, logger, min_length, strip_cds): + + logger.info("Starting to load lines from %d files (single-threaded)", + len(args.json_conf["prepare"]["files"]["gff"])) + previous_file_ids = collections.defaultdict(set) + to_do = list(zip( + shelve_names, + args.json_conf["prepare"]["files"]["labels"], + args.json_conf["prepare"]["files"]["strand_specific_assemblies"], + args.json_conf["prepare"]["files"]["reference"], + args.json_conf["prepare"]["files"]["gff"])) + logger.info("To do: %d combinations", len(to_do)) + + for new_shelf, label, strand_specific, is_reference, gff_name in to_do: + logger.info("Starting with %s", gff_name) + gff_handle = to_gff(gff_name) + found_ids = set.union(set(), *previous_file_ids.values()) + if gff_handle.__annot_type__ == "gff3": + new_ids = load_from_gff(new_shelf, + gff_handle, + label, + found_ids, + logger, + min_length=min_length, + strip_cds=strip_cds and not is_reference, + is_reference=is_reference, + strand_specific=strand_specific or is_reference) 
+ elif gff_handle.__annot_type__ == "bed12": + new_ids = load_from_bed12(new_shelf, + gff_handle, + label, + found_ids, + logger, + min_length=min_length, + strip_cds=strip_cds and not is_reference, + is_reference=is_reference, + strand_specific=strand_specific or is_reference) + else: + new_ids = load_from_gtf(new_shelf, + gff_handle, + label, + found_ids, + logger, + min_length=min_length, + strip_cds=strip_cds and not is_reference, + is_reference=is_reference, + strand_specific=strand_specific or is_reference) + + previous_file_ids[gff_handle.name] = new_ids + return + + +def _load_exon_lines_multi(args, shelve_names, logger, min_length, strip_cds, threads): + logger.info("Starting to load lines from %d files (using %d processes)", + len(args.json_conf["prepare"]["files"]["gff"]), threads) + submission_queue = multiprocessing.JoinableQueue(-1) + + working_processes = [] + # working_processes = [ for _ in range(threads)] + + for num in range(threads): + proc = AnnotationParser(submission_queue, + args.logging_queue, + num + 1, + log_level=args.level, + min_length=min_length, + strip_cds=strip_cds) + proc.start() + working_processes.append(proc) + + # [_.start() for _ in working_processes] + for new_shelf, label, strand_specific, is_reference, gff_name in zip( + shelve_names, + args.json_conf["prepare"]["files"]["labels"], + args.json_conf["prepare"]["files"]["strand_specific_assemblies"], + args.json_conf["prepare"]["files"]["reference"], + args.json_conf["prepare"]["files"]["gff"]): + submission_queue.put((label, gff_name, strand_specific, is_reference, new_shelf)) + + submission_queue.put(("EXIT", "EXIT", "EXIT", "EXIT", "EXIT")) + + [_.join() for _ in working_processes] + + tid_counter = Counter() + for shelf in shelve_names: + conn = sqlite3.connect(shelf) + cursor = conn.cursor() + tid_counter.update([_[0] for _ in cursor.execute("SELECT tid FROM dump")]) + if tid_counter.most_common()[0][1] > 1: + if set(args.json_conf["prepare"]["files"]["labels"]) == {""}: + exception = exceptions.RedundantNames( + """Found redundant names during multiprocessed file analysis. + Please repeat using distinct labels for your input files. Aborting.""") + else: + exception = exceptions.RedundantNames( + """Found redundant names during multiprocessed file analysis, even if + unique labels were provided. Please try to repeat with a different and + more unique set of labels. 
Aborting.""") + logger.exception(exception) + raise exception + + del working_processes + gc.collect() + + def load_exon_lines(args, shelve_names, logger, min_length=0): """This function loads all exon lines from the GFF inputs into a @@ -255,89 +366,9 @@ def load_exon_lines(args, shelve_names, logger, min_length=0): strip_cds = args.json_conf["prepare"]["strip_cds"] if args.json_conf["prepare"]["single"] is True or threads == 1: - - logger.info("Starting to load lines from %d files (single-threaded)", - len(args.json_conf["prepare"]["files"]["gff"])) - previous_file_ids = collections.defaultdict(set) - for new_shelf, label, strand_specific, gff_name in zip( - shelve_names, - args.json_conf["prepare"]["files"]["labels"], - args.json_conf["prepare"]["files"]["strand_specific_assemblies"], - args.json_conf["prepare"]["files"]["gff"]): - logger.info("Starting with %s", gff_name) - gff_handle = to_gff(gff_name) - found_ids = set.union(set(), *previous_file_ids.values()) - if gff_handle.__annot_type__ == "gff3": - new_ids = load_from_gff(new_shelf, - gff_handle, - label, - found_ids, - logger, - min_length=min_length, - strip_cds=strip_cds, - strand_specific=strand_specific) - else: - new_ids = load_from_gtf(new_shelf, - gff_handle, - label, - found_ids, - logger, - min_length=min_length, - strip_cds=strip_cds, - strand_specific=strand_specific) - - previous_file_ids[gff_handle.name] = new_ids + _load_exon_lines_single_thread(args, shelve_names, logger, min_length, strip_cds) else: - logger.info("Starting to load lines from %d files (using %d processes)", - len(args.json_conf["prepare"]["files"]["gff"]), threads) - submission_queue = multiprocessing.JoinableQueue(-1) - - working_processes = [] - # working_processes = [ for _ in range(threads)] - - for num in range(threads): - proc = AnnotationParser(submission_queue, - args.logging_queue, - num + 1, - log_level=args.level, - min_length=min_length, - strip_cds=strip_cds) - proc.start() - working_processes.append(proc) - - # [_.start() for _ in working_processes] - for new_shelf, label, strand_specific, gff_name in zip( - shelve_names, - args.json_conf["prepare"]["files"]["labels"], - args.json_conf["prepare"]["files"]["strand_specific_assemblies"], - args.json_conf["prepare"]["files"]["gff"]): - - submission_queue.put((label, gff_name, strand_specific, new_shelf)) - - submission_queue.put(("EXIT", "EXIT", "EXIT", "EXIT")) - - [_.join() for _ in working_processes] - - tid_counter = Counter() - for shelf in shelve_names: - conn = sqlite3.connect(shelf) - cursor = conn.cursor() - tid_counter.update([_[0] for _ in cursor.execute("SELECT tid FROM dump")]) - if tid_counter.most_common()[0][1] > 1: - if set(args.json_conf["prepare"]["files"]["labels"]) == {""}: - exception = exceptions.RedundantNames( - """Found redundant names during multiprocessed file analysis. - Please repeat using distinct labels for your input files. Aborting.""") - else: - exception = exceptions.RedundantNames( - """Found redundant names during multiprocessed file analysis, even if - unique labels were provided. Please try to repeat with a different and - more unique set of labels. 
Aborting.""") - logger.exception(exception) - raise exception - - del working_processes - gc.collect() + _load_exon_lines_multi(args, shelve_names, logger, min_length, strip_cds, threads) logger.info("Finished loading lines from %d files", len(args.json_conf["prepare"]["files"]["gff"])) @@ -382,6 +413,11 @@ def prepare(args, logger): (member in args.json_conf["prepare"]["files"]["strand_specific_assemblies"]) for member in args.json_conf["prepare"]["files"]["gff"]] + args.json_conf["prepare"]["files"]["reference"] = [ + (member in args.json_conf["prepare"]["files"]["reference"]) + for member in args.json_conf["prepare"]["files"]["gff"] + ] + shelve_names = [path_join(args.json_conf["prepare"]["files"]["output_dir"], "mikado_shelf_{}.db".format(str(_).zfill(5))) for _ in range(len(args.json_conf["prepare"]["files"]["gff"]))] @@ -438,10 +474,14 @@ def prepare(args, logger): shelve_source_scores.append( args.json_conf["prepare"]["files"]["source_score"].get(label, 0) ) + try: - for shelf, score in zip(shelve_names, shelve_source_scores): + for shelf, score, is_reference in zip(shelve_names, shelve_source_scores, + args.json_conf["prepare"]["files"]["reference"]): + assert isinstance(is_reference, bool) conn = sqlite3.connect(shelf) - shelf_stacks[shelf] = {"conn": conn, "cursor": conn.cursor(), "score": score} + shelf_stacks[shelf] = {"conn": conn, "cursor": conn.cursor(), "score": score, + "is_reference": is_reference} # shelf_stacks = dict((_, shelve.open(_, flag="r")) for _ in shelve_names) except Exception as exc: raise TypeError((shelve_names, exc)) diff --git a/Mikado/subprograms/configure.py b/Mikado/subprograms/configure.py index c3d49c7a2..deb13ecc9 100644 --- a/Mikado/subprograms/configure.py +++ b/Mikado/subprograms/configure.py @@ -199,12 +199,12 @@ def create_config(args): elif args.list: config["prepare"]["files"]["source_score"] = dict() with open(args.list) as list_file: - files, labels, strandedness, scores = [], [], [], [] + files, labels, strandedness, scores, is_references = [], [], [], [], [] files_counter = Counter() for line in list_file: try: _fields = line.rstrip().split("\t") - filename, label, stranded = _fields[:3] + filename, label, stranded = _fields[:3] if not os.path.exists(filename): raise ValueError("Invalid file name: {}".format(filename)) @@ -218,6 +218,15 @@ def create_config(args): else: score = 0 scores.append(score) + if len(_fields) > 4: + if _fields[4] == "True": + is_reference = True + else: + is_reference = False + else: + is_reference = False + is_references.append(is_reference) + except ValueError as exc: raise ValueError("Malformed inputs file. 
Error:\n{}".format(exc)) files_counter.update(files) @@ -234,6 +243,9 @@ def create_config(args): for source, score in zip(labels, scores): config["prepare"]["files"]["source_score"][source] = score + config["prepare"]["files"]["reference"] = [list(labels)[_[0]] for _ in enumerate(is_references) + if _[1] is True] + elif args.no_files is True: for stage in ["pick", "prepare", "serialise"]: if "files" in config[stage]: diff --git a/Mikado/tests/prepare_misc_test.py b/Mikado/tests/prepare_misc_test.py index 6c494a9c8..4aa5c6700 100644 --- a/Mikado/tests/prepare_misc_test.py +++ b/Mikado/tests/prepare_misc_test.py @@ -218,6 +218,7 @@ def test_example_model_through_process(self): lines["features"] = dict() lines["features"]["exon"] = [(208937, 209593), (209881, 210445)] lines["strand_specific"] = True + lines["is_reference"] = False self.submission_queue.put((lines, lines["start"], lines["end"], 0)) self.submission_queue.put(("EXIT", None, None, None)) proc.start() diff --git a/Mikado/tests/test_system_calls.py b/Mikado/tests/test_system_calls.py index 768250ded..40eead61b 100644 --- a/Mikado/tests/test_system_calls.py +++ b/Mikado/tests/test_system_calls.py @@ -24,6 +24,7 @@ from Mikado.transcripts.transcript import Namespace from Mikado.utilities.log_utils import create_null_logger, create_default_logger from Mikado.parsers.GFF import GffLine +from Mikado.parsers.GTF import GtfLine from Mikado.parsers import to_gff from Mikado.transcripts import Transcript @@ -143,8 +144,10 @@ def test_prepare_trinity_gff(self): prepare.prepare(args, self.logger) # Now that the program has run, let's check the output - fa = pyfaidx.Fasta(os.path.join(self.conf["prepare"]["files"]["output_dir"], - "mikado_prepared.fasta")) + fasta = os.path.join(self.conf["prepare"]["files"]["output_dir"], + "mikado_prepared.fasta") + self.assertGreater(os.stat(fasta).st_size, 0, test_file) + fa = pyfaidx.Fasta(fasta) res = dict((_, len(fa[_])) for _ in fa.keys()) fa.close() self.assertEqual(res, self.trinity_res) @@ -553,6 +556,93 @@ def test_source_selection(self): key = list(fa.keys())[0] self.assertEqual(key, res, round) + def test_reference_selection(self): + + self.conf["prepare"]["files"]["output_dir"] = outdir = tempfile.gettempdir() + self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta" + self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf" + self.conf["prepare"]["strip_cds"] = False + self.conf["prepare"]["keep_redundant"] = False + + self.conf["reference"]["genome"] = self.__genomefile__.name + + t = Transcript() + # This is *key*. Transcript T1 should never be selected, unless we are having a "lenient" analysis. + # However, by flagging T1 as a "reference" transcript, we should retain it + self.conf["prepare"]["lenient"] = False + t.chrom, t.start, t.end, t.strand = "Chr5", 208937, 210445, "-" + t.add_exons([(208937, 209593), (209881, 210445)]) + t.id = "file1.1" + t.parent = "file1" + t.finalize() + + # Likewise. We will also test that even when different scores are applied, we *still* retain all + # transcripts. + t2 = t.deepcopy() + t2.id = "file2.1" + t2.parent = "file2" + + rounds = { + # Standard cases. I expect the transcripts to be reversed, and one to be picked + 0: [0, 0, [], "rand", "+"], + 1: [-2, -2, [], "rand", "+"], + 2: [10, 10, [], "rand", "+"], + # T1 as reference. T1 should be kept, T1 should be on the negative strand. + 3: [0, 0, ["T1"], sorted(["T1_file1.1"]), "-"], + # Both as reference. Both should be kept, both should be on the negative strand. 
+ 4: [1, 0, ["T1", "T2"], sorted(["T1_file1.1", "T2_file2.1"]), "-"], + 5: [0, 0, ["T2"], sorted(["T2_file2.1"]), "-"] + + } + + for fformat in ("gff3", "gtf", "bed12"): + t_file = tempfile.NamedTemporaryFile(mode="wt", suffix=".{}".format(fformat)) + t2_file = tempfile.NamedTemporaryFile(mode="wt", suffix=".{}".format(fformat)) + print(t.format(fformat), file=t_file) + t_file.flush() + print(t2.format(fformat), file=t2_file) + t2_file.flush() + + self.conf["prepare"]["files"]["gff"] = [t_file.name, t2_file.name] + self.conf["prepare"]["files"]["strand_specific_assemblies"] = [t_file.name, t2_file.name] + self.conf["prepare"]["files"]["labels"] = ["T1", "T2"] + + for round in rounds: + for iteration in range(4): # Repeat each test 4 times + with self.subTest(round=round, format=format, iteration=iteration, + msg="Starting round {} ({})".format(round, rounds[round])): + t1_score, t2_score, is_ref, res, corr_strand = rounds[round] + self.conf["prepare"]["files"]["source_score"] = {"T1": t1_score, + "T2": t2_score} + self.conf["prepare"]["files"]["reference"] = [] + for label in is_ref: + if label == "T1": + self.conf["prepare"]["files"]["reference"].append(t_file.name) + elif label == "T2": + self.conf["prepare"]["files"]["reference"].append(t2_file.name) + + args = Namespace() + args.strip_cds = False + args.json_conf = self.conf + with self.assertLogs(self.logger, "INFO") as cm: + prepare.prepare(args, self.logger) + self.assertGreater(os.stat(self.conf["prepare"]["files"]["out_fasta"]).st_size, 0, + (round, fformat, iteration, "\n".join(cm.output))) + fa = pyfaidx.Fasta(os.path.join(outdir, os.path.basename( + self.conf["prepare"]["files"]["out_fasta"]))) + if res != "rand": + key = sorted(list(fa.keys())) + self.assertEqual(key, res, round) + else: + self.assertEqual(len(fa.keys()), 1, (round, fa.keys(), res)) + gtf = os.path.join(outdir, os.path.basename( + self.conf["prepare"]["files"]["out"])) + strand = [_.strand for _ in to_gff(gtf)] + self.assertEqual(len(set(strand)), 1, strand) + self.assertEqual(set(strand).pop(), corr_strand, + (round, self.conf["prepare"]["files"]["reference"], + set(strand), corr_strand)) + class CompareCheck(unittest.TestCase): diff --git a/Mikado/tests/test_transcript_checker.py b/Mikado/tests/test_transcript_checker.py index 15c8f41c1..a35ccd002 100644 --- a/Mikado/tests/test_transcript_checker.py +++ b/Mikado/tests/test_transcript_checker.py @@ -360,6 +360,20 @@ def test_sequence_reversed(self): self.assertEqual(model.strand, "-") self.assertEqual(fasta, TranscriptChecker.rev_complement(seq)) + def test_reference_not_flipped(self): + + model = Transcript() + model.chrom, model.start, model.end = "Chr5", 9930, 13235 + model.id, model.parent, model.strand = "AT5G01030.1", "AT5G01030", "-" + model.add_exons([(9930, 10172), (10620, 12665), (12797, 13235)]) + model.add_exons([(10638, 12665), (12797, 13003)], features="CDS") + model.finalize() + model_fasta = self.fasta["Chr5"][model.start - 1:model.end] + check_model = TranscriptChecker(model, model_fasta, is_reference=True) + check_model.check_strand() + self.assertEqual(check_model.strand, "-") + self.assertGreater(check_model.combined_cds_length, 0) + class StopCodonChecker(unittest.TestCase): diff --git a/Mikado/transcripts/transcriptchecker.py b/Mikado/transcripts/transcriptchecker.py index 5a89881c8..c0ec5225c 100644 --- a/Mikado/transcripts/transcriptchecker.py +++ b/Mikado/transcripts/transcriptchecker.py @@ -33,6 +33,7 @@ class TranscriptChecker(Transcript): # pylint: disable=too-many-arguments def 
__init__(self, gffline, seq,
strand_specific=False, lenient=False,
+ is_reference=False,
canonical_splices=(("GT", "AG"),
("GC", "AG"),
("AT", "AC")),
force_keep_cds=False,
logger=None):
@@ -53,6 +54,7 @@ def __init__(self, gffline, seq,
:type lenient: bool
"""
self.__strand_specific = False
+ self.__is_reference = False
self.mixed_attribute = ""

if seq is None:
@@ -61,16 +63,19 @@ def __init__(self, gffline, seq,
self.__dict__.update(gffline.__dict__)
else:
super().__init__(gffline)
- self.original_strand = self.strand[:]
+ if self.strand is not None:
+ self.original_strand = self.strand[:]
+ else:
+ self.original_strand = None
self.__set_fasta_seq(seq)
- # self.fasta_seq = seq
- self.strand_specific = strand_specific
self.checked = False
- self.lenient = lenient
self.mixed_splices = False
self.reversed = False
self.canonical_splices = []
- self.__force_keep_cds = force_keep_cds
+ self.is_reference = is_reference
+ self.__force_keep_cds = force_keep_cds or self.is_reference
+ self.lenient = lenient or self.is_reference
+ self.strand_specific = strand_specific or self.is_reference

if not isinstance(canonical_splices, (tuple, list)):
raise ValueError("Canonical splices should be provided as lists or tuples")
@@ -80,6 +85,7 @@ def __init__(self, gffline, seq,
self.canonical_junctions = []
self.logger = logger
+
# pylint: enable=too-many-arguments

@property
@@ -110,6 +116,16 @@ def rev_complement(cls, string):
return Seq.reverse_complement(string)

+ @property
+ def is_reference(self):
+ return self.__is_reference
+
+ @is_reference.setter
+ def is_reference(self, value):
+ if not isinstance(value, bool):
+ raise TypeError("This property only accepts boolean values")
+ self.__is_reference = value
+
@property
def strand_specific(self):
"""
@@ -227,10 +243,15 @@ def check_strand(self):
canonical_counter["+"])

elif canonical_counter["-"] > 0:
- self.logger.debug("Transcript %s only has negative splice junctions, (%s), reversing.",
+ if self.is_reference is False:
+ self.logger.debug("Transcript %s only has negative splice junctions, (%s), reversing.",
self.id, canonical_counter)
- self.reverse_strand()
- self.reversed = True
+ self.reverse_strand()
+ self.reversed = True
+ else:
+ self.logger.debug(
+ "Transcript %s only has negative splice junctions, but as it is a reference we will not reverse it",
+ self.id)

if canonical_counter["+"] >= canonical_counter["-"] or (
max(canonical_counter["-"], canonical_counter["+"]) > 0 and self.strand_specific is True):
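
The net effect of the new is_reference flag during `mikado prepare` can be summarised as follows: reference inputs keep their CDS even when strip_cds is requested, they are always treated as strand-specific, they are retained even when shorter than the configured minimum length, and among redundant (identical) models the reference transcripts are always the ones kept. The sketch below condenses that logic for illustration; the names Candidate, resolve_flags, keep_despite_length and pick_redundant are invented for this example and are not part of the Mikado code base.

    # Illustrative sketch of the is_reference semantics introduced by this patch.
    import random
    from typing import List, NamedTuple

    class Candidate(NamedTuple):
        tid: str
        score: float
        is_reference: bool

    def resolve_flags(strip_cds, strand_specific, is_reference):
        # Mirrors load_from_gff/load_from_gtf/load_from_bed12: reference models
        # keep their CDS and are always handled as strand-specific.
        return strip_cds and not is_reference, strand_specific or is_reference

    def keep_despite_length(tlength, min_length, is_reference):
        # Mirrors load_into_storage: short reference models are retained.
        return tlength >= min_length or is_reference

    def pick_redundant(candidates: List[Candidate]) -> List[Candidate]:
        # Mirrors store_transcripts: among models with identical exons and CDS,
        # keep every reference transcript; otherwise keep one top-scoring model.
        kept = [cand for cand in candidates if cand.is_reference]
        if not kept:
            best = max(cand.score for cand in candidates)
            kept = [random.choice([cand for cand in candidates if cand.score == best])]
        return kept

    print(resolve_flags(strip_cds=True, strand_specific=False, is_reference=True))  # (False, True)
    print(keep_despite_length(150, 200, is_reference=True))                         # True
    print(pick_redundant([Candidate("T1_file1.1", 0, True), Candidate("T2_file2.1", 10, False)]))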
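
On the configuration side, the list file accepted by `mikado configure --list` gains an optional fifth column: the literal string True marks that input as reference-like (column order, per the updated configure.py: file, label, strand-specificity, optional score, optional reference flag). A minimal sketch of parsing one such row follows; the file name and label are made up, and the score handling is abridged with respect to the real code.

    # Illustrative only: parsing one row of the five-column list file.
    line = "reference_annotation.gff3\tref\tTrue\t5\tTrue"  # made-up example row

    _fields = line.rstrip().split("\t")
    filename, label, stranded = _fields[:3]
    score = float(_fields[3]) if len(_fields) > 3 else 0      # abridged score handling
    is_reference = len(_fields) > 4 and _fields[4] == "True"  # new fifth column

    print(filename, label, stranded, score, is_reference)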
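
Within TranscriptChecker the flag is deliberately coupled to the existing switches: is_reference=True implies force_keep_cds, lenient and strand_specific, which is why check_strand() now logs, but never reverses, a reference model whose splice junctions look canonical only on the opposite strand (see test_reference_not_flipped). A toy condensation of that constructor coupling, not the real class:

    # Toy condensation of the switch coupling added to TranscriptChecker.__init__.
    class CheckerFlags:
        def __init__(self, strand_specific=False, lenient=False,
                     force_keep_cds=False, is_reference=False):
            self.is_reference = is_reference
            self.force_keep_cds = force_keep_cds or is_reference
            self.lenient = lenient or is_reference
            self.strand_specific = strand_specific or is_reference

    flags = CheckerFlags(is_reference=True)
    assert (flags.force_keep_cds, flags.lenient, flags.strand_specific) == (True, True, True)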