From d37f8a5d7008f108afa889e5c0e963284061ddd5 Mon Sep 17 00:00:00 2001 From: Luca Venturini Date: Mon, 18 Feb 2019 12:20:18 +0000 Subject: [PATCH] Fixed #148; also, now metrics/score files will correctly report whether a transcript was "reference" to begin with. --- Mikado/loci/superlocus.py | 5 +- Mikado/picking/picker.py | 4 +- Mikado/transcripts/transcript.py | 43 +++- .../transcript_methods/finalizing.py | 7 +- Singularity.def | 206 ++++++++++++++++++ 5 files changed, 248 insertions(+), 17 deletions(-) create mode 100644 Singularity.def diff --git a/Mikado/loci/superlocus.py b/Mikado/loci/superlocus.py index a725fff9b..c59ffbc5f 100644 --- a/Mikado/loci/superlocus.py +++ b/Mikado/loci/superlocus.py @@ -1146,7 +1146,7 @@ def define_loci(self): use_transcript_scores=self._use_transcript_scores, stranded=self.stranded, verified_introns=self.locus_verified_introns, - logger = self.logger, + logger=self.logger, source=self.source ) else: @@ -1163,6 +1163,7 @@ def define_loci(self): self.logger.debug("Removing %s (primary: %s) as it has no reference transcripts", lid, self.loci[lid].primary_transcript_id) del self.loci[lid] + self.logger.debug("Remaining loci in %s: %s", self.id, ",".join(list(self.loci.keys()))) return @@ -1428,4 +1429,4 @@ def id(self) -> str: @property def lost_transcripts(self): - return self.__lost.copy() \ No newline at end of file + return self.__lost.copy() diff --git a/Mikado/picking/picker.py b/Mikado/picking/picker.py index 90a04fee1..0c503b8db 100644 --- a/Mikado/picking/picker.py +++ b/Mikado/picking/picker.py @@ -399,9 +399,9 @@ def __print_gff_headers(self, locus_out, score_keys): session = sqlalchemy.orm.sessionmaker(bind=engine)() dbutils.DBBASE.metadata.create_all(engine) - metrics = Superlocus.available_metrics[3:] + metrics = Superlocus.available_metrics[4:] metrics.extend(["external.{}".format(_.source) for _ in session.query(ExternalSource.source).all()]) - metrics = Superlocus.available_metrics[:3] + sorted(metrics) + metrics = 
Superlocus.available_metrics[:4] + sorted(metrics) if self.sub_out != '': assert isinstance(self.sub_out, str) diff --git a/Mikado/transcripts/transcript.py b/Mikado/transcripts/transcript.py index 53f61a76c..3ff90e713 100644 --- a/Mikado/transcripts/transcript.py +++ b/Mikado/transcripts/transcript.py @@ -259,6 +259,7 @@ def __init__(self, *args, # Mock setting of base hidden variables self.__id = "" self.__finalized = False # Flag. We do not want to repeat the finalising more than once. + self.__source = None self._first_phase = None self.__logger = None self.__strand = self.__score = None @@ -295,7 +296,7 @@ def __init__(self, *args, # Starting settings for everything else self.__chrom = None - self.source = source + self.source = self._original_source = source self.feature = "transcript" self.__start, self.__end = None, None self.attributes = dict() @@ -331,12 +332,12 @@ def __init__(self, *args, self.__cds_introntree = IntervalTree() self._possibly_without_exons = False self._accept_undefined_multi = accept_undefined_multi - self.__is_reference = False if len(args) == 0: return else: self.__initialize_with_line(args[0]) + self._original_source = self.source self.feature = intern(self.feature) @@ -838,16 +839,18 @@ def get_internal_orf_beds(self): yield new_row - @property + @Metric def is_reference(self): """Checks whether the transcript has been marked as reference by Mikado prepare""" - return self.__is_reference - @is_reference.setter - def is_reference(self, value): - if value not in (False, True): - raise TypeError("The 'is_reference' attribute must be boolean") - self.__is_reference = value + if self.json_conf is not None: + return self.original_source in self.json_conf.get("prepare", {}).get("files", {}).get( + "reference", {}) + else: + return False + + is_reference.category = "External" + is_reference.rtype = "bool" @property def frames(self): @@ -1205,6 +1208,7 @@ def load_dict(self, state): state[key] = intern(state[key]) setattr(self, key, 
state[key]) + self._original_source = self.source self.attributes = {} for key, val in state["attributes"].items(): if val in ["True", "False"]: @@ -1378,7 +1382,7 @@ def get_available_metrics(cls) -> list: # inspect.getmembers(cls))) # assert "tid" in metrics and "parent" in metrics and "score" in metrics _metrics = sorted([metric for metric in metrics]) - final_metrics = ["tid", "alias", "parent", "score"] + _metrics + final_metrics = ["tid", "alias", "parent", "original_source", "score"] + _metrics return final_metrics # ###################Class properties################################## @@ -1558,6 +1562,23 @@ def parent(self, parent): if self.__parent: self.__parent = [intern(_) for _ in self.__parent] + @property + def source(self): + """Source of the transcript. Equivalent to the second field in the GFF/GTF files.""" + return self.__source + + @source.setter + def source(self, source): + if source is not None and not isinstance(source, str): + raise TypeError("Source values must be strings or None!") + self.__source = source + + @property + def original_source(self): + """This property returns the original source assigned to the transcript (before Mikado assigns its own final + source value).""" + return self._original_source + @property def gene(self): @@ -2681,7 +2702,7 @@ def source_score(self): if self.json_conf is not None: return self.json_conf.get("prepare", {}).get("files", {}).get( - "source_score", {}).get(self.source, 0) + "source_score", {}).get(self.original_source, 0) else: return 0 diff --git a/Mikado/transcripts/transcript_methods/finalizing.py b/Mikado/transcripts/transcript_methods/finalizing.py index 3865ac403..b7434c6d0 100644 --- a/Mikado/transcripts/transcript_methods/finalizing.py +++ b/Mikado/transcripts/transcript_methods/finalizing.py @@ -731,9 +731,12 @@ def finalize(transcript): transcript.logger.debug("No predetermined has_stop_codon attribute for %s. 
Attributes: %s", transcript.id, transcript.attributes) - for prop in transcript.attributes: + for prop in list(transcript.attributes.keys()): if hasattr(transcript, prop): - setattr(transcript, prop, transcript.attributes[prop]) + try: + setattr(transcript, prop, transcript.attributes[prop]) + except AttributeError: # Some instance attributes CANNOT be set from the attributes of the GTF + transcript.attributes.pop(prop) # transcript = __calc_cds_introns(transcript) diff --git a/Singularity.def b/Singularity.def new file mode 100644 index 000000000..2899d8604 --- /dev/null +++ b/Singularity.def @@ -0,0 +1,206 @@ +Bootstrap: docker +From: centos:7 +OSversion: 7 +Include: yum wget +# If you want the updates (available at the bootstrap date) to be installed +# inside the container during the bootstrap instead of the General Availability +# point release (7.x) then uncomment the following line + +# UpdateURL: http://yum-repos.hpccluster/centos/7/updates/$basearch/ + +%test + python3.6 --version + pip3.6 --version + gcc --version + make --version + + python3.6 -c "import numpy" + + +%environment + export PATH="/usr/local/bin:$PATH:/usr/local/conda/bin/" + source /usr/local/conda/bin/activate + export AUGUSTUS_CONFIG_PATH=/opt/augustus-3.3/config/ + export TRINITY_HOME=/usr/local/bin/trinityrnaseq/ + export PATH=$PATH:${TRINITY_HOME} + alias python=python3.6 + + +%post + mkdir /jic + mkdir /nbi + mkdir /tgac + mkdir /ei + mkdir /tsl + mkdir /opt/software + + ### Install your packages ### + cd /opt/software/ + + yum -y install centos-release-scl + yum -y install scl-utils + yum -y install devtoolset-7-gcc* + scl enable devtoolset-7 bash + which gcc + gcc --version + mkdir -p /etc/profile.d/ + echo -e '#!/bin/bash\nsource scl_source enable devtoolset-7' > /etc/profile.d/enablegcc7.sh + + gcc --version + make --version + + # Clean up yum + yum clean all + + version=2.5.1 + cd /usr/local/src + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + bash 
Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local/conda + export PATH="/usr/local/conda/bin:$PATH" + source activate + conda install -y -c conda-forge pip + + python3 --version + cd $(dirname $(which python3)) + cd /opt/software/ + + # Install python requirements + wget https://github.com/lucventurini/mikado/raw/1.2.4/requirements.txt + cat requirements.txt + pip3 install -r requirements.txt + + # Now install mikado + pip3.6 install --prefix=/usr/local/ mikado==1.2.4 + # pip3.6 install --prefix=/usr/local/ networkit==4.6 # High-performance networks in Python + mikado --help + + # Finally install EiAnnot + git clone --recursive https://github.com/lucventurini/ei-annotation.git + cd ei-annotation; + git checkout 0.0.2; + cd eicore; + python3 setup.py bdist_wheel; + pip3.6 install --prefix=/usr/local/ -U dist/*whl; + cd ../; + python3 setup.py bdist_wheel; + pip install --prefix=/usr/local/ -U dist/*whl; + cd /opt/software/ + + +%apprun snakemake + snakemake "$@" + +%apprun hisat2-build + hisat2-build "$@" + +%apprun jupyter + jupyter "$@" + +%apprun gmap-build + gmap-build "$@" + +%apprun gmap + gmap "$@" + +%apprun gsnap + gsnap "$@" + +%apprun mikado + mikado "$@" + +%apprun samtools + samtools "$@" + +%apprun portcullis + portcullis "$@" + +%apprun junctools + junctools "$@" + +%apprun stringtie + stringtie "$@" + +%apprun class + class "$@" + +%apprun hisat2 + hisat2 "$@" + +%apprun augustus + /opt/augustus-3.3/bin/augustus "$@" + +%apprun filterBam + /opt/augustus-3.3/bin/filterBam "$@" + +%apprun aln2wig + /opt/augustus-3.3/bin/aln2wig "$@" + +%apprun joingenes + /opt/augustus-3.3/bin/joingenes "$@" + +%apprun etraining + /opt/augustus-3.3/bin/etraining "$@" + +%apprun bam2hints + /opt/augustus-3.3/bin/bam2hints "$@" + +%apprun fastBlockSearch + /opt/augustus-3.3/bin/fastBlockSearch "$@" + +%apprun homGeneMapping + /opt/augustus-3.3/bin/homGeneMapping "$@" + +%apprun prepareAlign + prepareAlign "$@" + +%apprun utrrnaseq + utrrnaseq "$@" + +%apprun paftools + k8 $(which paftools.js) 
"$@" + +%apprun k8 + k8 "$@" + +%apprun eiannot + eiannot "$@" + +%apprun strawberry + strawberry "$@" + +%apprun class + class "$@" + +%apprun scallop + scallop "$@" + +%apprun ruby + ruby "$@" + +%apprun STAR + STAR "$@" + +%apprun STARlong + STARlong "$@" + +%apprun diamond + diamond "$@" + +%apprun prinseq + /usr/local/bin/prinseq-lite-0.20.4/prinseq-lite.pl "$@" + +%apprun "prinseq-lite.pl" + /usr/local/bin/prinseq-lite-0.20.4/prinseq-lite.pl "$@" + +%apprun gt + gt "$@" + +%apprun gffread + gffread "$@" + +%apprun cufflinks + cufflinks "$@" + +%apprun cuffcompare + cuffcompare "$@" +