resolve merge conflict

pymzml · May 24, 2018 · 5a303a8 · 5a303a8
2 parents 5a6a8e0 + d71d26c
commit 5a303a8
Show file tree

Hide file tree

Showing 34 changed files with 1,588 additions and 28 deletions.
diff --git a/docs/source/code_inc/access_run_info.inc b/docs/source/code_inc/access_run_info.inc
@@ -0,0 +1,48 @@
+.. code-block:: python
+
+	#!/usr/bin/env python
+	import sys
+	import pymzml
+
+
+	def main(mzml_file):
+	    '''
+	    Basic example script to access basic run info of an mzML file. Requires a
+	    mzML file as first command line argument.
+
+	    usage:
+
+	        ./access_run_info.py <path_to_mzml_file>
+
+	    >>> run.info =
+	            {
+	                'encoding': 'utf-8',
+	                 'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
+	                 'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
+	                 'obo_version': '1.1.0',
+	                 'offset_dict': None,
+	                 'run_id': 'ru_0',
+	                 'spectrum_count': 1684,
+	                 'start_time': '2009-08-09T22:32:31'
+	             }
+
+	    '''
+	    run = pymzml.run.Reader(mzml_file)
+	    print(
+	        '''
+	Summary for mzML file:
+	    {file_name}
+	Run was measured on {start_time} using obo version {obo_version}
+	File contains {spectrum_count} spectra
+	        '''.format(
+	            **run.info
+	        )
+	    )
+	
+	
+	if __name__ == '__main__':
+	    if len(sys.argv) < 2:
+	        print(main.__doc__)
+	        exit()
+	    mzml_file = sys.argv[1]
+	    main(mzml_file)
diff --git a/docs/source/code_inc/batch_compress_to_igzip.inc b/docs/source/code_inc/batch_compress_to_igzip.inc
@@ -0,0 +1,37 @@
+.. code-block:: python
+
+	#!/usr/bin/env python3.4
+
+	import sys
+	import os
+	from pymzml.utils.utils import index_gzip
+	from pymzml.run import Reader
+
+
+	def main(mzml_path):
+	    """
+	    Create and indexed gzip mzML file from a plain mzML.
+
+	    Usage: python3 gzip_mzml.py <path/to/mzml> <path/to/output>
+	    """
+	    with open(mzml_path) as fin:
+	        fin.seek(0,2)
+	        max_offset_len = fin.tell()
+	        max_spec_no    = Reader(mzml_path).get_spectrum_count() + 10
+
+	    out_path = '{0}.gz'.format(mzml_path)
+	    index_gzip(
+	        mzml_path,
+	        out_path,
+	        max_idx = max_spec_no,
+	        idx_len = len(str(max_offset_len))
+	    )
+	    print('Wrote file {0}'.format(out_path))
+
+	if __name__ == '__main__':
+	    if len(sys.argv) > 1:
+	        main(
+	            sys.argv[1],
+	        )
+	    else:
+	        print(main.__doc__)
diff --git a/docs/source/code_inc/compare_spectra.inc b/docs/source/code_inc/compare_spectra.inc
@@ -0,0 +1,70 @@
+.. code-block:: python
+
+	#!/usr/bin/env python
+	# -*- coding: utf-8 -*-
+
+	from __future__ import print_function
+	import pymzml
+	import os
+
+
+	def main():
+	    """
+	    Compare multiple spectra and return the cosine distance between them.
+	    The returned value is between 0 and 1, a returned value of 1
+	    represents highest similarity.
+
+	    usage:
+
+	        ./compare_spectra.py
+
+	    """
+	    example_file = os.path.join(
+	        os.path.dirname(__file__),
+	        os.pardir,
+	        'tests',
+	        'data',
+	        'example.mzML'
+	    )
+	    print(
+	        """
+	            Comparing spectra
+	        """
+	    )
+	    # print(example_file)
+	    run = pymzml.run.Reader(example_file)
+	    tmp = []
+	    for spec in run:
+	        if spec.ms_level == 1:
+	            print(
+	                "Parsing spectrum lvl 1 has id {0}".format(
+	                    spec.ID
+	                )
+	            )
+	            tmp.append( spec )
+	            if len(tmp) >= 3:
+	                break
+
+	    print(
+	        "Print total number of specs collected {0}".format(
+	            len(tmp)
+	        )
+	    )
+	    for compare_tuples in [ (0, 1), (0, 2), (1, 2) ]:
+	        print(
+	            "Cosine between spectra {0} & {1} is {2:1.4f}".format(
+	                compare_tuples[0] + 1,
+	                compare_tuples[1] + 1,
+	                tmp[ compare_tuples[0] ].similarity_to( tmp[ compare_tuples[1] ] )
+	            )
+	        )
+
+	    print(
+	        "Cosine score between first spectrum against itself: {0:1.4f}".format(
+	            tmp[0].similarity_to(tmp[0])
+	        )
+	    )
+
+
+	# Run the comparison example only when executed as a script, not on import
+	if __name__ == '__main__':
+	    main()
diff --git a/docs/source/code_inc/compress_all_files_in_folder_2_igzip.inc b/docs/source/code_inc/compress_all_files_in_folder_2_igzip.inc
@@ -0,0 +1,36 @@
+.. code-block:: python
+
+	#!/usr/bin/env python3.4
+
+	import sys
+	import os
+	from pymzml.utils.utils import index_gzip
+	# from pymzml.run import Reader
+	import pymzml
+	import glob
+
+
+	def main(folder):
+	    """
+	    Create an indexed gzip mzML file from a plain mzML of all files in folder
+
+	    Usage: python3 gzip_mzml.py <folder>
+	    """
+
+	    for mzml_path in glob.glob(os.path.join(folder, '*.mzML')):
+	        print('Processing file: {0}'.format(mzml_path))
+	        with open(mzml_path) as fin:
+	            fin.seek(0,2)
+	            max_offset_len = fin.tell()
+	            max_spec_no    = pymzml.run.Reader(mzml_path).get_spectrum_count() + 10
+	        out_path = '{0}.gz'.format(mzml_path)
+	        index_gzip(
+	            mzml_path,
+	            out_path,
+	            max_idx = max_spec_no,
+	            idx_len = len(str(max_offset_len))
+	        )
+	        print('Wrote file {0}'.format(out_path))
+
+	if __name__ == '__main__':
+	    main(sys.argv[1])
diff --git a/docs/source/code_inc/deprecation_check.inc b/docs/source/code_inc/deprecation_check.inc
@@ -0,0 +1,48 @@
+.. code-block:: python
+
+	#!/usr/bin/env python3
+
+	from __future__ import print_function
+	import pymzml
+	import os
+
+
+	def main():
+	    """
+	    Testscript to highlight the function name changes in the Spectrum class.
+
+	    Note:
+	        Please adjust any old scripts to the new syntax.
+
+	    usage:
+
+	        ./deprecation_check.py
+
+	    """
+
+	    example_file = os.path.join(
+	        os.path.dirname(__file__),
+	        os.pardir,
+	        'tests',
+	        'data',
+	        'example.mzML'
+	    )
+	    run = pymzml.run.Reader(example_file,)
+	    spectrum_list = [ ]
+	    for pos, spectrum in enumerate(run):
+	        spectrum_list.append(spectrum)
+	        spectrum.hasPeak( ( 813.19073486 ) )
+	        spectrum.extremeValues( 'mz' )
+	        spectrum.hasOverlappingPeak( 813.19073486  )
+	        spectrum.highestPeaks( 1 )
+	        spectrum.estimatedNoiseLevel()
+	        spectrum.removeNoise()
+	        spectrum.transformMZ( 813.19073486 )
+	        if pos == 1:
+	            spectrum.similarityTo(
+	                spectrum_list[0]
+	            )
+	            break
+
+	# Run the deprecation check only when executed as a script, not on import
+	if __name__ == '__main__':
+	    main()
diff --git a/docs/source/code_inc/download_obo_database.inc b/docs/source/code_inc/download_obo_database.inc
@@ -0,0 +1,120 @@
+.. code-block:: python
+
+	#!/usr/bin/env python
+
+	from __future__ import print_function
+	from argparse import ArgumentParser
+	import os
+	import re
+	import subprocess
+	import tempfile
+	import shutil
+	import sys
+
+	'''Download all versions of the psidev OBO for mzML files'''
+
+	CVS_SERVER = ':pserver:anonymous:''@psidev.cvs.sourceforge.net:/cvsroot/psidev'
+
+
+	class CVSClient(object):
+
+	    def __init__(self, root, client_dir=None, verbose=False):
+	        self.root = root
+
+	        self.client_dir = client_dir
+	        self.cleanup = False
+	        self.verbose = verbose
+
+	    def __enter__(self):
+	        if not self.client_dir or not os.path.isdir(self.client_dir):
+	            self.client_dir = tempfile.mkdtemp(prefix='cvs')
+	            self.cleanup = True
+
+	        return self
+
+	    def __exit__(self, exc_type, exc_value, traceback):
+	        if self.cleanup:
+	            shutil.rmtree(self.client_dir, ignore_errors=True)
+
+	    def __call__(self, *args, **kwargs):
+	        command = ['cvs', '-d' + self.root] + list(args)
+
+	        # Default args for subprocess
+	        processkwargs = dict(cwd=self.client_dir)
+
+	        if not self.verbose:
+	            command.insert(1, '-Q')
+	            processkwargs['stderr'] = open(os.devnull, 'w')
+
+	        # Override defaults with provided args
+	        processkwargs.update(kwargs)
+
+	        if self.verbose:
+	            print('>>> ' + ' '.join(command), file=sys.stderr)
+
+	        return subprocess.check_output(command, **processkwargs)
+
+
+	def get_version_map(cvs):
+	    filename = 'psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'
+
+	    # Download the OBO file and get its revision log
+	    cvs('-z3', 'co', filename)
+	    revisions = parse_revisions(cvs('-z3', 'log').split('\n'))
+
+	    version_map = {}
+	    for revision in revisions:
+	        file_at_revision = cvs('-z3', 'co', '-p', '-r', revision, filename)
+	        version = parse_version(file_at_revision)
+
+	        # Revisions go from newest to oldest, so if a version exists in the
+	        # dictionary, it's already the newest revision of that version
+	        if version and version not in version_map:
+	            version_map[version] = file_at_revision
+
+	    return version_map
+
+
+	def parse_revisions(revision_log):
+	    revisions = []
+	    revision_regexp = re.compile(r'revision (\d+\.\d+)')
+	    for line in revision_log:
+	        match = revision_regexp.match(line)
+	        if match:
+	            revisions.append(match.group(1))
+	    return revisions
+
+
+	def parse_version(file_string):
+	    version_regexp = re.compile(r'remark:\s+version: (\d+\.\d+\.\d+\S*)')
+	    version = None
+	    for line in file_string.split('\n'):
+	        match = version_regexp.match(line)
+	        if match:
+	            version = match.group(1)
+
+	    return version
+
+
+	def save_versions(version_map, destination):
+	    for version, file_at_version in version_map.iteritems():
+	        destination_path = os.path.join(destination, 'psi-ms-{0}.obo'.format(version))
+	        with open(destination_path, 'w+') as destination_file:
+	            destination_file.write(file_at_version)
+
+	if __name__ == '__main__':
+	    parser = ArgumentParser(description=__doc__)
+	    parser.add_argument('destination', help='directory into which the OBO files go')
+	    parser.add_argument('-v', '--verbose', action='store_true',
+	                        help='show extra logging information')
+	    args = parser.parse_args()
+
+	    # Sanity checking
+	    assert os.path.isdir(args.destination), 'destination must be a valid directory'
+
+	    with CVSClient(CVS_SERVER, verbose=args.verbose) as cvs:
+	        cvs('login')
+	        revision_map = get_version_map(cvs)
+	        save_versions(revision_map, args.destination)
+
+	# vim: ts=4:sw=4:sts=4