Skip to content

Commit

Permalink
resolve merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Manuel committed May 24, 2018
2 parents 5a6a8e0 + d71d26c commit 5a303a8
Show file tree
Hide file tree
Showing 34 changed files with 1,588 additions and 28 deletions.
48 changes: 48 additions & 0 deletions docs/source/code_inc/access_run_info.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
.. code-block:: python

#!/usr/bin/env python
import sys
import pymzml


def main(mzml_file):
    '''
    Basic example script to access basic run info of an mzML file. Requires a
    mzML file as first command line argument.

    usage:

        ./access_run_info.py <path_to_mzml_file>

    >>> run.info =
    {
        'encoding': 'utf-8',
        'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
        'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
        'obo_version': '1.1.0',
        'offset_dict': None,
        'run_id': 'ru_0',
        'spectrum_count': 1684,
        'start_time': '2009-08-09T22:32:31'
    }

    '''
    # NOTE: the docstring above is also what the script entry point prints
    # as usage text when no file argument is given.

    # Template filled from the reader's ``info`` mapping; it only uses the
    # file_name, start_time, obo_version and spectrum_count keys.
    summary_template = '''
Summary for mzML file:
{file_name}
Run was measured on {start_time} using obo version {obo_version}
File contains {spectrum_count} spectra
'''
    reader = pymzml.run.Reader(mzml_file)
    print(summary_template.format(**reader.info))
if __name__ == '__main__':
    # Script entry point: expects the mzML path as first command line argument.
    if len(sys.argv) < 2:
        # No input file given: show main's docstring as usage text and stop.
        print(main.__doc__)
        # sys.exit() instead of the bare exit(): the latter is injected by the
        # ``site`` module for interactive use and is not guaranteed to exist.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
37 changes: 37 additions & 0 deletions docs/source/code_inc/batch_compress_to_igzip.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
.. code-block:: python

#!/usr/bin/env python3.4

import sys
import os
from pymzml.utils.utils import index_gzip
from pymzml.run import Reader


def main(mzml_path):
    """
    Create an indexed gzip mzML file from a plain mzML.

    The compressed file is written next to the input as ``<mzml_path>.gz``.

    Usage: python3 gzip_mzml.py <path/to/mzml>
    """
    # Size of the input in bytes. Asking the filesystem avoids opening the
    # file in text mode just to seek to its end.
    max_offset_len = os.path.getsize(mzml_path)

    # Spectrum count reported by the reader plus a margin of 10.
    # NOTE(review): the reason for the +10 margin is not visible here - confirm.
    max_spec_no = Reader(mzml_path).get_spectrum_count() + 10

    out_path = '{0}.gz'.format(mzml_path)
    index_gzip(
        mzml_path,
        out_path,
        max_idx=max_spec_no,
        # Number of decimal digits of the file size; presumably the field
        # width reserved for one offset in the index - verify in index_gzip.
        idx_len=len(str(max_offset_len)),
    )
    print('Wrote file {0}'.format(out_path))

if __name__ == '__main__':
    # Script entry point: without a path argument only the usage text
    # (main's docstring) is printed.
    if len(sys.argv) <= 1:
        print(main.__doc__)
    else:
        main(sys.argv[1])
70 changes: 70 additions & 0 deletions docs/source/code_inc/compare_spectra.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
.. code-block:: python

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
import pymzml
import os


def main():
    """
    Compare multiple spectra and print the cosine similarity between them.
    The printed value is between 0 and 1, a value of 1
    represents highest similarity.

    Up to three MS level 1 spectra are collected from the bundled example
    file and compared pairwise. Files with fewer than three such spectra are
    handled: only the pairs that exist are compared.

    usage:

        ./compare_spectra.py

    """
    # Local import keeps this fix self-contained within the function.
    from itertools import combinations

    example_file = os.path.join(
        os.path.dirname(__file__),
        os.pardir,
        'tests',
        'data',
        'example.mzML'
    )
    print(
        """
Comparing spectra
"""
    )
    run = pymzml.run.Reader(example_file)
    tmp = []
    for spec in run:
        if spec.ms_level != 1:
            continue
        print(
            "Parsing spectrum lvl 1 has id {0}".format(
                spec.ID
            )
        )
        tmp.append(spec)
        # Three spectra are enough for the pairwise comparison below.
        if len(tmp) >= 3:
            break

    print(
        "Print total number of specs collected {0}".format(
            len(tmp)
        )
    )
    # combinations() yields (0, 1), (0, 2), (1, 2) for three spectra and
    # fewer (or no) pairs when fewer spectra were found, instead of
    # indexing past the end of the list.
    for first, second in combinations(range(len(tmp)), 2):
        print(
            "Cosine between spectra {0} & {1} is {2:1.4f}".format(
                first + 1,
                second + 1,
                tmp[first].similarity_to(tmp[second])
            )
        )

    # The self comparison needs at least one collected spectrum.
    if tmp:
        print(
            "Cosine score between first spectrum against itself: {0:1.4f}".format(
                tmp[0].similarity_to(tmp[0])
            )
        )


if __name__ == '__main__':
    # Script entry point: run the comparison only when executed directly.
    main()
36 changes: 36 additions & 0 deletions docs/source/code_inc/compress_all_files_in_folder_2_igzip.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
.. code-block:: python

#!/usr/bin/env python3.4

import sys
import os
from pymzml.utils.utils import index_gzip
# from pymzml.run import Reader
import pymzml
import glob


def main(folder):
    """
    Create an indexed gzip mzML file from a plain mzML of all files in folder

    Every ``*.mzML`` file directly inside ``folder`` is compressed to
    ``<file>.gz`` next to the original.

    Usage: python3 gzip_mzml.py <folder>
    """
    for mzml_path in glob.glob(os.path.join(folder, '*.mzML')):
        print('Processing file: {0}'.format(mzml_path))

        # Size of the input in bytes. Asking the filesystem avoids opening
        # the file in text mode just to seek to its end.
        max_offset_len = os.path.getsize(mzml_path)

        # Spectrum count reported by the reader plus a margin of 10.
        # NOTE(review): the reason for the +10 margin is not visible here.
        max_spec_no = pymzml.run.Reader(mzml_path).get_spectrum_count() + 10

        out_path = '{0}.gz'.format(mzml_path)
        index_gzip(
            mzml_path,
            out_path,
            max_idx=max_spec_no,
            # Number of decimal digits of the file size; presumably the field
            # width reserved for one offset in the index - verify in index_gzip.
            idx_len=len(str(max_offset_len)),
        )
        print('Wrote file {0}'.format(out_path))

if __name__ == '__main__':
    # Script entry point: expects the folder to process as first argument.
    # Without it, print main's docstring as usage text instead of failing
    # with an IndexError on sys.argv[1].
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        print(main.__doc__)
48 changes: 48 additions & 0 deletions docs/source/code_inc/deprecation_check.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
.. code-block:: python

#!/usr/bin/env python3

from __future__ import print_function
import pymzml
import os


def main():
    """
    Testscript to highlight the function name changes in the Spectrum class.

    The first two spectra of the bundled example file are run through the
    old camelCase method names.

    Note:
        Please adjust any old scripts to the new syntax.

    usage:

        ./deprecation_check.py

    """
    data_dir = os.path.join(
        os.path.dirname(__file__),
        os.pardir,
        'tests',
        'data'
    )
    example_file = os.path.join(data_dir, 'example.mzML')

    # m/z value handed to every call below that takes one.
    probe_mz = 813.19073486

    run = pymzml.run.Reader(example_file)
    seen = []
    for index, spectrum in enumerate(run):
        seen.append(spectrum)
        # The call order is kept as is; some of these calls may modify the
        # spectrum (e.g. removeNoise) - not verifiable from this script.
        spectrum.hasPeak(probe_mz)
        spectrum.extremeValues('mz')
        spectrum.hasOverlappingPeak(probe_mz)
        spectrum.highestPeaks(1)
        spectrum.estimatedNoiseLevel()
        spectrum.removeNoise()
        spectrum.transformMZ(probe_mz)
        if index != 1:
            continue
        # Second spectrum reached: compare it with the first one and stop.
        spectrum.similarityTo(seen[0])
        break

if __name__ == '__main__':
    # Script entry point: run the deprecation check only when executed directly.
    main()
120 changes: 120 additions & 0 deletions docs/source/code_inc/download_obo_database.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
.. code-block:: python

#!/usr/bin/env python

from __future__ import print_function
from argparse import ArgumentParser
import os
import re
import subprocess
import tempfile
import shutil
import sys

# Assigned to __doc__ explicitly: a bare string placed after the imports is
# not picked up as the module docstring, so __doc__ would stay None where the
# argument parser uses it as its description.
__doc__ = '''Download all versions of the psidev OBO for mzML files'''

# CVS root handed to ``cvs -d``. The two adjacent literals are concatenated,
# leaving nothing between 'anonymous:' and '@'.
CVS_SERVER = ':pserver:anonymous:''@psidev.cvs.sourceforge.net:/cvsroot/psidev'


class CVSClient(object):
    """
    Callable wrapper around the ``cvs`` command line client.

    Used as a context manager it makes sure a working directory exists: if
    no existing ``client_dir`` was supplied, a temporary one is created on
    entry and removed again on exit.

    Args:
        root: CVS root, passed to ``cvs`` via ``-d``.
        client_dir: directory the ``cvs`` commands are run in; a temporary
            directory is used when this is missing or not a directory.
        verbose: when true, echo each command to stderr and keep the
            output of ``cvs`` on stderr visible.
    """

    def __init__(self, root, client_dir=None, verbose=False):
        self.root = root

        self.client_dir = client_dir
        # Set by __enter__ when the working directory is a temporary one
        # that __exit__ has to remove.
        self.cleanup = False
        self.verbose = verbose

    def __enter__(self):
        if not self.client_dir or not os.path.isdir(self.client_dir):
            self.client_dir = tempfile.mkdtemp(prefix='cvs')
            self.cleanup = True

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only directories created by __enter__ are removed.
        if self.cleanup:
            shutil.rmtree(self.client_dir, ignore_errors=True)

    def __call__(self, *args, **kwargs):
        """
        Run ``cvs -d<root> <args...>`` and return what it wrote to stdout.

        Args:
            *args: arguments appended to the ``cvs`` command line.
            **kwargs: extra keyword arguments for
                ``subprocess.check_output``; they override the defaults
                set here (``cwd``, ``universal_newlines``, ``stderr``).

        Returns:
            The captured standard output as text.

        Raises:
            subprocess.CalledProcessError: if ``cvs`` exits non-zero.
        """
        command = ['cvs', '-d' + self.root] + list(args)

        # Default args for subprocess. Text mode makes the output a str on
        # Python 3 as well, which the callers rely on when they split it
        # on '\n'.
        processkwargs = dict(cwd=self.client_dir, universal_newlines=True)

        devnull = None
        if not self.verbose:
            command.insert(1, '-Q')
            # Only open the null device when it will really be used, so it
            # can be closed again once the command has finished.
            if 'stderr' not in kwargs:
                devnull = open(os.devnull, 'w')
                processkwargs['stderr'] = devnull

        # Override defaults with provided args
        processkwargs.update(kwargs)

        if self.verbose:
            print('>>> ' + ' '.join(command), file=sys.stderr)

        try:
            return subprocess.check_output(command, **processkwargs)
        finally:
            if devnull is not None:
                devnull.close()


def get_version_map(cvs):
    """
    Collect the contents of the psi-ms OBO file for every version found.

    Args:
        cvs: callable that runs a ``cvs`` command with the given arguments
            and returns its output.

    Returns:
        dict mapping each version string found by ``parse_version`` to the
        file contents of the first revision carrying that version.
    """
    obo_path = 'psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'

    # Check the file out once, then read the revision log of the checkout.
    cvs('-z3', 'co', obo_path)
    log_lines = cvs('-z3', 'log').split('\n')

    newest_by_version = {}
    for rev in parse_revisions(log_lines):
        contents = cvs('-z3', 'co', '-p', '-r', rev, obo_path)
        obo_version = parse_version(contents)

        # The log lists revisions from newest to oldest, so the first
        # revision seen for a version is the one to keep; setdefault leaves
        # an already stored entry untouched.
        if obo_version:
            newest_by_version.setdefault(obo_version, contents)

    return newest_by_version


def parse_revisions(revision_log):
    """
    Extract revision numbers from the lines of a CVS log.

    Args:
        revision_log: iterable of log lines.

    Returns:
        list of ``major.minor`` revision strings, in log order, taken from
        every line that starts with ``revision <number>.<number>``.
    """
    pattern = re.compile(r'revision (\d+\.\d+)')
    # match() anchors at the start of each line, so indented or embedded
    # occurrences are ignored.
    hits = (pattern.match(entry) for entry in revision_log)
    return [hit.group(1) for hit in hits if hit]


def parse_version(file_string):
    """
    Find the version remark in the text of an OBO file.

    Args:
        file_string: complete file contents as one string.

    Returns:
        The version from the last line starting with
        ``remark: version: <x.y.z...>``, or None if no line matches.
    """
    pattern = re.compile(r'remark:\s+version: (\d+\.\d+\.\d+\S*)')
    hits = [
        hit.group(1)
        for hit in map(pattern.match, file_string.split('\n'))
        if hit
    ]
    # When several lines carry a version remark, the last one wins.
    return hits[-1] if hits else None


def save_versions(version_map, destination):
    """
    Write every OBO version to its own file inside ``destination``.

    Args:
        version_map: dict mapping a version string to the file contents
            of that version.
        destination: existing directory; each entry is written to
            ``psi-ms-<version>.obo`` in it, overwriting a file of that name.
    """
    # items() instead of iteritems(): the latter only exists on Python 2
    # dicts, while items() works on both Python 2 and 3.
    for version, file_at_version in version_map.items():
        destination_path = os.path.join(
            destination,
            'psi-ms-{0}.obo'.format(version)
        )
        with open(destination_path, 'w') as destination_file:
            destination_file.write(file_at_version)

if __name__ == '__main__':
    # Script entry point: download every OBO version into the directory
    # given on the command line.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('destination', help='directory into which the OBO files go')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='show extra logging information')
    args = parser.parse_args()

    # Sanity checking. Reported through the parser rather than an assert:
    # asserts are stripped when Python runs with -O, and parser.error()
    # prints the usage together with the message and exits.
    if not os.path.isdir(args.destination):
        parser.error('destination must be a valid directory')

    with CVSClient(CVS_SERVER, verbose=args.verbose) as cvs:
        cvs('login')
        revision_map = get_version_map(cvs)
        save_versions(revision_map, args.destination)

# vim: ts=4:sw=4:sts=4
Loading

0 comments on commit 5a303a8

Please sign in to comment.