-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
1,588 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python | ||
import sys | ||
import pymzml | ||
|
||
|
||
def main(mzml_file):
    '''
    Basic example script to access basic run info of an mzML file. Requires a
    mzML file as first command line argument.

    usage:

        ./access_run_info.py <path_to_mzml_file>

    >>> run.info =
    {
        'encoding': 'utf-8',
        'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
        'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
        'obo_version': '1.1.0',
        'offset_dict': None,
        'run_id': 'ru_0',
        'spectrum_count': 1684,
        'start_time': '2009-08-09T22:32:31'
    }

    '''
    # The template placeholders are filled straight from the keys of run.info.
    summary_template = '''
Summary for mzML file:
{file_name}
Run was measured on {start_time} using obo version {obo_version}
File contains {spectrum_count} spectra
'''
    reader = pymzml.run.Reader(mzml_file)
    run_info = reader.info
    print(summary_template.format(**run_info))
if __name__ == '__main__':
    # Without a path argument there is nothing to read: show the usage text
    # stored in main's docstring and stop.
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() rather than the bare exit() helper, which is only
        # injected by the ``site`` module and is meant for interactive use.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python3.4 | ||
|
||
import sys | ||
import os | ||
from pymzml.utils.utils import index_gzip | ||
from pymzml.run import Reader | ||
|
||
|
||
def main(mzml_path):
    """
    Create an indexed gzip mzML file from a plain mzML.

    The result is written next to the input as ``<mzml_path>.gz``.

    Usage: python3 gzip_mzml.py <path/to/mzml>

    Args:
        mzml_path (str): path to the plain (uncompressed) mzML file.
    """
    # Ask the OS for the size in bytes. The previous text-mode seek()/tell()
    # pair returns an opaque position on Python 3, not a guaranteed byte count.
    file_size = os.path.getsize(mzml_path)
    # NOTE(review): the "+ 10" looks like headroom on top of the real spectrum
    # count -- confirm against index_gzip's expectations.
    max_spec_no = Reader(mzml_path).get_spectrum_count() + 10

    out_path = '{0}.gz'.format(mzml_path)
    index_gzip(
        mzml_path,
        out_path,
        max_idx=max_spec_no,
        # Number of decimal digits needed to write the file size.
        idx_len=len(str(file_size)),
    )
    print('Wrote file {0}'.format(out_path))
|
||
if __name__ == '__main__':
    # No path given on the command line: fall back to printing the usage text.
    if len(sys.argv) <= 1:
        print(main.__doc__)
    else:
        main(sys.argv[1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import print_function | ||
import pymzml | ||
import os | ||
|
||
|
||
def main():
    """
    Compare multiple spectra and print the cosine similarity between them.

    Up to three MS level 1 spectra are collected from the example file and
    every pair of them is compared. The printed value is between 0 and 1,
    a value of 1 represents highest similarity.

    usage:

        ./compare_spectra.py

    """
    # Local import keeps this example self-contained.
    from itertools import combinations

    example_file = os.path.join(
        os.path.dirname(__file__),
        os.pardir,
        'tests',
        'data',
        'example.mzML'
    )
    print(
        """
Comparing spectra
"""
    )
    run = pymzml.run.Reader(example_file)
    ms1_spectra = []
    for spec in run:
        if spec.ms_level == 1:
            print(
                "Parsing spectrum lvl 1 has id {0}".format(
                    spec.ID
                )
            )
            ms1_spectra.append(spec)
        if len(ms1_spectra) >= 3:
            break

    print(
        "Print total number of specs collected {0}".format(
            len(ms1_spectra)
        )
    )
    # combinations() yields (0, 1), (0, 2), (1, 2) for three spectra and simply
    # fewer (or no) pairs when the file holds fewer MS1 spectra, so short
    # files no longer raise IndexError.
    for first, second in combinations(range(len(ms1_spectra)), 2):
        print(
            "Cosine between spectra {0} & {1} is {2:1.4f}".format(
                first + 1,
                second + 1,
                ms1_spectra[first].similarity_to(ms1_spectra[second])
            )
        )

    # The self-comparison needs at least one collected spectrum.
    if ms1_spectra:
        print(
            "Cosine score between first spectrum against itself: {0:1.4f}".format(
                ms1_spectra[0].similarity_to(ms1_spectra[0])
            )
        )
|
||
|
||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
36 changes: 36 additions & 0 deletions
36
docs/source/code_inc/compress_all_files_in_folder_2_igzip.inc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python3.4 | ||
|
||
import sys | ||
import os | ||
from pymzml.utils.utils import index_gzip | ||
# from pymzml.run import Reader | ||
import pymzml | ||
import glob | ||
|
||
|
||
def main(folder):
    """
    Create an indexed gzip mzML file from a plain mzML of all files in folder

    Every ``*.mzML`` file directly inside ``folder`` is processed and the
    result is written next to it as ``<file>.gz``.

    Usage: python3 gzip_mzml.py <folder>

    Args:
        folder (str): directory that is searched for ``*.mzML`` files.
    """
    for mzml_path in glob.glob(os.path.join(folder, '*.mzML')):
        print('Processing file: {0}'.format(mzml_path))
        # Ask the OS for the size in bytes. The previous text-mode
        # seek()/tell() pair returns an opaque position on Python 3.
        file_size = os.path.getsize(mzml_path)
        # NOTE(review): the "+ 10" looks like headroom on top of the real
        # spectrum count -- confirm against index_gzip's expectations.
        max_spec_no = pymzml.run.Reader(mzml_path).get_spectrum_count() + 10
        out_path = '{0}.gz'.format(mzml_path)
        index_gzip(
            mzml_path,
            out_path,
            max_idx=max_spec_no,
            # Number of decimal digits needed to write the file size.
            idx_len=len(str(file_size)),
        )
        print('Wrote file {0}'.format(out_path))
|
||
if __name__ == '__main__':
    # Show the usage text instead of failing with IndexError when the folder
    # argument is missing.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        print(main.__doc__)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python3 | ||
|
||
from __future__ import print_function | ||
import pymzml | ||
import os | ||
|
||
|
||
def main():
    """
    Call the old-style (camelCase) Spectrum method names on the first two
    spectra of the example file, to highlight the function name changes in
    the Spectrum class.

    Note:
        Please adjust any old scripts to the new syntax.

    usage:

        ./deprecation_check.py

    """
    data_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, 'tests', 'data'
    )
    example_file = os.path.join(data_dir, 'example.mzML')

    # m/z value handed to every call below that takes one.
    probe_mz = 813.19073486

    seen = []
    for pos, spectrum in enumerate(pymzml.run.Reader(example_file)):
        seen.append(spectrum)
        spectrum.hasPeak(probe_mz)
        spectrum.extremeValues('mz')
        spectrum.hasOverlappingPeak(probe_mz)
        spectrum.highestPeaks(1)
        spectrum.estimatedNoiseLevel()
        spectrum.removeNoise()
        spectrum.transformMZ(probe_mz)
        if pos != 1:
            continue
        # Second spectrum reached: compare it with the first one and stop.
        spectrum.similarityTo(seen[0])
        break
|
||
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
.. code-block:: python | ||
|
||
#!/usr/bin/env python | ||
|
||
from __future__ import print_function | ||
from argparse import ArgumentParser | ||
import os | ||
import re | ||
import subprocess | ||
import tempfile | ||
import shutil | ||
import sys | ||
|
||
'''Download all versions of the psidev OBO for mzML files''' | ||
|
||
# CVS root passed to ``cvs -d``; nothing follows the second colon of the
# user part (the original spelled this as an adjacent empty string literal).
CVS_SERVER = ':pserver:anonymous:@psidev.cvs.sourceforge.net:/cvsroot/psidev'
|
||
|
||
class CVSClient(object):
    """
    Thin wrapper around the ``cvs`` command line client.

    Used as a context manager, it makes sure a working directory exists
    (creating a temporary one when needed) and removes that directory again
    on exit if it created it. Calling the instance runs one ``cvs`` command
    and returns its standard output.
    """

    def __init__(self, root, client_dir=None, verbose=False):
        """
        Args:
            root (str): CVS root, passed to ``cvs`` via ``-d``.
            client_dir (str): working directory for the commands; when it is
                missing or not a directory a temporary one is created on
                ``__enter__``.
            verbose (bool): echo each command to stderr and do not silence
                the output of ``cvs``.
        """
        self.root = root
        self.client_dir = client_dir
        # Set to True only when __enter__ created the directory itself.
        self.cleanup = False
        self.verbose = verbose

    def __enter__(self):
        """Ensure a working directory exists and return the client."""
        if not self.client_dir or not os.path.isdir(self.client_dir):
            self.client_dir = tempfile.mkdtemp(prefix='cvs')
            self.cleanup = True

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Remove the working directory if this client created it."""
        if self.cleanup:
            shutil.rmtree(self.client_dir, ignore_errors=True)

    def __call__(self, *args, **kwargs):
        """
        Run ``cvs -d<root> <args...>`` inside the working directory.

        Args:
            *args: command line arguments appended after the root option.
            **kwargs: extra keyword arguments for
                ``subprocess.check_output``; they override the defaults
                chosen here.

        Returns:
            The captured standard output of the command. Text mode is
            requested by default so that the callers in this file can split
            the result on newlines on Python 3 as well.

        Raises:
            subprocess.CalledProcessError: if ``cvs`` exits with a non-zero
                status.
        """
        command = ['cvs', '-d' + self.root] + list(args)

        # Default args for subprocess
        processkwargs = dict(cwd=self.client_dir, universal_newlines=True)

        if not self.verbose:
            command.insert(1, '-Q')

        # Override defaults with provided args
        processkwargs.update(kwargs)

        if self.verbose:
            print('>>> ' + ' '.join(command), file=sys.stderr)

        if self.verbose or 'stderr' in processkwargs:
            return subprocess.check_output(command, **processkwargs)

        # Quiet mode without a caller-supplied stderr: discard stderr, and
        # close the devnull handle again instead of leaking one per call.
        with open(os.devnull, 'w') as devnull:
            return subprocess.check_output(
                command, stderr=devnull, **processkwargs
            )
|
||
|
||
def get_version_map(cvs):
    """
    Build a mapping from OBO version string to the content of the OBO file.

    Args:
        cvs: callable that runs a cvs command and returns its output.

    Returns:
        dict: version string -> file content at the first revision (in log
        order) that carries this version.
    """
    obo_path = 'psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'

    # Check the file out once, then read the revision log.
    cvs('-z3', 'co', obo_path)
    log_lines = cvs('-z3', 'log').split('\n')

    by_version = {}
    for revision in parse_revisions(log_lines):
        content = cvs('-z3', 'co', '-p', '-r', revision, obo_path)
        version = parse_version(content)
        if not version:
            continue
        # Revisions go from newest to oldest, so the first content stored
        # for a version is already its newest revision; later ones are
        # ignored.
        by_version.setdefault(version, content)

    return by_version
|
||
|
||
def parse_revisions(revision_log):
    """
    Collect the revision numbers from the lines of a cvs log.

    Args:
        revision_log: iterable of log lines.

    Returns:
        list: the ``<major>.<minor>`` part of every line that starts with
        ``revision <major>.<minor>``, in the order the lines appear.
    """
    pattern = re.compile(r'revision (\d+\.\d+)')
    hits = (pattern.match(entry) for entry in revision_log)
    return [hit.group(1) for hit in hits if hit]
|
||
|
||
def parse_version(file_string):
    """
    Extract the version from the text of an OBO file.

    Args:
        file_string (str): complete file content.

    Returns:
        The version taken from the last line that starts with
        ``remark: version: <x.y.z...>``, or None when no line matches.
    """
    pattern = re.compile(r'remark:\s+version: (\d+\.\d+\.\d+\S*)')
    hits = [
        hit.group(1)
        for hit in map(pattern.match, file_string.split('\n'))
        if hit
    ]
    # When several lines match, the last one wins.
    return hits[-1] if hits else None
|
||
|
||
def save_versions(version_map, destination):
    """
    Write every OBO version to its own file inside ``destination``.

    Args:
        version_map (dict): version string -> file content.
        destination (str): existing directory; each file is named
            ``psi-ms-<version>.obo``.
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for version, file_at_version in version_map.items():
        destination_path = os.path.join(destination, 'psi-ms-{0}.obo'.format(version))
        with open(destination_path, 'w+') as destination_file:
            destination_file.write(file_at_version)
|
||
if __name__ == '__main__':
    # The descriptive string in this file sits below the imports, so it is
    # not the module docstring and __doc__ is None; fall back to the same
    # text so that --help still shows a description.
    parser = ArgumentParser(
        description=__doc__
        or 'Download all versions of the psidev OBO for mzML files'
    )
    parser.add_argument('destination', help='directory into which the OBO files go')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='show extra logging information')
    args = parser.parse_args()

    # Sanity checking: report a usage error instead of using assert, which
    # is stripped when Python runs with -O.
    if not os.path.isdir(args.destination):
        parser.error('destination must be a valid directory')

    with CVSClient(CVS_SERVER, verbose=args.verbose) as cvs:
        cvs('login')
        revision_map = get_version_map(cvs)
        save_versions(revision_map, args.destination)
|
||
# vim: ts=4:sw=4:sts=4 |
Oops, something went wrong.