In [36]:
import sys
import os
import numpy as np

In [37]:
# Platform switch for this notebook. In the visible cells it only controls
# the conditional import in the following cell.
os_type = "linux"
#os_type = "windows"

In [38]:
# Extra standard-library modules that are only loaded when the notebook is
# configured for Windows.
if os_type == "windows":
    import subprocess
    import linecache

In [39]:
# Head of the generated Lorikeet viewer page.
# The template deliberately ends inside an open <script> element: the code
# that writes the page appends the JavaScript variable definitions
# (sequence, peaks, charge, precursorMz, varMods, ntermMod_, ctermMod_) and
# then closes </script></body></html>.
# ntermMod_ / ctermMod_ are read through a `typeof` guard so that the page
# still renders (with a terminal modification mass of 0) when the generator
# did not define them.
htmlpage = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    
    <title>Lorikeet Spectrum Viewer</title>
    
    <!--[if IE]><script language="javascript" type="text/javascript" src="js/excanvas.min.js"></script><![endif]-->
    <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js"></script>
    <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.4/jquery-ui.min.js"></script>
    <script type="text/javascript" src="js/jquery.flot.js"></script>
    <script type="text/javascript" src="js/jquery.flot.selection.js"></script>
    
    <script type="text/javascript" src="js/specview.js"></script>
    <script type="text/javascript" src="js/peptide.js"></script>
    <script type="text/javascript" src="js/aminoacid.js"></script>
    <script type="text/javascript" src="js/ion.js"></script>
    
    <link REL="stylesheet" TYPE="text/css" HREF="css/lorikeet.css">
    
</head>

<body>

<h1>Lorikeet Plugin Example</h1>

<!-- PLACE HOLDER DIV FOR THE SPECTRUM -->
<div id="lorikeet1"></div>

<script type="text/javascript">

$(document).ready(function () {

	/* render the spectrum with the given options */
	$("#lorikeet1").specview({sequence: sequence, 
								charge: charge,
								precursorMz: precursorMz,
								variableMods: varMods, 
								ntermMod: (typeof ntermMod_ !== "undefined") ? ntermMod_ : 0,
								ctermMod: (typeof ctermMod_ !== "undefined") ? ctermMod_ : 0,
								peaks: peaks,
                                massError : 20,
                                massErrorUnit: "ppm",
                                peakDetect: 0     
								});	
});

'''

In [40]:
# Parsing an MS2 spectrum (title) from an MGF file (mgf)
# The file is streamed line by line in pure Python, so no external tool
# (grep/findstr), no shell command and no temporary file are involved and
# the same code runs on every platform.
# TODO: We should think about how to optimize this for ionbot.cloud
def get_spectrum(mgf, title):
    """Extract one spectrum from an MGF file as a JavaScript peak-list literal.

    The first ``BEGIN IONS`` ... ``END IONS`` block that has a line containing
    ``title`` (plain substring match) is selected. The whole block is parsed,
    so ``PEPMASS=`` / ``CHARGE=`` lines are picked up whether they come before
    or after the matching line.

    Parameters
    ----------
    mgf : str
        Path to the MGF file.
    title : str
        Text that identifies the spectrum, e.g. ``"scan=5814"``.

    Returns
    -------
    tuple
        ``(spectrum, charge, parent_mz)``:

        * ``spectrum`` -- string of the form ``"[[mz,intensity],...]"``. When
          the spectrum is not found or has no peaks, the legacy sentinel
          ``"]"`` is returned, which is what ``run_annotation`` tests for.
        * ``charge`` -- charge as a string without the ``+`` sign, or the
          integer ``0`` when the block has no ``CHARGE=`` line.
        * ``parent_mz`` -- first value of the ``PEPMASS=`` line as a string,
          or ``"0"`` when the block has no such line.

    Raises
    ------
    OSError
        If the MGF file cannot be opened.
    """
    #TODO: We might want to put this loading outside
    block = []
    found = False
    with open(mgf) as f:
        for raw in f:
            line = raw.strip()
            if line.startswith("BEGIN IONS"):
                # A new spectrum starts: forget whatever was collected so far.
                block = []
                continue
            if "END IONS" in line:
                if found:
                    break
                block = []
                continue
            if title in raw:
                found = True
            block.append(line)

    charge = 0
    parent_mz = "0"
    peaks = []
    if found:
        for line in block:
            if not line:
                continue
            if line.startswith("PEPMASS="):
                # PEPMASS may carry an optional intensity after the m/z value.
                fields = line[len("PEPMASS="):].split()
                if fields:
                    parent_mz = fields[0]
            elif line.startswith("CHARGE="):
                charge = line[len("CHARGE="):].strip().replace("+", "")
            elif "=" not in line:
                # Peak line: m/z and intensity separated by spaces or tabs.
                fields = line.split()
                if len(fields) >= 2:
                    peaks.append("[%s,%s]" % (fields[0], fields[1]))

    if not peaks:
        # Keep the sentinel the original implementation produced for an
        # empty peak list so existing callers keep working.
        return "]", charge, parent_mz
    return "[" + ",".join(peaks) + "]", charge, parent_mz

def get_varmods(peptide, modifications):
    """Translate a ``P|D|P|D...`` modification string into Lorikeet inputs.

    Parameters
    ----------
    peptide : str
        Peptide sequence; used to look up the residue at each position.
    modifications : str
        Alternating position / delta-mass fields separated by ``|``.
        Position ``0`` denotes the N-terminus, ``-1`` the C-terminus and
        ``1..len(peptide)`` a residue (1-based). An empty string means
        "no modifications".

    Returns
    -------
    tuple
        ``(mods, nterm_mod, cterm_mod)`` where ``mods`` is a list of
        JavaScript object literals (strings) of the form
        ``{index: P, modMass: D, aminoAcid: 'X'}`` and the two terminal
        values are delta-mass strings (``"0"`` when absent).

    Raises
    ------
    ValueError
        If the fields do not come in position/mass pairs, a position is not
        an integer, or a residue position lies outside the peptide.
    """
    mods = []
    nterm_mod = "0"
    cterm_mod = "0"
    if modifications.strip() == "":
        return mods, nterm_mod, cterm_mod

    fields = modifications.split("|")
    if len(fields) % 2 != 0:
        raise ValueError(
            "modifications must be position|mass pairs, got %r" % modifications)

    for k in range(0, len(fields), 2):
        # Fields are text; the position has to be converted before it can be
        # compared with 0 / -1 or used as an index into the peptide.
        position = int(fields[k])
        mass = fields[k + 1].strip()
        if position == 0:
            nterm_mod = mass
        elif position == -1:
            cterm_mod = mass
        else:
            if not 1 <= position <= len(peptide):
                raise ValueError(
                    "modification position %d is outside peptide %r"
                    % (position, peptide))
            mods.append("{index: %i, modMass: %s, aminoAcid: '%s'}"
                        % (position, mass, peptide[position - 1]))
    return mods, nterm_mod, cterm_mod

def run_annotation(mgf_file,title,sequence,modifications):
    """Write a Lorikeet HTML page annotating one spectrum with a peptide.

    The page is written to ``annotations/<sequence><title>.html`` (the
    directory is created when missing). It consists of the ``htmlpage``
    template followed by the JavaScript variables the template reads.

    Parameters
    ----------
    mgf_file : str
        Path to the MGF file holding the spectrum.
    title : str
        Text that identifies the spectrum inside the MGF file.
    sequence : str
        Peptide sequence to annotate.
    modifications : str
        ``P|D|P|D...`` modification string; ``""`` for an unmodified peptide.

    Returns
    -------
    None
        Prints a message; no file is written when the spectrum is not found.
    """
    spectrum, charge, parent_mz = get_spectrum(mgf_file, title)
    # "]" is what get_spectrum yields for an empty peak list; "[]" is accepted
    # as well so an empty JavaScript array is treated the same way.
    if spectrum in ("]", "[]"):
        print("spectrum not found")
        return

    varmods_list = []
    nterm_mod = "0"
    cterm_mod = "0"
    if modifications != "":
        varmods_list, nterm_mod, cterm_mod = get_varmods(sequence, modifications)

    os.makedirs("annotations", exist_ok=True)
    out_name = sequence+title+'.html'
    with open("annotations/" + out_name,'w') as f:
        f.write(htmlpage+'\n')
        f.write('var sequence = "%s";\n'%sequence)
        f.write('var peaks = %s;\n'%spectrum)
        f.write('var charge = %s;\n'%charge)
        # The parsed parent_mz is currently not emitted; the page always
        # receives a precursor m/z of 0.
        f.write('var precursorMz = 0;\n')
        f.write('var varMods = [];\n')
        for i,mod in enumerate(varmods_list):
            f.write("varMods[%i] = %s\n"%(i,mod))
        # Always declare both terminal modification variables: the template
        # references them, and an undeclared name would abort the page script.
        f.write("var ntermMod_ = %s;\n"%(nterm_mod))
        f.write("var ctermMod_ = %s;\n"%(cterm_mod))
        f.write('</script></body></html>\n')

    print("Annotation written to annotations/{}.".format(out_name))


# INPUT

In [41]:
#Specify the path to the MGF spectrum file
mgf_file 		= "/home/compomics/extra_disk/data/PXD006675/mgf/20160901_QEp2_SoDo_SA_LC12-13_PV8-frac1.mgf"

#Specify the spectrum title
#This should be a string that occurs in the title and uniquely selects the spectrum
title	= "scan=5814"

#Specify the peptide sequence
sequence = "KYMDEINKR"

#Specify modifications
#Each modification is formatted as P|D with P the position in the peptide and D the delta mass
#Positions start with position 1, 0 is for N-term and -1 is for C-term modifications
#Multiple modifications in a peptide are separated with '|' (e.g. P|D|P|D)
modifications = ""
#modifications 	= "3|15.9949"

# OUTPUT

In [42]:
# Write the annotation page for the spectrum and peptide configured in the INPUT cell.
run_annotation(mgf_file,title,sequence,modifications)

Annotation written to annotations/KYEDEINKRscan=5814.html.
