Permalink
Browse files

Merge pull request #10 from f-taruttis/developtar

NewMassbankParser: Yet another bug fix, compound name now included; PreProcessSpectra: new layout for merged Spectra
  • Loading branch information...
2 parents 868da47 + eadcadb commit 7ded24301041b6b422f6a22ef8546378bdde92a0 s-wolf committed Nov 14, 2011
View
19 src/de/ipbhalle/metfrag/massbankParser/DatabaseIDs.java
@@ -0,0 +1,19 @@
+package de.ipbhalle.metfrag.massbankParser;
+
+
+/**
+ * Keys for the external database links attached to a spectrum.
+ * NewMassbankParser uses the {@link #toString()} text of each constant as the
+ * search string when it scans a record's link entries.
+ */
+public enum DatabaseIDs {
+
+    CHEBI, KEGG, PUBCHEM_CID, PUBCHEM_SID, KNAPSACK, METLIN;
+
+    /**
+     * Returns the constant's name; for the two PubChem constants the
+     * underscore is replaced by a space ("PUBCHEM CID", "PUBCHEM SID").
+     */
+    @Override
+    public String toString() {
+        final boolean isPubChem = this == PUBCHEM_CID || this == PUBCHEM_SID;
+        return isPubChem ? name().replace('_', ' ') : name();
+    }
+}
View
111 src/de/ipbhalle/metfrag/massbankParser/NewMassbankParser.java
@@ -35,6 +35,8 @@
import java.util.TreeMap;
import java.util.Vector;
+import javax.xml.crypto.Data;
+
import de.ipbhalle.metfrag.spectrum.AssignFragmentPeak;
@@ -169,11 +171,12 @@
String formula = "";
String name = "", instrument = "", precursorType = "";
String nameTrivial = "";
- int linkPubChem = 0;
- String linkCHEBI = "";
- String linkKEGG = "none";
- String linkMetlin="none";
- String linkKnapSack ="none";
+// int linkPubChem = 0;
+// int sid=0;
+// String linkCHEBI = "";
+// String linkKEGG = "none";
+// String linkMetlin="none";
+// String linkKnapSack ="none";
String[] array;
String IUPAC = "";
int mode = 0, collisionEnergy = 0;
@@ -192,6 +195,8 @@
boolean isPositive = false;
+ Map<DatabaseIDs, String> dbLinks= new HashMap<DatabaseIDs, String>();
+ DatabaseIDs[] allIDs =DatabaseIDs.values();
Map<String, ArrayList<String> > recordSpecificInformation = new HashMap<String, ArrayList<String> >();
@@ -269,23 +274,24 @@
while(line.contains(PK$))
{
peakInformation = addElementsToMap(line, peakInformation);
-
- line = in.readLine();
-
+
if(line.contains("PK$PEAK:"))
{
readPeaks=true;
line = in.readLine();
}
-
+ else
+ {
+ line=in.readLine();
+ }
}
//read Peaks
if(line.equals("")) break;
-
-
+//
+// System.out.println(line+"\t"+readPeaks);
while(!line.contains("//") && readPeaks)/* && line!=null*/
@@ -295,6 +301,7 @@
String splitString[] = new String[line.split("\\s+").length];
splitString=line.split("\\s+");
+
peaks.add(new Peak(Double.valueOf(splitString[1]), Double.valueOf(splitString[2]), Double.valueOf(splitString[3]), collisionEnergy));
line = in.readLine();
@@ -333,9 +340,9 @@
name = record.get(CH$).get(CH$+CH.NAME.toString()).get(0);
- if(record.get(CH$).get(CH$+CH.NAME.toString()).size() > 1)
+ if(record.get(CH$).get(CH$+CH.NAME.toString()).size() > 0)
{
- nameTrivial = record.get(CH$).get(CH$+CH.NAME.toString()).get(1);
+ nameTrivial = record.get(CH$).get(CH$+CH.NAME.toString()).get(0);
}
if(record.get(AC$).containsKey(AC$+AC.INSTRUMENT.toString()) )
@@ -366,29 +373,53 @@
for (String ch : chs) {
- String[] dblink = ch.split("\\s+");
-
- if (dblink[1].equals("PUBCHEM")) {
-
- String splitString[] = new String[dblink[2].split(":").length];
-
- splitString = dblink[2].split(":");
-
- if(splitString.length>=2)
+// String[] dblink = ch.split("\\s+");
+
+
+// if (dblink[1].equals("PUBCHEM")) {
+//
+// String splitString[] = new String[dblink[2].split(":").length];
+//
+// splitString = dblink[2].split(":");
+//
+// if(splitString.length>=2 && dblink[1].equals("CID"))
+// {
+// linkPubChem = Integer.valueOf(splitString[1]).intValue();
+// }
+// else
+// {
+// if(splitString.length>=2 && dblink[1].equals("SID"))
+// {
+// sid= Integer.valueOf(splitString[1]).intValue();
+// }
+// }
+// } else if (dblink[1].equals("KEGG") && dblink.length >=3)
+// linkKEGG = dblink[2];
+// else if (dblink[1].equals("CHEBI") && dblink.length >=3)
+// linkCHEBI = dblink[2];
+// else if (dblink[1].contains("METLIN") && dblink.length>=3)
+// linkMetlin=dblink[2];
+// else if (dblink[1].contains("KNAPSACK") && dblink.length>=3)
+// linkKnapSack=dblink[2];
+
+ for (int i = 0; i < allIDs.length; i++) {
+
+ if(allIDs[i]!=null && ch.contains(allIDs[i].toString()))
{
- linkPubChem = Integer.valueOf(splitString[1]).intValue();
+ String link = ch.replace(allIDs[i].toString(), "");
+ link = link.replace(":", "");
+ link=link.trim();
+
+ dbLinks.put(allIDs[i], link);
+
}
- } else if (dblink[1].equals("KEGG") && dblink.length >=3)
- linkKEGG = dblink[2];
- else if (dblink[1].equals("CHEBI") && dblink.length >=3)
- linkCHEBI = dblink[2];
- else if (dblink[1].contains("METLIN") && dblink.length>=3)
- linkMetlin=dblink[2];
- else if (dblink[1].contains("KNAPSACK") && dblink.length>=3)
- linkKnapSack=dblink[2];
-
+
+ }
+
+
}
}
+
// ANALYTICAL CONDITIONS MAP
Map<String, ArrayList<String>> acmap = record.get(AC$);
@@ -426,7 +457,7 @@ else if (dblink[1].contains("KNAPSACK") && dblink.length>=3)
{
String[] splitString2 = new String[splitString[3].split("-").length];
splitString2= splitString[3].split("-");
-
+ //TODO: divide between minimum and maximum energy
collisionEnergy = Integer.valueOf(splitString2[0]).intValue();
collisionEnergy = Integer.valueOf(splitString2[1]).intValue();
@@ -502,7 +533,9 @@ else if (dblink[1].contains("KNAPSACK") && dblink.length>=3)
}
- spectra.add(new Spectrum(collisionEnergy, peaks, mass, mode, IUPAC, linkPubChem, linkKEGG, linkCHEBI,linkMetlin , linkKnapSack ,nameTrivial, formula, precursorMZ, precursorType, isPositive, smiles));
+ //TODO: add sid to spectra, or better create a Map <CompoundDatabases, (ID)String>
+
+ spectra.add(new Spectrum(collisionEnergy, peaks, mass, mode, IUPAC, dbLinks,nameTrivial, formula, precursorMZ, precursorType, isPositive, smiles));
return spectra;
}
@@ -619,15 +652,17 @@ public static void main(String[] args) {
//Vector<Spectrum> spectra = Read("/home/ftarutti/records/PR100040.txt");
//Vector<Spectrum> spectra = Read("/home/ftarutti/records/PB006007.txt");
- Vector<Spectrum> spectra = Read("/home/ftarutti/testspectra/tmp/CO000510.txt");
+
+
+
+ Vector<Spectrum> spectra = Read("/home/ftarutti/testspectra/toMerge/XX006704.txt");
for (Spectrum spectrum : spectra) {
spectrum.show();
Vector<Peak> peaks = spectrum.getPeaks();
- for (Peak peak : peaks) {
- System.out.println(peak.toString());
- }
+ System.out.println(spectrum.dblinks.toString());
+
}
View
74 src/de/ipbhalle/metfrag/massbankParser/Spectrum.java
@@ -21,10 +21,15 @@
package de.ipbhalle.metfrag.massbankParser;
import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
import java.util.Vector;
import java.util.Collections;
import java.util.Iterator;
+import org.eclipse.swt.internal.cde.DtActionArg;
+
public class Spectrum implements java.io.Serializable, Comparable<Spectrum> {
@@ -48,6 +53,60 @@
private String smiles;
+ public Map<DatabaseIDs,String> dblinks;
+
+ /**
+  * Creates a spectrum whose database links are supplied as a map.
+  * Delegates to the constructor that also takes a tic value, passing 0.0 for it.
+  *
+  * @param collisionEnergy collision energy of the spectrum
+  * @param peaks peak list
+  * @param exactMass exact mass
+  * @param mode mode value
+  * @param InchI value stored in the InchI field
+  * @param dblinks database links keyed by database; may be null
+  * @param nameTrivial trivial name
+  * @param formula formula, handed to setFormula
+  * @param precursorMZ precursor m/z
+  * @param precursorType precursor type
+  * @param isPositive positive-mode flag
+  * @param smiles SMILES string
+  */
+ public Spectrum(int collisionEnergy, Vector<Peak> peaks, double exactMass, int mode, String InchI, Map<DatabaseIDs,String> dblinks ,String nameTrivial, String formula, double precursorMZ, String precursorType, boolean isPositive,String smiles){
+     this( collisionEnergy, 0.0, peaks, exactMass, mode, InchI, dblinks, nameTrivial, formula, precursorMZ, precursorType, isPositive,smiles);
+ }
+
+ /**
+  * Creates a spectrum whose database links are supplied as a map.
+  * The map is stored as given in {@code dblinks}; in addition the legacy
+  * fields CID, KEGG, CHEBI, metlin and knapsack are filled from it and keep
+  * their defaults (0, "none", "", "", "") when the map has no matching entry.
+  *
+  * @param collisionEnergy collision energy of the spectrum
+  * @param tic tic value
+  * @param peaks peak list
+  * @param exactMass exact mass
+  * @param mode mode value
+  * @param InchI value stored in the InchI field
+  * @param dblinks database links keyed by database; may be null, in which
+  *        case only the defaults are set
+  * @param nameTrivial trivial name
+  * @param formula formula, handed to setFormula
+  * @param precursorMZ precursor m/z
+  * @param precursorType precursor type
+  * @param isPositive positive-mode flag
+  * @param smiles SMILES string
+  */
+ public Spectrum(int collisionEnergy, double tic, Vector<Peak> peaks, double exactMass, int mode, String InchI, Map<DatabaseIDs,String> dblinks, String nameTrivial, String formula, double precursorMZ, String precursorType, boolean isPositive,String smiles)
+ {
+     // Store the collision energy; without this assignment the argument is
+     // silently dropped and the field stays at its default value.
+     this.collisionEnergy = collisionEnergy;
+     this.tic = tic;
+     this.peaks = peaks;
+     this.exactMass = exactMass;
+     this.mode = mode;
+     this.InchI = InchI;
+
+     this.nameTrivial = nameTrivial;
+     this.precursorMZ = precursorMZ;
+     this.precursorType = precursorType;
+     this.setFormula(formula);
+     this.isPositive = isPositive;
+
+     this.smiles = smiles;
+
+     this.dblinks=dblinks;
+
+     //TODO: perhaps delete the following things
+     this.CID=0;
+     this.KEGG="none";
+     this.CHEBI="";
+     this.metlin="";
+     this.knapsack="";
+
+     // Nothing to copy into the legacy fields without a link map.
+     if(dblinks == null)
+     {
+         return;
+     }
+
+     String cid = dblinks.get(DatabaseIDs.PUBCHEM_CID);
+     if(cid != null)
+     {
+         try
+         {
+             this.CID = Integer.parseInt(cid.trim());
+         }
+         catch (NumberFormatException ignored)
+         {
+             // A non-numeric link text must not abort construction: CID keeps
+             // its default 0 and the raw text stays available through dblinks.
+         }
+     }
+     if(dblinks.containsKey(DatabaseIDs.KEGG))
+     {
+         this.KEGG=dblinks.get(DatabaseIDs.KEGG);
+     }
+     if(dblinks.containsKey(DatabaseIDs.CHEBI))
+     {
+         this.CHEBI= dblinks.get(DatabaseIDs.CHEBI);
+     }
+     if(dblinks.containsKey(DatabaseIDs.METLIN))
+     {
+         this.metlin= dblinks.get(DatabaseIDs.METLIN);
+     }
+     if(dblinks.containsKey(DatabaseIDs.KNAPSACK))
+     {
+         this.knapsack=dblinks.get(DatabaseIDs.KNAPSACK);
+     }
+ }
+
public Spectrum(int collisionEnergy, double tic, Vector<Peak> peaks, double exactMass, int mode, String InchI, int CID, String KEGG, String CHEBI,String metlin,String knapsack, String nameTrivial, String formula, double precursorMZ, String precursorType, boolean isPositive, String smiles){
this.collisionEnergy = collisionEnergy;
@@ -70,6 +129,7 @@ public Spectrum(int collisionEnergy, double tic, Vector<Peak> peaks, double exac
this.smiles = smiles;
}
+
public Spectrum(int collisionEnergy, double tic, Vector<Peak> peaks, double exactMass, int mode, String InchI, int CID, String KEGG, String CHEBI,String metlin, String nameTrivial, String formula, double precursorMZ, String precursorType, boolean isPositive){
this.collisionEnergy = collisionEnergy;
@@ -89,6 +149,8 @@ public Spectrum(int collisionEnergy, double tic, Vector<Peak> peaks, double exac
this.CHEBI = CHEBI;
}
+
+
public Spectrum(int collisionEnergy, Vector<Peak> peaks, double exactMass, int mode, String InchI, int CID, String KEGG, String linkCHEBI,String linkMetlin, String linkKnapsack ,String nameTrivial, String formula, double precursorMZ, String precursorType, boolean isPositive,String smiles){
this( collisionEnergy, 0.0, peaks, exactMass, mode, InchI, CID, KEGG, linkCHEBI,linkMetlin,linkKnapsack, nameTrivial, formula, precursorMZ, precursorType, isPositive,smiles);
}
@@ -266,6 +328,18 @@ public String getSmiles(){
return smiles;
}
+ /**
+  * Looks up the identifier stored for one database.
+  *
+  * @param id database whose identifier is wanted
+  * @return the stored identifier, or null when nothing is stored for
+  *         {@code id} or when this spectrum has no link map
+  */
+ public String getDatabaseID(DatabaseIDs id)
+ {
+     // NOTE(review): dblinks appears to be assigned only by the map-based
+     // constructors, so it may be null for spectra built through the ones
+     // taking separate CID/KEGG/CHEBI arguments - verify.
+     if(dblinks == null)
+     {
+         return null;
+     }
+
+     // Map.get already returns null for an absent key.
+     return dblinks.get(id);
+ }
+
public void show()
{
System.out.println("coll energy "+collisionEnergy);
View
156 src/de/ipbhalle/metfrag/spectrum/PreprocessSpectra.java
@@ -35,6 +35,7 @@
import de.ipbhalle.metfrag.massbankParser.Peak;
+import de.ipbhalle.metfrag.massbankParser.Spectrum;
import de.ipbhalle.metfrag.tools.PPMTool;
@@ -57,6 +58,10 @@
*/
private void preprocessUnsorted(String folder, double mzabs, double mzppm)
{
+
+ String selectedPrecursorTypes ="[M+H]+";
+ int selectedMode = 1;
+
//loop over all files in folder
File f = new File(folder);
File files[] = f.listFiles();
@@ -68,29 +73,54 @@ private void preprocessUnsorted(String folder, double mzabs, double mzppm)
int temp = 0;
+ String recordName ="";
+ String accession="";
+
+ String collisionEnergy="";
Map<String, List<File>> pubchemToFiles = new HashMap<String, List<File>>();
- for(int i=0; i < files.length; i++)
- {
- if(files[i].isFile())
- {
- WrapperSpectrum spectrum = new WrapperSpectrum(files[i].toString());
- if(pubchemToFiles.containsKey(Integer.toString(spectrum.getCID())))
- pubchemToFiles.get(Integer.toString(spectrum.getCID())).add(files[i]);
- else
- {
- List<File> fileList = new ArrayList<File>();
- fileList.add(files[i]);
- pubchemToFiles.put(Integer.toString(spectrum.getCID()), fileList);
+
+ boolean nameNotSet=true;
+
+ for (int i = 0; i < files.length; i++) {
+ if (files[i].isFile()) {
+ WrapperSpectrum spectrum = new WrapperSpectrum(
+ files[i].toString());
+
+ if (spectrum.isPositive()
+ || spectrum.getMode() == selectedMode
+ || spectrum.getPrecursorType().equals(
+ selectedPrecursorTypes)) {
+ if (pubchemToFiles.containsKey(Integer.toString(spectrum
+ .getCID())))
+ pubchemToFiles.get(Integer.toString(spectrum.getCID()))
+ .add(files[i]);
+ else {
+ List<File> fileList = new ArrayList<File>();
+ fileList.add(files[i]);
+ pubchemToFiles.put(Integer.toString(spectrum.getCID()),
+ fileList);
+ }
+
+ if (nameNotSet) {
+
+ recordName = spectrum.getTrivialName() + ";"
+ + spectrum.getFormula() + ";"
+ + selectedPrecursorTypes + "; MERGED";
+ nameNotSet = false;
+ }
+
+ collisionEnergy+=spectrum.getCollisionEnergy()+";";
}
-
- System.out.println(files[i].toString() + spectrum.getCID());
+
}
+
}
for (String pubchemID : pubchemToFiles.keySet()) {
String mergedNames = "";
+
Vector<WrapperSpectrum> spectra = new Vector<WrapperSpectrum>();
String lastFile = "";
@@ -107,6 +137,7 @@ private void preprocessUnsorted(String folder, double mzabs, double mzppm)
mergedNames += file.getName().split("\\.")[0];
+ accession+=file.getName().split("\\.")[0]+";";
lastFile = file.toString();
}
@@ -120,13 +151,69 @@ private void preprocessUnsorted(String folder, double mzabs, double mzppm)
try
{
BufferedReader reader = new BufferedReader(new FileReader(lastFile));
- line += reader.readLine() + "\n";
+
+ String current = reader.readLine();
+
+ if(current.contains("ACCESSION:"))
+ {
+ line+=("ACCESSION: "+accession + "\n");
+ }
+ else{
+ if(current.contains("RECORD_TITLE: "))
+ {
+ line+=("RECORD_TITLE: "+recordName+"\n");
+ }
+ else{
+ line+=(current+"\n");
+ }
+ }
+
while (line != null && !line.contains("PK$NUM_PEAK:")){
+
String currentLine = reader.readLine();
- if(currentLine.contains("PK$NUM_PEAK:"))
- break;
- else
- line += currentLine + "\n";
+
+ if (currentLine.contains("ACCESSION:")) {
+ currentLine = ("ACCESSION: " + accession + "\n");
+ line += currentLine;
+ } else {
+ if (currentLine.contains("RECORD_TITLE: ")) {
+ currentLine = ("RECORD_TITLE: " + recordName + "\n");
+ line += currentLine;
+ } else {
+
+ if (currentLine
+ .contains("AC$ANALYTICAL_CONDITION: COLLISION_ENERGY ")) {
+ String en[] = new String[collisionEnergy
+ .split(";").length];
+ int energies[] = new int[collisionEnergy
+ .split(";").length ];
+
+ en = collisionEnergy.split(";");
+ for (int i = 0; i < en.length ; i++) {
+ energies[i] = Integer.parseInt(en[i]);
+
+ }
+
+ int min = min(energies);
+ int max = max(energies);
+
+ currentLine = "AC$ANALYTICAL_CONDITION: COLLISION_ENERGY "
+ + min + "-" + max + " eV \n";
+ line += currentLine;
+ } else {
+ if (currentLine.contains("PK$NUM_PEAK:")) {
+ break;
+ } else
+ line += currentLine + "\n";
+ }
+ }
+ }
+
+
+// if(currentLine.contains("PK$NUM_PEAK:"))
+// break;
+// else
+// line += currentLine+"\n" ;
}
line += "PK$NUM_PEAK: " + mergedPeaks.size() + "\n";
line += "PK$PEAK: m/z int. rel.int.\n";
@@ -262,12 +349,10 @@ private void preprocess(String folder, double threshold)
this.peaksIntensity = new HashMap<Double, Vector<Double>>();
this.peaksRelIntensity = new HashMap<Double, Vector<Double>>();
for (int i = 0; i < spectra.size(); i++) {
- System.out.println("Spectra number: "+i);
- System.out.println(spectra.get(i).toString());
+
Vector<Peak> tempPeaks = spectra.get(i).getPeakList();
for (int j = 0; j < tempPeaks.size(); j++) {
//peaks.add(tempPeaks.get(j).getMass());
- System.out.println(tempPeaks.get(j).toString());
peaksIntensity = addToMap(peaksIntensity, tempPeaks.get(j).getMass(), tempPeaks.get(j).getIntensity());
peaksRelIntensity = addToMap(peaksRelIntensity, tempPeaks.get(j).getMass(),tempPeaks.get(j).getRelIntensity());
}
@@ -445,6 +530,33 @@ else if(temp.size() > 1)
return map;
}
+ /**
+  * Returns the largest value in the given array.
+  *
+  * @param vec values to scan; the first element is read unconditionally,
+  *        so the array must hold at least one element
+  * @return the maximum of all elements
+  */
+ public static int max(final int vec[])
+ {
+     int largest = vec[0];
+     for (final int candidate : vec)
+     {
+         largest = Math.max(largest, candidate);
+     }
+     return largest;
+ }
+
+ /**
+  * Returns the smallest value in the given array.
+  *
+  * @param vec values to scan; the first element is read unconditionally,
+  *        so the array must hold at least one element
+  * @return the minimum of all elements
+  */
+ public static int min(final int vec[])
+ {
+     int smallest = vec[0];
+     for (final int candidate : vec)
+     {
+         smallest = Math.min(smallest, candidate);
+     }
+     return smallest;
+ }
public static void main(String[] args) {
// String folder = "/home/swolf/MassBankData/MetFragSunGrid/BrukerRawData/Processed/";
View
9 src/de/ipbhalle/metfrag/spectrum/WrapperSpectrum.java
@@ -25,6 +25,7 @@
import java.io.IOException;
import java.sql.SQLException;
import java.util.Iterator;
+import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -73,6 +74,7 @@
private boolean isPositive;
private String smiles;
+ private Map<DatabaseIDs, String> dblinks;
/**
* Reads in a MassBank flat file from a given location.
@@ -100,6 +102,8 @@ public WrapperSpectrum(String filename){
this.isPositive = spectra.get(0).isPositive();
this.smiles=spectra.get(0).getSmiles();
+
+ this.dblinks=spectra.get(0).dblinks;
}
@@ -573,6 +577,11 @@ public String getSmiles(){
return smiles;
}
+ /**
+  * Returns the database links of this spectrum.
+  * The map is handed out as stored, without copying.
+  *
+  * @return the link map held in the dblinks field
+  */
+ public Map<DatabaseIDs,String> getDBLinks()
+ {
+     final Map<DatabaseIDs,String> links = dblinks;
+     return links;
+ }
+
public static void main(String[] args) {
WrapperSpectrum spectrum = new WrapperSpectrum("/home/swolf/MassBankData/TestSpectra/HillMerged/CO000056CO000057CO000058CO000059CO000060.txt");
System.out.println(spectrum.toString());

0 comments on commit 7ded243

Please sign in to comment.