Add subject type option

lszeremeta · Apr 3, 2021 · 00ed6fb · 00ed6fb
1 parent 33ab519
commit 00ed6fb
Show file tree

Hide file tree

Showing 5 changed files with 365 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -93,6 +93,7 @@ Running SDFEater without parameters displays help.
 * `-i,--input <arg>` - input SDF file path (required)
 * `-f,--format <arg>` - output format (e.g. `cypher`, `jsonld`, `cvme`, `smiles`, `inchi`) (required; full list below)
 * `-p,--periodic` - add additional atoms data from [periodic table](https://github.com/lszeremeta/SDFEater/blob/master/src/main/resources/pl/edu/uwb/ii/sdfeater/periodic_table.json) (for `cypher` output format)
+* `-s,--subject <arg>` - subject type (`iri`, `uuid`, `bnode`; `iri` by default; for all formats except `cypher`, `cvme`, `smiles`, `inchi`)
 * `-u,--urls` - try to generate full database URLs instead of IDs (for `cypher` output format, always enabled in `cvme`)
 
 Remember to use the appropriate file path when running the Docker image. Suppose you mounted your local directory `/home/user/input` under `/app/input` and the path to the SDF file you want to use in SDFEater is `/home/user/input/file.sdf`. In this case, enter the path `/app/input/file.sdf` or `input/file.sdf` as the value of the `-i` argument.

diff --git a/src/main/java/pl/edu/uwb/ii/sdfeater/File.java b/src/main/java/pl/edu/uwb/ii/sdfeater/File.java
@@ -58,9 +58,10 @@ class File {
      * appropriate program structures
      *
      * @param molecule Molecule object to which values from the file will be entered
-     * @param format   Output format from Format enum
+     * @param format   Output format
+     * @param subject  Subject type
      */
-    void parse(Molecule molecule, SDFEater.Format format) {
+    void parse(Molecule molecule, SDFEater.Format format, SDFEater.Subject subject) {
         try {
             FileInputStream fstream = new FileInputStream(filename);
             BufferedReader br = new BufferedReader(new InputStreamReader(fstream));
@@ -156,13 +157,13 @@ void parse(Molecule molecule, SDFEater.Format format) {
                             case jsonldhtml:
                             case rdfxml:
                             case rdfthrift:
-                                molecule.addToJenaModel();
+                                molecule.addToJenaModel(subject);
                                 break;
                             case rdfa:
-                                molecule.printRDFaMolecule();
+                                molecule.printRDFaMolecule(subject);
                                 break;
                             case microdata:
-                                molecule.printMicrodataMolecule();
+                                molecule.printMicrodataMolecule(subject);
                                 break;
                             default:
                                 break;

diff --git a/src/main/java/pl/edu/uwb/ii/sdfeater/Molecule.java b/src/main/java/pl/edu/uwb/ii/sdfeater/Molecule.java
@@ -324,9 +324,20 @@ void printChemSKOSMolecule() {
 
     /**
      * Add main molecule data to Jena model
+     *
+     * @param subject subject type
      */
-    void addToJenaModel() {
-        Resource me = ResourceFactory.createResource("http://example.com/molecule#entity" + createID());
+    void addToJenaModel(SDFEater.Subject subject) {
+        Resource me = ResourceFactory.createResource();
+
+        if (subject == SDFEater.Subject.iri) {
+            me = ResourceFactory.createResource("http://example.com/molecule#entity" + createID());
+        } else if (subject == SDFEater.Subject.uuid) {
+            me = ResourceFactory.createResource("urn:uuid:" + uuid);
+        } else if (subject == SDFEater.Subject.bnode) {
+            me = ResourceFactory.createResource();
+        }
+
         for (Map.Entry<String, List<String>> entry : properties.entrySet()) {
 
             String key = entry.getKey();
@@ -381,7 +392,7 @@ void addToJenaModel() {
     /**
      * Print main molecule data in RDFa
      */
-    void printRDFaMolecule() {
+    void printRDFaMolecule(SDFEater.Subject subject) {
         StringBuilder output_str = new StringBuilder();
         for (Map.Entry<String, List<String>> entry : properties.entrySet()) {
             String key = entry.getKey();
@@ -421,8 +432,15 @@ void printRDFaMolecule() {
         }
 
         if (output_str.length() > 0) {
-            String mID = createID();
-            System.out.println("    <div typeof='schema:MolecularEntity' about='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
+            if (subject == SDFEater.Subject.iri) {
+                String mID = createID();
+                System.out.println("    <div typeof='schema:MolecularEntity' about='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
+            } else if (subject == SDFEater.Subject.uuid) {
+                System.out.println("    <div typeof='schema:MolecularEntity' about='urn:uuid:" + uuid + "'>");
+            } else if (subject == SDFEater.Subject.bnode) {
+                System.out.println("    <div typeof='schema:MolecularEntity' about='_:b" + createID() + "'>");
+            }
+
             System.out.print(output_str);
             System.out.println("    </div>");
         }
@@ -431,8 +449,10 @@ void printRDFaMolecule() {
 
     /**
      * Print main molecule data in Microdata
+     *
+     * @param subject subject type
      */
-    void printMicrodataMolecule() {
+    void printMicrodataMolecule(SDFEater.Subject subject) {
         StringBuilder output_str = new StringBuilder();
         for (Map.Entry<String, List<String>> entry : properties.entrySet()) {
             String key = entry.getKey();
@@ -472,8 +492,15 @@ void printMicrodataMolecule() {
         }
 
         if (output_str.length() > 0) {
-            String mID = createID();
-            System.out.println("    <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
+            if (subject == SDFEater.Subject.iri) {
+                String mID = createID();
+                System.out.println("    <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
+            } else if (subject == SDFEater.Subject.uuid) {
+                System.out.println("    <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='urn:uuid:" + uuid + "'>");
+            } else if (subject == SDFEater.Subject.bnode) {
+                System.out.println("    <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='_:b" + createID() + "'>");
+            }
+
             System.out.print(output_str);
             System.out.println("    </div>");
         }

diff --git a/src/main/java/pl/edu/uwb/ii/sdfeater/SDFEater.java b/src/main/java/pl/edu/uwb/ii/sdfeater/SDFEater.java
@@ -87,6 +87,9 @@ public static void main(String[] args) {
         Option formatarg = new Option("f", "format", true, "output format (cypher, cvme, smiles, inchi, turtle, ntriples, rdfxml, rdfthrift, jsonldhtml, jsonld, rdfa, microdata)");
         formatarg.setRequired(true);
         options.addOption(formatarg);
+        Option subject = new Option("s", "subject", true, "subject type (iri, uuid, bnode; iri by default); for all formats excluding cypher, cvme, smiles, inchi");
+        subject.setRequired(false);
+        options.addOption(subject);
         Option urls = new Option("u", "urls", false, "try to generate full database URLs instead of IDs (for cypher output format, always enabled in cvme)");
         urls.setRequired(false);
         options.addOption(urls);
@@ -103,51 +106,57 @@ public static void main(String[] args) {
             if (cmd.hasOption("format")) {
                 String format = cmd.getOptionValue("format");
                 if (format.equalsIgnoreCase("cypher") && !cmd.hasOption("urls") && !cmd.hasOption("periodic")) {
-                    file.parse(molecule, Format.cypher);
+                    file.parse(molecule, Format.cypher, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("urls") && !cmd.hasOption("periodic")) {
-                    file.parse(molecule, Format.cypheru);
+                    file.parse(molecule, Format.cypheru, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("periodic") && !cmd.hasOption("urls")) {
                     loadPeriodicTableData();
-                    file.parse(molecule, Format.cypherp);
+                    file.parse(molecule, Format.cypherp, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("urls") && cmd.hasOption("periodic")) {
                     loadPeriodicTableData();
-                    file.parse(molecule, Format.cypherup);
+                    file.parse(molecule, Format.cypherup, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("cvme")) {
-                    file.parse(molecule, Format.cvme);
+                    file.parse(molecule, Format.cvme, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("smiles")) {
-                    file.parse(molecule, Format.smiles);
+                    file.parse(molecule, Format.smiles, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("inchi")) {
-                    file.parse(molecule, Format.inchi);
+                    file.parse(molecule, Format.inchi, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("turtle")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.turtle);
+                    file.parse(molecule, Format.turtle, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("ntriples")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.ntriples);
+                    file.parse(molecule, Format.ntriples, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("jsonldhtml")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.jsonldhtml);
+                    file.parse(molecule, Format.jsonldhtml, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("jsonld")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.jsonld);
+                    file.parse(molecule, Format.jsonld, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("rdfxml")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.rdfxml);
+                    file.parse(molecule, Format.rdfxml, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("rdfthrift")) {
                     initializeJenaModel();
-                    file.parse(molecule, Format.rdfthrift);
+                    file.parse(molecule, Format.rdfthrift, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("rdfa")) {
-                    file.parse(molecule, Format.rdfa);
+                    file.parse(molecule, Format.rdfa, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else if (format.equalsIgnoreCase("microdata")) {
-                    file.parse(molecule, Format.microdata);
+                    file.parse(molecule, Format.microdata, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
                 } else {
                     System.err.println("The selected format is not supported");
                     formatter.printHelp("SDFEater.jar", options);
                 }
 
             }
+        } catch (IllegalArgumentException e) {
+            System.err.println("Incorrect option selected");
+            formatter.printHelp("SDFEater.jar", options);
         } catch (ParseException e) {
-            System.err.println(e.getMessage());
+            System.err.println("Parse error: " + e.getMessage());
+            formatter.printHelp("SDFEater.jar", options);
+        } catch (Exception e) {
+            System.err.println("Error: " + e.getMessage());
             formatter.printHelp("SDFEater.jar", options);
         }
     }
@@ -172,4 +181,13 @@ public enum Format {
         rdfa,
         microdata
     }
+
+    /**
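+     * Subject type of a molecule: {@code iri} (example.com IRI), {@code uuid} (urn:uuid URN) or {@code bnode} (blank node)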
+     */
+    public enum Subject {
+        iri,
+        uuid,
+        bnode
+    }
 }