lsst-dm · jbecla · Mar 18, 2015 · Mar 18, 2015
diff --git a/python/lsst/metaserv/schemaToMeta.py b/python/lsst/metaserv/schemaToMeta.py
@@ -45,16 +45,20 @@ class SchemaToMeta(object):
     (in cat/bin/schema_to_metadata.py).
     """
 
-    _tableStart = re.compile(r'CREATE TABLE (\w+)*')
+    _tableStart = re.compile(r'CREATE TABLE (\w+)')
     _tableEnd = re.compile(r"\)")
-    _engineLine = re.compile(r'\) (ENGINE|TYPE)=(\w+)*;')
-    _columnLine = re.compile(r'[\s]+(\w+) ([\w\(\)]+)')
+    _engineLine = re.compile(r'\)\s*(ENGINE|TYPE)\s*=[\s]*(\w+)\s*;')
+    _columnLine = re.compile(r'\s*(\w+)\s+\w+')
+    _idxCols = re.compile(r'\((.+?)\)')
     _unitLine = re.compile(r'<unit>(.+)</unit>')
     _ucdLine = re.compile(r'<ucd>(.+)</ucd>')
     _descrLine = re.compile(r'<descr>(.+)</descr>')
     _descrStart = re.compile(r'<descr>(.+)')
-    _descrMiddle = re.compile(r'\s*--(.+)')
-    _descrEnd = re.compile(r'\s*--(.+)</descr>')
+    _descrMiddle = re.compile(r'--(.+)')
+    _descrEnd = re.compile(r'--(.+)</descr>')
+    _descrEndEmpty = re.compile(r'-- </descr>')
+    _commandLine = re.compile(r'\s*--')
+    _defaultLine = re.compile(r'\s+DEFAULT\s+(.+?)[\s,]')
 
     def __init__(self, inputFileName):
         """
@@ -71,7 +75,8 @@ def parse(self):
         """Do actual parsing. Returns the retrieved structure as a table. The
         structure of the produced table:
 { <tableName1>: {
-    'columns': [ { 'description': <column description>,
+    'columns': [ { 'defaultValue': <value>,
+                   'description': <column description>,
                    'displayOrder': <value>,
                    'name': <value>,
                    'notNull': <value>,
@@ -100,7 +105,7 @@ def parse(self):
         for line in iF:
             # print "processing ", line
             m = SchemaToMeta._tableStart.search(line)
-            if m is not None:
+            if m is not None and not self._isCommentLine(line):
                 tableName = m.group(1)
                 table[tableName] = {}
                 colNum = 1
@@ -126,7 +131,7 @@ def parse(self):
                         elif firstWord == "UNIQUE":
                             t = "UNIQUE"
                         idxInfo = {"type" : t,
-                                   "columns" : self._retrColumns(line)
+                                   "columns" : self._retrIdxColumns(line)
                                }
                         in_table.setdefault("indexes", []).append(idxInfo)
                     else:
@@ -189,92 +194,101 @@ def parse(self):
     def _isIndexDefinition(self, c):
+        """Return True if c is one of PRIMARY, KEY, INDEX or UNIQUE."""
         return c in ["PRIMARY", "KEY", "INDEX", "UNIQUE"]
 
-    def _isCommentLine(self, str):
-        return re.match(r'\s*--', str) is not None
+    def _isCommentLine(self, theString):
+        """Return True if theString starts with '--' after optional whitespace."""
+        return SchemaToMeta._commandLine.match(theString) is not None
 
-    def _isUnitLine(self, str):
-        return SchemaToMeta._unitLine.search(str) is not None
+    def _isUnitLine(self, theString):
+        """Return True if theString contains a <unit>...</unit> tag."""
+        return SchemaToMeta._unitLine.search(theString) is not None
 
-    def _isUcdLine(self, str):
-        return SchemaToMeta._ucdLine.search(str) is not None
+    def _isUcdLine(self, theString):
+        """Return True if theString contains a <ucd>...</ucd> tag."""
+        return SchemaToMeta._ucdLine.search(theString) is not None
 
-    def _retrUnit(self, str):
-        x = SchemaToMeta._unitLine.search(str)
-        return x.group(1)
+    def _retrUnit(self, theString):
+        """Return the text between <unit> and </unit>; the tag must be present."""
+        result = SchemaToMeta._unitLine.search(theString)
+        return result.group(1)
 
-    def _retrUcd(self, str):
-        x = SchemaToMeta._ucdLine.search(str)
-        return x.group(1)
+    def _retrUcd(self, theString):
+        """Return the text between <ucd> and </ucd>; the tag must be present."""
+        result = SchemaToMeta._ucdLine.search(theString)
+        return result.group(1)
 
-    def _containsDescrTagStart(self, str):
-        return re.search(r'<descr>', str) is not None
+    def _containsDescrTagStart(self, theString):
+        """Return True if theString contains the opening <descr> tag."""
+        return '<descr>' in theString
 
-    def _containsDescrTagEnd(self, str):
-        return re.search(r'</descr>', str) is not None
+    def _containsDescrTagEnd(self, theString):
+        """Return True if theString contains the closing </descr> tag."""
+        return '</descr>' in theString
 
-    def _retrDescr(self, str):
-        x = SchemaToMeta._descrLine.search(str)
-        return x.group(1)
+    def _retrDescr(self, theString):
+        """Return the text between <descr> and </descr> found on one line."""
+        result = SchemaToMeta._descrLine.search(theString)
+        return result.group(1)
 
-    def _retrDescrStart(self, str):
-        x = SchemaToMeta._descrStart.search(str)
-        return x.group(1)
+    def _retrDescrStart(self, theString):
+        """Return the text that follows <descr> up to the end of the line."""
+        result = SchemaToMeta._descrStart.search(theString)
+        return result.group(1)
 
-    def _retrDescrMid(self, str):
-        x = SchemaToMeta._descrMiddle.search(str)
-        return x.group(1)
+    def _retrDescrMid(self, theString):
+        """Return the text that follows the first '--' on the line."""
+        result = SchemaToMeta._descrMiddle.search(theString)
+        return result.group(1)
 
-    def _retrDescrEnd(self, str):
-        if re.search(r'-- </descr>', str):
+    def _retrDescrEnd(self, theString):
+        """Return the text between '--' and </descr> ('' for '-- </descr>')."""
+        if SchemaToMeta._descrEndEmpty.search(theString):
             return ''
-        x = SchemaToMeta._descrEnd.search(str)
-        return x.group(1)
+        result = SchemaToMeta._descrEnd.search(theString)
+        return result.group(1)
 
-    def _retrIsNotNull(self, str):
-        if re.search(r'NOT NULL', str):
-            return '1'
-        return '0'
+    def _retrIsNotNull(self, theString):
+        """Return True if theString contains 'NOT NULL'."""
+        return 'NOT NULL' in theString
 
-    def _retrType(self, str):
-        arr = str.split()
+    def _retrType(self, theString):
+        """Return the second word of theString, with FLOAT(0) mapped to FLOAT."""
+        arr = theString.split()
         t = arr[1]
         if t == "FLOAT(0)":
             return "FLOAT"
         return t
 
-    def _retrDefaultValue(self, str):
-        if ' DEFAULT ' not in str:
+    def _retrDefaultValue(self, theString):
+        """
+        Return the word following DEFAULT with any trailing commas removed,
+        or None if theString has no DEFAULT clause.
+        """
+        if not SchemaToMeta._defaultLine.search(theString):
             return None
-        arr = str.split()
-        returnNext = 0
-        for a in arr:
-            if returnNext:
-                return a.rstrip(',')
-            if a == 'DEFAULT':
-                returnNext = 1
+        words = theString.split()
+        # the regex above guarantees a standalone DEFAULT word; the value is
+        # the word right after it (None if DEFAULT turns out to be the last)
+        following = words[words.index('DEFAULT') + 1:]
+        return following[0].rstrip(',') if following else None
 
-    # example strings:
-    # "    PRIMARY KEY (id),",
-    # "    KEY IDX_sId (sId ASC),",
-    # "    KEY IDX_d (decl DESC)",
-    # "    UNIQUE UQ_AmpMap_ampName(ampName)"
-    # "    UNIQUE UQ_x(xx DESC, yy),"
-
-    def _retrColumns(self, str):
-        xx = re.search(r'[\s\w_]+\(([\w ,]+)\)', str.rstrip())
-        xx = xx.group(1).split() # skip " ASC", " DESC" etc
-        s = ''
-        for x in xx:
-            if not x == 'ASC' and not x == 'DESC':
-                s += x
-                if x[-1] == ',':
-                    s += ' '
-        return s
+    def _retrIdxColumns(self, theString):
+        """
+        Return the columns listed in the first parenthesised group as a
+        ', '-separated string, with ASC and DESC dropped, e.g.
+        "    KEY IDX_sId (sId ASC)," gives "sId" and
+        "    UNIQUE UQ_x(xx DESC, yy)," gives "xx, yy".
+        """
+        colExprs = SchemaToMeta._idxCols.search(theString).group(1).split(',')
+        columns = [" ".join([word for word in expr.split()
+                        if word not in ('ASC', 'DESC')]) for expr in colExprs]
+        return ", ".join(columns)
 
 ###############################################################################
 def printIt():
-    x = SchemaToMeta('/home/becla/dataArchDev/repos/cat/sql/baselineSchema.sql')
-    t = x.parse()
+    """Parse ../cat/sql/baselineSchema.sql and pretty-print the result."""
+    sToM = SchemaToMeta('../cat/sql/baselineSchema.sql')
+    t = sToM.parse()
     pp = pprint.PrettyPrinter(indent=2)
     pp.pprint(t)
 

diff --git a/tests/testSchemaToMeta.py b/tests/testSchemaToMeta.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+
+# LSST Data Management System
+# Copyright 2015 LSST Corporation.
+#
+# This product includes software developed by the
+# LSST Project (http://www.lsst.org/).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
+# see <http://www.lsstcorp.org/LegalNotices/>.
+
+"""
+This is a unittest for the SchemaToMeta class.
+
+@author  Jacek Becla, SLAC
+"""
+
+# standard library
+import logging as log
+import os
+import tempfile
+import unittest
+
+# useful for debugging
+# import pprint
+# pp = pprint.PrettyPrinter(indent=2)
+# pp.pprint(theTable)
+
+# local
+from lsst.metaserv.schemaToMeta import SchemaToMeta
+
+class TestS2M(unittest.TestCase):
+    """Run SchemaToMeta.parse() on small schemas stored in temporary files."""
+
+    def _parse(self, schema):
+        """
+        Write schema to a temporary file and return what parse() produces.
+        The file is removed when the test ends, whether it passed or failed.
+        """
+        (fd, fName) = tempfile.mkstemp()
+        # register the removal first so that a failing write still cleans up
+        self.addCleanup(os.remove, fName)
+        with os.fdopen(fd, "w") as theFile:
+            theFile.write(schema)
+        return SchemaToMeta(fName).parse()
+
+    def testBasics(self):
+        """
+        Basic test: load data for two tables.
+        """
+        theTable = self._parse("""
+CREATE TABLE t1
+    -- <descr>This is t1 table.</descr>
+(
+    id int,
+        -- <descr>the t1.id</descr>
+    ra double DEFAULT 1,
+        -- <descr>right ascention</descr>
+        -- <ucd>pos.eq.ra</ucd>
+        -- <unit>deg</unit>
+    decl double,
+        -- <ucd>pos.eq.dec</ucd>
+        -- <unit>deg</unit>
+    s char DEFAULT 'x',
+        -- <descr>the t1.s</descr>
+    v varchar(255),
+    PRIMARY KEY pk_t1_id (id),
+    INDEX idx_t1_s (s)
+) ENGINE=MyISAM;
+
+CREATE TABLE t2
+   -- <descr>This is
+   -- t2 table.</descr>
+(
+    id2 int,
+        -- <descr>This is a very
+        -- long
+        -- description of the
+        -- t2.id2.</descr>
+    s2 char,
+        -- <descr>Description for s2.
+        -- </descr>
+    v2 varchar(255)
+) ENGINE = InnoDB;
+""")
+        self.assertEqual(len(theTable), 2)
+        self.assertEqual(len(theTable["t1"]["columns"]), 5)
+        self.assertEqual(theTable["t1"]["columns"][0]["name"], "id")
+        self.assertEqual(theTable["t1"]["columns"][0]["description"], "the t1.id")
+        self.assertEqual(theTable["t1"]["columns"][1]["name"], "ra")
+        self.assertEqual(theTable["t1"]["columns"][1]["defaultValue"], "1")
+        self.assertEqual(theTable["t1"]["columns"][1]["description"], "right ascention")
+        self.assertEqual(theTable["t1"]["columns"][1]["ucd"], "pos.eq.ra")
+        self.assertEqual(theTable["t1"]["columns"][1]["unit"], "deg")
+        self.assertEqual(theTable["t1"]["columns"][2]["name"], "decl")
+        self.assertNotIn("description", theTable["t1"]["columns"][2])
+        self.assertEqual(theTable["t1"]["columns"][2]["ucd"], "pos.eq.dec")
+        self.assertEqual(theTable["t1"]["columns"][2]["unit"], "deg")
+        self.assertEqual(theTable["t1"]["columns"][3]["name"], "s")
+        self.assertEqual(theTable["t1"]["columns"][3]["defaultValue"], "'x'")
+        self.assertEqual(theTable["t1"]["columns"][3]["description"], "the t1.s")
+        self.assertNotIn("ucd", theTable["t1"]["columns"][3])
+        self.assertEqual(theTable["t1"]["columns"][4]["name"], "v")
+        self.assertNotIn("description", theTable["t1"]["columns"][4])
+        self.assertNotIn("ucd", theTable["t1"]["columns"][4])
+        self.assertEqual(theTable["t1"]["description"], "This is t1 table.")
+        self.assertEqual(theTable["t1"]["engine"], "MyISAM")
+        self.assertEqual(len(theTable["t1"]["indexes"]), 2)
+        self.assertEqual(theTable["t1"]["indexes"][0]["columns"], "id")
+        self.assertEqual(theTable["t1"]["indexes"][0]["type"], "PRIMARY KEY")
+        self.assertEqual(theTable["t1"]["indexes"][1]["columns"], "s")
+        self.assertEqual(theTable["t2"]["description"], "This is t2 table.")
+        self.assertEqual(theTable["t2"]["columns"][0]["description"],
+                         "This is a very long description of the t2.id2.")
+        self.assertEqual(theTable["t2"]["columns"][1]["description"], "Description for s2.")
+        self.assertEqual(theTable["t2"]["engine"], "InnoDB")
+
+
+    def testComments(self):
+        """
+        Test commented block
+        """
+        theTable = self._parse("""
+--CREATE TABLE tDummy1
+    -- <descr>This is dummy table 1.</descr>
+--(
+--    id int,
+--    PRIMARY KEY pk_t1_id (id),
+--    INDEX idx_t1_s (s)
+--) ENGINE=MyISAM;
+
+-- CREATE TABLE tDummy2
+-- (
+--    id int,
+--    PRIMARY KEY pk_t1_id (id),
+--    INDEX idx_t1_s (s)
+-- ) ENGINE=MyISAM;
+
+CREATE TABLE t3 (
+    id3 int
+) ENGINE =InnoDB;
+""")
+        self.assertEqual(len(theTable), 1)
+        self.assertIn("t3", theTable)
+
+
+    def testIndices(self):
+        """
+        Test index lines.
+        """
+        theTable = self._parse("""
+CREATE TABLE t (
+    id int,
+    sId bigint,
+    decl DOUBLE,
+    ampName VARCHAR(64),
+    xx int,
+    yy int,
+    PRIMARY KEY (id),
+    KEY IDX_sId (sId ASC),
+    INDEX IDX_d (decl DESC),
+    UNIQUE UQ_AmpMap_ampName(ampName),
+    UNIQUE UQ_x(xx DESC, yy)
+);
+""")
+        self.assertEqual(len(theTable["t"]["indexes"]), 5)
+        self.assertEqual(theTable["t"]["indexes"][0]["columns"], "id")
+        self.assertEqual(theTable["t"]["indexes"][0]["type"], "PRIMARY KEY")
+        self.assertEqual(theTable["t"]["indexes"][1]["columns"], "sId")
+        self.assertEqual(theTable["t"]["indexes"][1]["type"], "-")
+        self.assertEqual(theTable["t"]["indexes"][2]["columns"], "decl")
+        self.assertEqual(theTable["t"]["indexes"][2]["type"], "-")
+        self.assertEqual(theTable["t"]["indexes"][3]["columns"], "ampName")
+        self.assertEqual(theTable["t"]["indexes"][3]["type"], "UNIQUE")
+        self.assertEqual(theTable["t"]["indexes"][4]["columns"], "xx, yy")
+        self.assertEqual(theTable["t"]["indexes"][4]["type"], "UNIQUE")
+
+
+def main():
+    """Configure DEBUG-level logging, then hand control to unittest.main()."""
+    logFormat = '%(asctime)s %(name)s %(levelname)s: %(message)s'
+    timeFormat = '%m/%d/%Y %I:%M:%S'
+    log.basicConfig(format=logFormat, datefmt=timeFormat, level=log.DEBUG)
+
+    unittest.main()
+
+if __name__ == "__main__":
+    main()