Skip to content

Commit

Permalink
Add unit test. Improvements to parse code
Browse files Browse the repository at this point in the history
Improvements include:
 - bug fix: 'engine = <type>' was not parsed
 - ignoring commented lines
 - added support for DEFAULT and a few others
  • Loading branch information
jbecla committed Mar 18, 2015
1 parent fd41c12 commit 34c85f1
Show file tree
Hide file tree
Showing 2 changed files with 255 additions and 65 deletions.
121 changes: 56 additions & 65 deletions python/lsst/metaserv/schemaToMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,20 @@ class SchemaToMeta(object):
(in cat/bin/schema_to_metadata.py).
"""

_tableStart = re.compile(r'CREATE TABLE (\w+)*')
_tableStart = re.compile(r'CREATE TABLE (\w+)')
_tableEnd = re.compile(r"\)")
_engineLine = re.compile(r'\) (ENGINE|TYPE)=(\w+)*;')
_columnLine = re.compile(r'[\s]+(\w+) ([\w\(\)]+)')
_engineLine = re.compile(r'\)\s*(ENGINE|TYPE)\s*=[\s]*(\w+)\s*;')
_columnLine = re.compile(r'\s*(\w+)\s+\w+')
_idxCols = re.compile(r'\((.+?)\)')
_unitLine = re.compile(r'<unit>(.+)</unit>')
_ucdLine = re.compile(r'<ucd>(.+)</ucd>')
_descrLine = re.compile(r'<descr>(.+)</descr>')
_descrStart = re.compile(r'<descr>(.+)')
_descrMiddle = re.compile(r'\s*--(.+)')
_descrEnd = re.compile(r'\s*--(.+)</descr>')
_descrMiddle = re.compile(r'--(.+)')
_descrEnd = re.compile(r'--(.+)</descr>')
_descrEndEmpty = re.compile(r'-- </descr>')
_commandLine = re.compile(r'\s*--')
_defaultLine = re.compile(r'\s+DEFAULT\s+(.+?)[\s,]')

def __init__(self, inputFileName):
"""
Expand All @@ -71,7 +75,8 @@ def parse(self):
"""Do actual parsing. Returns the retrieved structure as a table. The
structure of the produced table:
{ <tableName1>: {
'columns': [ { 'description': <column description>,
'columns': [ { 'defaultValue': <value>,
'description': <column description>,
'displayOrder': <value>,
'name': <value>,
'notNull': <value>,
Expand Down Expand Up @@ -100,7 +105,7 @@ def parse(self):
for line in iF:
# print "processing ", line
m = SchemaToMeta._tableStart.search(line)
if m is not None:
if m is not None and not self._isCommentLine(line):
tableName = m.group(1)
table[tableName] = {}
colNum = 1
Expand All @@ -126,7 +131,7 @@ def parse(self):
elif firstWord == "UNIQUE":
t = "UNIQUE"
idxInfo = {"type" : t,
"columns" : self._retrColumns(line)
"columns" : self._retrIdxColumns(line)
}
in_table.setdefault("indexes", []).append(idxInfo)
else:
Expand Down Expand Up @@ -189,92 +194,78 @@ def parse(self):
def _isIndexDefinition(self, c):
return c in ["PRIMARY", "KEY", "INDEX", "UNIQUE"]

def _isCommentLine(self, str):
return re.match(r'\s*--', str) is not None
def _isCommentLine(self, theString):
    """Return True if theString starts with optional whitespace followed
    by '--', i.e. if the _commandLine pattern matches at its beginning."""
    matched = SchemaToMeta._commandLine.match(theString)
    return matched is not None

def _isUnitLine(self, str):
return SchemaToMeta._unitLine.search(str) is not None
def _isUnitLine(self, theString):
    """Return True if theString contains a <unit>...</unit> element."""
    found = SchemaToMeta._unitLine.search(theString)
    return found is not None

def _isUcdLine(self, str):
return SchemaToMeta._ucdLine.search(str) is not None
def _isUcdLine(self, theString):
    """Return True if theString contains a <ucd>...</ucd> element."""
    found = SchemaToMeta._ucdLine.search(theString)
    return found is not None

def _retrUnit(self, str):
x = SchemaToMeta._unitLine.search(str)
return x.group(1)
def _retrUnit(self, theString):
    """Return the text found between <unit> and </unit> in theString.

    The search result is used without a check, so theString must contain
    the element.
    """
    return SchemaToMeta._unitLine.search(theString).group(1)

def _retrUcd(self, str):
x = SchemaToMeta._ucdLine.search(str)
return x.group(1)
def _retrUcd(self, theString):
    """Return the text found between <ucd> and </ucd> in theString.

    The search result is used without a check, so theString must contain
    the element.
    """
    return SchemaToMeta._ucdLine.search(theString).group(1)

def _containsDescrTagStart(self, str):
return re.search(r'<descr>', str) is not None
def _containsDescrTagStart(self, theString):
return '<descr>' in theString

def _containsDescrTagEnd(self, str):
return re.search(r'</descr>', str) is not None
def _containsDescrTagEnd(self, theString):
return '</descr>' in theString

def _retrDescr(self, str):
x = SchemaToMeta._descrLine.search(str)
return x.group(1)
def _retrDescr(self, theString):
    """Return the text found between <descr> and </descr> in theString.

    The search result is used without a check, so both tags must be
    present in theString.
    """
    return SchemaToMeta._descrLine.search(theString).group(1)

def _retrDescrStart(self, str):
x = SchemaToMeta._descrStart.search(str)
return x.group(1)
def _retrDescrStart(self, theString):
    """Return whatever follows the opening <descr> tag in theString.

    The search result is used without a check, so the tag must be present
    and followed by at least one character.
    """
    return SchemaToMeta._descrStart.search(theString).group(1)

def _retrDescrMid(self, str):
x = SchemaToMeta._descrMiddle.search(str)
return x.group(1)
def _retrDescrMid(self, theString):
    """Return the first capture group of the _descrMiddle pattern applied
    to theString (a continuation line of a multi-line description).

    The search result is used without a check, so the pattern must match.
    """
    return SchemaToMeta._descrMiddle.search(theString).group(1)

def _retrDescrEnd(self, str):
if re.search(r'-- </descr>', str):
def _retrDescrEnd(self, theString):
if SchemaToMeta._descrEndEmpty.search(theString):
return ''
x = SchemaToMeta._descrEnd.search(str)
return x.group(1)
result = SchemaToMeta._descrEnd.search(theString)
return result.group(1)

def _retrIsNotNull(self, str):
if re.search(r'NOT NULL', str):
return '1'
return '0'
def _retrIsNotNull(self, theString):
return 'NOT NULL' in theString

def _retrType(self, str):
arr = str.split()
def _retrType(self, theString):
arr = theString.split()
t = arr[1]
if t == "FLOAT(0)":
return "FLOAT"
return t

def _retrDefaultValue(self, str):
if ' DEFAULT ' not in str:
def _retrDefaultValue(self, theString):
if not SchemaToMeta._defaultLine.search(theString):
return None
arr = str.split()
arr = theString.split()
returnNext = 0
for a in arr:
if returnNext:
return a.rstrip(',')
if a == 'DEFAULT':
returnNext = 1

# example strings:
# " PRIMARY KEY (id),",
# " KEY IDX_sId (sId ASC),",
# " KEY IDX_d (decl DESC)",
# " UNIQUE UQ_AmpMap_ampName(ampName)"
# " UNIQUE UQ_x(xx DESC, yy),"

def _retrColumns(self, str):
xx = re.search(r'[\s\w_]+\(([\w ,]+)\)', str.rstrip())
xx = xx.group(1).split() # skip " ASC", " DESC" etc
s = ''
for x in xx:
if not x == 'ASC' and not x == 'DESC':
s += x
if x[-1] == ',':
s += ' '
return s
def _retrIdxColumns(self, theString):
    """Return the columns of an index definition as one ", "-joined string.

    The column list is the text captured by the _idxCols pattern (the
    contents of the first parenthesised group in theString). It is split
    on commas, and the words ASC and DESC are dropped from each entry.
    The search result is used without a check, so theString must contain
    a parenthesised list.
    """
    insideParens = SchemaToMeta._idxCols.search(theString).group(1)
    columns = []
    for expr in insideParens.split(','):
        kept = [word for word in expr.split() if word not in ('ASC', 'DESC')]
        columns.append(" ".join(kept))
    return ", ".join(columns)

###############################################################################
def printIt():
x = SchemaToMeta('/home/becla/dataArchDev/repos/cat/sql/baselineSchema.sql')
t = x.parse()
sToM = SchemaToMeta('../cat/sql/baselineSchema.sql')
t = sToM.parse()
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(t)

Expand Down
199 changes: 199 additions & 0 deletions tests/testSchemaToMeta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python

# LSST Data Management System
# Copyright 2015 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.

"""
This is a unittest for the SchemaToMeta class.
@author Jacek Becla, SLAC
"""

# standard library
import logging as log
import os
import tempfile
import unittest

# useful for debugging
# import pprint
# pp = pprint.PrettyPrinter(indent=2)
# pp.pprint(theTable)

# local
from lsst.metaserv.schemaToMeta import SchemaToMeta

class TestS2M(unittest.TestCase):
    """Unit tests for SchemaToMeta, driven by small in-line SQL schemas."""

    def _parseSchema(self, schemaText):
        """
        Write schemaText to a temporary file, parse that file with
        SchemaToMeta and return the structure produced by parse(). The
        temporary file is removed afterwards, whether or not parsing
        succeeded.
        """
        (fd, fName) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as theFile:
                theFile.write(schemaText)
            return SchemaToMeta(fName).parse()
        finally:
            # mkstemp() leaves deletion of the file to the caller
            os.remove(fName)

    def testBasics(self):
        """
        Basic test: load data for two tables.
        """
        theTable = self._parseSchema("""
CREATE TABLE t1
-- <descr>This is t1 table.</descr>
(
id int,
-- <descr>the t1.id</descr>
ra double DEFAULT 1,
-- <descr>right ascention</descr>
-- <ucd>pos.eq.ra</ucd>
-- <unit>deg</unit>
decl double,
-- <ucd>pos.eq.dec</ucd>
-- <unit>deg</unit>
s char DEFAULT 'x',
-- <descr>the t1.s</descr>
v varchar(255),
PRIMARY KEY pk_t1_id (id),
INDEX idx_t1_s (s)
) ENGINE=MyISAM;
CREATE TABLE t2
-- <descr>This is
-- t2 table.</descr>
(
id2 int,
-- <descr>This is a very
-- long
-- description of the
-- t2.id2.</descr>
s2 char,
-- <descr>Description for s2.
-- </descr>
v2 varchar(255)
) ENGINE = InnoDB;
""")
        self.assertEqual(len(theTable), 2)

        t1 = theTable["t1"]
        t1Cols = t1["columns"]
        self.assertEqual(len(t1Cols), 5)
        self.assertEqual(t1Cols[0]["name"], "id")
        self.assertEqual(t1Cols[0]["description"], "the t1.id")
        self.assertEqual(t1Cols[1]["name"], "ra")
        self.assertEqual(t1Cols[1]["defaultValue"], "1")
        self.assertEqual(t1Cols[1]["description"], "right ascention")
        self.assertEqual(t1Cols[1]["ucd"], "pos.eq.ra")
        self.assertEqual(t1Cols[1]["unit"], "deg")
        self.assertEqual(t1Cols[2]["name"], "decl")
        self.assertNotIn("description", t1Cols[2])
        self.assertEqual(t1Cols[2]["ucd"], "pos.eq.dec")
        self.assertEqual(t1Cols[2]["unit"], "deg")
        self.assertEqual(t1Cols[3]["name"], "s")
        self.assertEqual(t1Cols[3]["defaultValue"], "'x'")
        self.assertEqual(t1Cols[3]["description"], "the t1.s")
        self.assertNotIn("ucd", t1Cols[3])
        self.assertEqual(t1Cols[4]["name"], "v")
        self.assertNotIn("description", t1Cols[4])
        self.assertNotIn("ucd", t1Cols[4])
        self.assertEqual(t1["description"], "This is t1 table.")
        self.assertEqual(t1["engine"], "MyISAM")
        self.assertEqual(len(t1["indexes"]), 2)
        self.assertEqual(t1["indexes"][0]["columns"], "id")
        self.assertEqual(t1["indexes"][0]["type"], "PRIMARY KEY")
        self.assertEqual(t1["indexes"][1]["columns"], "s")

        t2 = theTable["t2"]
        self.assertEqual(t2["description"], "This is t2 table.")
        self.assertEqual(t2["columns"][0]["description"],
                         "This is a very long description of the t2.id2.")
        self.assertEqual(t2["columns"][1]["description"],
                         "Description for s2.")
        self.assertEqual(t2["engine"], "InnoDB")

    def testComments(self):
        """
        Test commented block
        """
        theTable = self._parseSchema("""
--CREATE TABLE tDummy1
-- <descr>This is dummy table 1.</descr>
--(
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
--) ENGINE=MyISAM;
-- CREATE TABLE tDummy2
-- (
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
-- ) ENGINE=MyISAM;
CREATE TABLE t3 (
id3 int
) ENGINE =InnoDB;
""")
        # only t3 is a real table, both tDummy* definitions are commented out
        self.assertEqual(len(theTable), 1)

    def testIndices(self):
        """
        Test index lines.
        """
        theTable = self._parseSchema("""
CREATE TABLE t (
id int,
sId bigint,
decl DOUBLE,
ampName VARCHAR(64),
xx int,
yy int,
PRIMARY KEY (id),
KEY IDX_sId (sId ASC),
INDEX IDX_d (decl DESC),
UNIQUE UQ_AmpMap_ampName(ampName),
UNIQUE UQ_x(xx DESC, yy)
);
""")
        indexes = theTable["t"]["indexes"]
        self.assertEqual(indexes[0]["columns"], "id")
        self.assertEqual(indexes[0]["type"], "PRIMARY KEY")
        self.assertEqual(indexes[1]["columns"], "sId")
        self.assertEqual(indexes[1]["type"], "-")
        self.assertEqual(indexes[2]["columns"], "decl")
        self.assertEqual(indexes[2]["type"], "-")
        self.assertEqual(indexes[3]["columns"], "ampName")
        self.assertEqual(indexes[3]["type"], "UNIQUE")
        self.assertEqual(indexes[4]["columns"], "xx, yy")
        # check the type of the last index itself (UQ_x), not of index 3 again
        self.assertEqual(indexes[4]["type"], "UNIQUE")


def main():
    """Configure logging at DEBUG level, then hand over to unittest.main()."""
    logSettings = {
        'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
        'datefmt': '%m/%d/%Y %I:%M:%S',
        'level': log.DEBUG,
    }
    log.basicConfig(**logSettings)

    unittest.main()

if __name__ == "__main__":
main()

0 comments on commit 34c85f1

Please sign in to comment.