Skip to content

Commit

Permalink
Merge branch 'tickets/DM-2349'
Browse files Browse the repository at this point in the history
  • Loading branch information
jbecla committed Mar 18, 2015
2 parents fd41c12 + 34c85f1 commit e4525a8
Show file tree
Hide file tree
Showing 2 changed files with 255 additions and 65 deletions.
121 changes: 56 additions & 65 deletions python/lsst/metaserv/schemaToMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,20 @@ class SchemaToMeta(object):
(in cat/bin/schema_to_metadata.py).
"""

_tableStart = re.compile(r'CREATE TABLE (\w+)*')
_tableStart = re.compile(r'CREATE TABLE (\w+)')
_tableEnd = re.compile(r"\)")
_engineLine = re.compile(r'\) (ENGINE|TYPE)=(\w+)*;')
_columnLine = re.compile(r'[\s]+(\w+) ([\w\(\)]+)')
_engineLine = re.compile(r'\)\s*(ENGINE|TYPE)\s*=[\s]*(\w+)\s*;')
_columnLine = re.compile(r'\s*(\w+)\s+\w+')
_idxCols = re.compile(r'\((.+?)\)')
_unitLine = re.compile(r'<unit>(.+)</unit>')
_ucdLine = re.compile(r'<ucd>(.+)</ucd>')
_descrLine = re.compile(r'<descr>(.+)</descr>')
_descrStart = re.compile(r'<descr>(.+)')
_descrMiddle = re.compile(r'\s*--(.+)')
_descrEnd = re.compile(r'\s*--(.+)</descr>')
_descrMiddle = re.compile(r'--(.+)')
_descrEnd = re.compile(r'--(.+)</descr>')
_descrEndEmpty = re.compile(r'-- </descr>')
_commandLine = re.compile(r'\s*--')

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

I think you mean _commentLine here.

_defaultLine = re.compile(r'\s+DEFAULT\s+(.+?)[\s,]')

def __init__(self, inputFileName):
"""
Expand All @@ -71,7 +75,8 @@ def parse(self):
"""Do actual parsing. Returns the retrieved structure as a table. The
structure of the produced table:
{ <tableName1>: {
'columns': [ { 'description': <column description>,
'columns': [ { 'defaultValue': <value>,
'description': <column description>,
'displayOrder': <value>,
'name': <value>,
'notNull': <value>,
Expand Down Expand Up @@ -100,7 +105,7 @@ def parse(self):
for line in iF:
# print "processing ", line
m = SchemaToMeta._tableStart.search(line)
if m is not None:
if m is not None and not self._isCommentLine(line):
tableName = m.group(1)
table[tableName] = {}
colNum = 1
Expand All @@ -126,7 +131,7 @@ def parse(self):
elif firstWord == "UNIQUE":
t = "UNIQUE"
idxInfo = {"type" : t,
"columns" : self._retrColumns(line)
"columns" : self._retrIdxColumns(line)
}
in_table.setdefault("indexes", []).append(idxInfo)
else:
Expand Down Expand Up @@ -189,92 +194,78 @@ def parse(self):
def _isIndexDefinition(self, c):
return c in ["PRIMARY", "KEY", "INDEX", "UNIQUE"]

def _isCommentLine(self, str):
return re.match(r'\s*--', str) is not None
def _isCommentLine(self, theString):
return SchemaToMeta._commandLine.match(theString) is not None

def _isUnitLine(self, str):
return SchemaToMeta._unitLine.search(str) is not None
def _isUnitLine(self, theString):
return SchemaToMeta._unitLine.search(theString) is not None

def _isUcdLine(self, str):
return SchemaToMeta._ucdLine.search(str) is not None
def _isUcdLine(self, theString):
return SchemaToMeta._ucdLine.search(theString) is not None

def _retrUnit(self, str):
x = SchemaToMeta._unitLine.search(str)
return x.group(1)
def _retrUnit(self, theString):
result = SchemaToMeta._unitLine.search(theString)
return result.group(1)

def _retrUcd(self, str):
x = SchemaToMeta._ucdLine.search(str)
return x.group(1)
def _retrUcd(self, theString):
result = SchemaToMeta._ucdLine.search(theString)
return result.group(1)

def _containsDescrTagStart(self, str):
return re.search(r'<descr>', str) is not None
def _containsDescrTagStart(self, theString):
return '<descr>' in theString

def _containsDescrTagEnd(self, str):
return re.search(r'</descr>', str) is not None
def _containsDescrTagEnd(self, theString):
return '</descr>' in theString

def _retrDescr(self, str):
x = SchemaToMeta._descrLine.search(str)
return x.group(1)
def _retrDescr(self, theString):
result = SchemaToMeta._descrLine.search(theString)
return result.group(1)

def _retrDescrStart(self, str):
x = SchemaToMeta._descrStart.search(str)
return x.group(1)
def _retrDescrStart(self, theString):
result = SchemaToMeta._descrStart.search(theString)
return result.group(1)

def _retrDescrMid(self, str):
x = SchemaToMeta._descrMiddle.search(str)
return x.group(1)
def _retrDescrMid(self, theString):
result = SchemaToMeta._descrMiddle.search(theString)
return result.group(1)

def _retrDescrEnd(self, str):
if re.search(r'-- </descr>', str):
def _retrDescrEnd(self, theString):
if SchemaToMeta._descrEndEmpty.search(theString):

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

I think you missed the thrust of my comment. With the proper regexp, you don't need the if statement and can always return result.group(1), which will be the empty string if appropriate.

This comment has been minimized.

Copy link
@jbecla

jbecla Mar 19, 2015

Author Contributor

No, I didn't! It was just a bit more complicated because sometimes I want the extra leading space (e.g., if I have
--This is
-- a comment.
and I strip the leading space, I'll get "This isa comment"), and sometimes I don't want it (or the unit tests fail because they don't expect the extra space). Anyway, I fixed that by adding an extra rstrip().

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

That's a matter of how you join the strings from different lines. I'd think it's better to strip spaces from the beginning and end of each line and then join them using a space.

return ''
x = SchemaToMeta._descrEnd.search(str)
return x.group(1)
result = SchemaToMeta._descrEnd.search(theString)
return result.group(1)

def _retrIsNotNull(self, str):
if re.search(r'NOT NULL', str):
return '1'
return '0'
def _retrIsNotNull(self, theString):
return 'NOT NULL' in theString

def _retrType(self, str):
arr = str.split()
def _retrType(self, theString):
arr = theString.split()
t = arr[1]
if t == "FLOAT(0)":
return "FLOAT"
return t

def _retrDefaultValue(self, str):
if ' DEFAULT ' not in str:
def _retrDefaultValue(self, theString):
if not SchemaToMeta._defaultLine.search(theString):

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

Again, you slightly missed the point of the comment. That regexp is supposed to allow you to return result.group(1) if the pattern matches, without any of the looping or stripping below.

return None
arr = str.split()
arr = theString.split()
returnNext = 0
for a in arr:
if returnNext:
return a.rstrip(',')
if a == 'DEFAULT':
returnNext = 1

# example strings:
# " PRIMARY KEY (id),",
# " KEY IDX_sId (sId ASC),",
# " KEY IDX_d (decl DESC)",
# " UNIQUE UQ_AmpMap_ampName(ampName)"
# " UNIQUE UQ_x(xx DESC, yy),"

def _retrColumns(self, str):
xx = re.search(r'[\s\w_]+\(([\w ,]+)\)', str.rstrip())
xx = xx.group(1).split() # skip " ASC", " DESC" etc
s = ''
for x in xx:
if not x == 'ASC' and not x == 'DESC':
s += x
if x[-1] == ',':
s += ' '
return s
def _retrIdxColumns(self, theString):
    """
    Extract the column list from an index definition line.

    The text captured by SchemaToMeta._idxCols (the contents of the first
    parenthesised group) is split on commas. Within each entry the words
    'ASC' and 'DESC' are dropped, and the entries are joined with ", ".
    For example, "  UNIQUE UQ_x(xx DESC, yy)," yields "xx, yy".
    """
    insideParens = SchemaToMeta._idxCols.search(theString).group(1)
    names = []
    for colExpr in insideParens.split(','):
        # Keep everything except the sort-direction keywords.
        kept = [w for w in colExpr.split() if w != 'ASC' and w != 'DESC']
        names.append(" ".join(kept))
    return ", ".join(names)

###############################################################################
def printIt():
x = SchemaToMeta('/home/becla/dataArchDev/repos/cat/sql/baselineSchema.sql')
t = x.parse()
sToM = SchemaToMeta('../cat/sql/baselineSchema.sql')
t = sToM.parse()
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(t)

Expand Down
199 changes: 199 additions & 0 deletions tests/testSchemaToMeta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python

# LSST Data Management System
# Copyright 2015 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.

"""
This is a unittest for the SchemaToMeta class.
@author Jacek Becla, SLAC
"""

# standard library
import logging as log
import os
import tempfile
import unittest

# useful for debugging
# import pprint
# pp = pprint.PrettyPrinter(indent=2)
# pp.pprint(theTable)

# local
from lsst.metaserv.schemaToMeta import SchemaToMeta

class TestS2M(unittest.TestCase):

def testBasics(self):
    """
    Basic test: load data for two tables.

    Writes a schema with two CREATE TABLE statements to a temporary file,
    parses it with SchemaToMeta and checks the parsed structure.
    """
    (fd, fName) = tempfile.mkstemp()
    try:
        # The with-block guarantees the file is flushed and closed before
        # it is parsed, even if the write fails.
        with os.fdopen(fd, "w") as theFile:
            theFile.write("""
CREATE TABLE t1
-- <descr>This is t1 table.</descr>
(
id int,
-- <descr>the t1.id</descr>
ra double DEFAULT 1,
-- <descr>right ascention</descr>
-- <ucd>pos.eq.ra</ucd>
-- <unit>deg</unit>
decl double,
-- <ucd>pos.eq.dec</ucd>
-- <unit>deg</unit>
s char DEFAULT 'x',
-- <descr>the t1.s</descr>
v varchar(255),
PRIMARY KEY pk_t1_id (id),
INDEX idx_t1_s (s)
) ENGINE=MyISAM;
CREATE TABLE t2
-- <descr>This is
-- t2 table.</descr>
(
id2 int,
-- <descr>This is a very
-- long
-- description of the
-- t2.id2.</descr>
s2 char,
-- <descr>Description for s2.
-- </descr>
v2 varchar(255)
) ENGINE = InnoDB;
""")
        x = SchemaToMeta(fName)
        theTable = x.parse()
    finally:
        # mkstemp() leaves the file on disk; remove it even if parsing raises.
        os.remove(fName)
    # TestCase assertion: reports both values on failure and is not
    # stripped when Python runs with -O.
    self.assertEqual(len(theTable), 2)

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

Note that these asserts don't do what you really want -- they're the Python built-in assert exception-raiser. You want to use the TestCase assert functions which, among other things, print out the correct and incorrect values when a failure occurs.

# TestCase assertions report the differing values on failure and are not
# stripped when Python runs with -O, unlike the bare assert statement.
t1 = theTable["t1"]
t1Cols = t1["columns"]
self.assertEqual(len(t1Cols), 5)
self.assertEqual(t1Cols[0]["name"], "id")
self.assertEqual(t1Cols[0]["description"], "the t1.id")
self.assertEqual(t1Cols[1]["name"], "ra")
self.assertEqual(t1Cols[1]["defaultValue"], "1")
self.assertEqual(t1Cols[1]["description"], "right ascention")
self.assertEqual(t1Cols[1]["ucd"], "pos.eq.ra")
self.assertEqual(t1Cols[1]["unit"], "deg")
self.assertEqual(t1Cols[2]["name"], "decl")
self.assertNotIn("description", t1Cols[2])
self.assertEqual(t1Cols[2]["ucd"], "pos.eq.dec")
self.assertEqual(t1Cols[2]["unit"], "deg")
self.assertEqual(t1Cols[3]["name"], "s")
self.assertEqual(t1Cols[3]["defaultValue"], "'x'")
self.assertEqual(t1Cols[3]["description"], "the t1.s")
self.assertNotIn("ucd", t1Cols[3])
self.assertEqual(t1Cols[4]["name"], "v")
self.assertNotIn("description", t1Cols[4])
self.assertNotIn("ucd", t1Cols[4])
self.assertEqual(t1["description"], "This is t1 table.")
self.assertEqual(t1["engine"], "MyISAM")
self.assertEqual(len(t1["indexes"]), 2)
self.assertEqual(t1["indexes"][0]["columns"], "id")
self.assertEqual(t1["indexes"][0]["type"], "PRIMARY KEY")
self.assertEqual(t1["indexes"][1]["columns"], "s")
t2 = theTable["t2"]
self.assertEqual(t2["description"], "This is t2 table.")
# Multi-line <descr> blocks are expected to be joined with single spaces.
self.assertEqual(t2["columns"][0]["description"],
                 "This is a very long description of the t2.id2.")
self.assertEqual(t2["columns"][1]["description"], "Description for s2.")
self.assertEqual(t2["engine"], "InnoDB")


def testComments(self):
    """
    Test commented block.

    The schema contains two CREATE TABLE definitions commented out with
    '--' (the first with no space after the dashes on its CREATE TABLE
    line, the second with one) followed by one live table, t3. Only the
    live table should appear in the parsed result.
    """
    (fd, fName) = tempfile.mkstemp()
    try:
        # The with-block closes the file before it is parsed, even if the
        # write fails.
        with os.fdopen(fd, "w") as theFile:
            theFile.write("""
--CREATE TABLE tDummy1
-- <descr>This is dummy table 1.</descr>
--(
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
--) ENGINE=MyISAM;
-- CREATE TABLE tDummy2
-- (
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
-- ) ENGINE=MyISAM;
CREATE TABLE t3 (
id3 int
) ENGINE =InnoDB;
""")
        x = SchemaToMeta(fName)
        theTable = x.parse()
    finally:
        # mkstemp() leaves the file on disk; remove it even if parsing raises.
        os.remove(fName)
    # TestCase assertion: reports both values on failure and is not
    # stripped when Python runs with -O.
    self.assertEqual(len(theTable), 1)


def testIndices(self):
"""
Test index lines.
"""
(fd, fName) = tempfile.mkstemp()

This comment has been minimized.

Copy link
@ktlim

ktlim Mar 19, 2015

I was expecting you to simply test _retrIdxColumns() with a few self.assertEqual() lines, not test it by calling parse().

try:
    # The with-block closes the file before it is parsed, even if the
    # write fails.
    with os.fdopen(fd, "w") as theFile:
        theFile.write("""
CREATE TABLE t (
id int,
sId bigint,
decl DOUBLE,
ampName VARCHAR(64),
xx int,
yy int,
PRIMARY KEY (id),
KEY IDX_sId (sId ASC),
INDEX IDX_d (decl DESC),
UNIQUE UQ_AmpMap_ampName(ampName),
UNIQUE UQ_x(xx DESC, yy)
);
""")
    x = SchemaToMeta(fName)
    theTable = x.parse()
finally:
    # mkstemp() leaves the file on disk; remove it even if parsing raises.
    os.remove(fName)
# TestCase assertions report the differing values on failure and are not
# stripped when Python runs with -O, unlike the bare assert statement.
indexes = theTable["t"]["indexes"]
self.assertEqual(indexes[0]["columns"], "id")
self.assertEqual(indexes[0]["type"], "PRIMARY KEY")
self.assertEqual(indexes[1]["columns"], "sId")
self.assertEqual(indexes[1]["type"], "-")
self.assertEqual(indexes[2]["columns"], "decl")
self.assertEqual(indexes[2]["type"], "-")
self.assertEqual(indexes[3]["columns"], "ampName")
self.assertEqual(indexes[3]["type"], "UNIQUE")
self.assertEqual(indexes[4]["columns"], "xx, yy")
# Previously this re-checked indexes[3] (copy-paste slip), so the type of
# the fifth index was never verified.
self.assertEqual(indexes[4]["type"], "UNIQUE")


def main():
    """
    Configure DEBUG-level logging and run the unit tests in this module.
    """
    log.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s: %(message)s',
        # 24-hour clock: the former 12-hour '%I' without '%p' made morning
        # and afternoon timestamps indistinguishable.
        datefmt='%m/%d/%Y %H:%M:%S',
        level=log.DEBUG)

    unittest.main()

if __name__ == "__main__":
main()

0 comments on commit e4525a8

Please sign in to comment.