Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR for review of tickets/DM-2349 #3

Merged
merged 1 commit into from
Mar 18, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
121 changes: 56 additions & 65 deletions python/lsst/metaserv/schemaToMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,20 @@ class SchemaToMeta(object):
(in cat/bin/schema_to_metadata.py).
"""

_tableStart = re.compile(r'CREATE TABLE (\w+)*')
_tableStart = re.compile(r'CREATE TABLE (\w+)')
_tableEnd = re.compile(r"\)")
_engineLine = re.compile(r'\) (ENGINE|TYPE)=(\w+)*;')
_columnLine = re.compile(r'[\s]+(\w+) ([\w\(\)]+)')
_engineLine = re.compile(r'\)\s*(ENGINE|TYPE)\s*=[\s]*(\w+)\s*;')
_columnLine = re.compile(r'\s*(\w+)\s+\w+')
_idxCols = re.compile(r'\((.+?)\)')
_unitLine = re.compile(r'<unit>(.+)</unit>')
_ucdLine = re.compile(r'<ucd>(.+)</ucd>')
_descrLine = re.compile(r'<descr>(.+)</descr>')
_descrStart = re.compile(r'<descr>(.+)')
_descrMiddle = re.compile(r'\s*--(.+)')
_descrEnd = re.compile(r'\s*--(.+)</descr>')
_descrMiddle = re.compile(r'--(.+)')
_descrEnd = re.compile(r'--(.+)</descr>')
_descrEndEmpty = re.compile(r'-- </descr>')
_commandLine = re.compile(r'\s*--')
_defaultLine = re.compile(r'\s+DEFAULT\s+(.+?)[\s,]')

def __init__(self, inputFileName):
"""
Expand All @@ -71,7 +75,8 @@ def parse(self):
"""Do actual parsing. Returns the retrieved structure as a table. The
structure of the produced table:
{ <tableName1>: {
'columns': [ { 'description': <column description>,
'columns': [ { 'defaultValue': <value>,
'description': <column description>,
'displayOrder': <value>,
'name': <value>,
'notNull': <value>,
Expand Down Expand Up @@ -100,7 +105,7 @@ def parse(self):
for line in iF:
# print "processing ", line
m = SchemaToMeta._tableStart.search(line)
if m is not None:
if m is not None and not self._isCommentLine(line):
tableName = m.group(1)
table[tableName] = {}
colNum = 1
Expand All @@ -126,7 +131,7 @@ def parse(self):
elif firstWord == "UNIQUE":
t = "UNIQUE"
idxInfo = {"type" : t,
"columns" : self._retrColumns(line)
"columns" : self._retrIdxColumns(line)
}
in_table.setdefault("indexes", []).append(idxInfo)
else:
Expand Down Expand Up @@ -189,92 +194,78 @@ def parse(self):
def _isIndexDefinition(self, c):
return c in ["PRIMARY", "KEY", "INDEX", "UNIQUE"]

def _isCommentLine(self, theString):
    """
    Return True if `theString` is an SQL comment line, that is, it starts
    with optional whitespace followed by "--".

    Uses the precompiled class-level pattern (named _commandLine in this
    class) instead of recompiling the regular expression on every call.
    """
    return SchemaToMeta._commandLine.match(theString) is not None

def _isUnitLine(self, theString):
    """
    Return True if `theString` contains a non-empty <unit>...</unit> tag
    pair on a single line.
    """
    return SchemaToMeta._unitLine.search(theString) is not None

def _isUcdLine(self, theString):
    """
    Return True if `theString` contains a non-empty <ucd>...</ucd> tag
    pair on a single line.
    """
    return SchemaToMeta._ucdLine.search(theString) is not None

def _retrUnit(self, theString):
    """
    Return the text enclosed by <unit> and </unit> in `theString`.

    Raises AttributeError if the line has no such tag pair (the search
    then yields None); check with _isUnitLine first.
    """
    result = SchemaToMeta._unitLine.search(theString)
    return result.group(1)

def _retrUcd(self, theString):
    """
    Return the text enclosed by <ucd> and </ucd> in `theString`.

    Raises AttributeError if the line has no such tag pair (the search
    then yields None); check with _isUcdLine first.
    """
    result = SchemaToMeta._ucdLine.search(theString)
    return result.group(1)

def _containsDescrTagStart(self, str):
return re.search(r'<descr>', str) is not None
def _containsDescrTagStart(self, theString):
return '<descr>' in theString

def _containsDescrTagEnd(self, str):
return re.search(r'</descr>', str) is not None
def _containsDescrTagEnd(self, theString):
return '</descr>' in theString

def _retrDescr(self, theString):
    """
    Return the text of a single-line description, i.e. whatever sits
    between <descr> and </descr> in `theString`.

    Raises AttributeError if the line does not contain both tags with
    some text in between (the search then yields None).
    """
    result = SchemaToMeta._descrLine.search(theString)
    return result.group(1)

def _retrDescrStart(self, theString):
    """
    Return the first piece of a multi-line description: everything that
    follows the opening <descr> tag up to the end of the line.

    Raises AttributeError if `theString` has no <descr> tag followed by
    at least one character (the search then yields None).
    """
    result = SchemaToMeta._descrStart.search(theString)
    return result.group(1)

def _retrDescrMid(self, theString):
    """
    Return a middle piece of a multi-line description: the text that
    follows the "--" comment marker on the line.

    Raises AttributeError if `theString` has no "--" followed by at
    least one character (the search then yields None).
    """
    result = SchemaToMeta._descrMiddle.search(theString)
    return result.group(1)

def _retrDescrEnd(self, theString):
    """
    Return the last piece of a multi-line description: the text between
    the "--" comment marker and the closing </descr> tag.

    A line consisting of just "-- </descr>" carries no text and yields
    an empty string.

    Raises AttributeError if `theString` has no "--" ... "</descr>"
    sequence (the search then yields None).
    """
    # Handle the text-less closing line first: it would otherwise be
    # reported as a description piece consisting of a single space.
    if SchemaToMeta._descrEndEmpty.search(theString):
        return ''
    result = SchemaToMeta._descrEnd.search(theString)
    return result.group(1)

def _retrIsNotNull(self, str):
if re.search(r'NOT NULL', str):
return '1'
return '0'
def _retrIsNotNull(self, theString):
return 'NOT NULL' in theString

def _retrType(self, str):
arr = str.split()
def _retrType(self, theString):
arr = theString.split()
t = arr[1]
if t == "FLOAT(0)":
return "FLOAT"
return t

def _retrDefaultValue(self, theString):
    """
    Return the default value declared on a column definition line, or
    None if the line declares no default.

    The value is the whitespace-separated word that follows the first
    stand-alone DEFAULT keyword, with any trailing commas removed. A
    quoted default that contains spaces is therefore cut at its first
    space.
    """
    if not SchemaToMeta._defaultLine.search(theString):
        return None
    words = theString.split()
    # Walk consecutive word pairs so the word after DEFAULT is at hand
    # without a separate "return the next one" flag.
    for word, nextWord in zip(words, words[1:]):
        if word == 'DEFAULT':
            return nextWord.rstrip(',')
    return None

# example strings:
# " PRIMARY KEY (id),",
# " KEY IDX_sId (sId ASC),",
# " KEY IDX_d (decl DESC)",
# " UNIQUE UQ_AmpMap_ampName(ampName)"
# " UNIQUE UQ_x(xx DESC, yy),"

def _retrIdxColumns(self, theString):
    """
    Return the columns of an index definition line as one string, with
    the column names separated by ", ".

    The columns are taken from the first parenthesised group on the line;
    the ASC / DESC sort keywords are dropped, so "UNIQUE UQ_x(xx DESC, yy),"
    gives "xx, yy".

    Raises AttributeError if `theString` contains no "(...)" group (the
    search then yields None).
    """
    insideParens = SchemaToMeta._idxCols.search(theString).group(1)
    columns = []
    # Split on commas first, then on whitespace: that way a sort keyword
    # directly followed by a comma ("DESC,") is still recognised.
    for colExpr in insideParens.split(','):
        words = [word for word in colExpr.split()
                 if word not in ('ASC', 'DESC')]
        columns.append(" ".join(words))
    return ", ".join(columns)

# Former name of this helper, kept so that code still calling it keeps working.
_retrColumns = _retrIdxColumns

###############################################################################
def printIt(inputFileName='../cat/sql/baselineSchema.sql'):
    """
    Parse a schema file with SchemaToMeta and pretty-print the resulting
    structure to standard output.

    @param inputFileName  path of the schema file to parse; defaults to the
                          baseline schema, addressed relative to the current
                          working directory
    """
    sToM = SchemaToMeta(inputFileName)
    t = sToM.parse()
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(t)

Expand Down
199 changes: 199 additions & 0 deletions tests/testSchemaToMeta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python

# LSST Data Management System
# Copyright 2015 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.

"""
This is a unittest for the SchemaToMeta class.

@author Jacek Becla, SLAC
"""

# standard library
import logging as log
import os
import tempfile
import unittest

# useful for debugging
# import pprint
# pp = pprint.PrettyPrinter(indent=2)
# pp.pprint(theTable)

# local
from lsst.metaserv.schemaToMeta import SchemaToMeta

class TestS2M(unittest.TestCase):
    """
    Tests for SchemaToMeta. Each test writes a small SQL schema to a
    temporary file, parses it and checks the structure that comes back.
    """

    def _parseSchema(self, schemaText):
        """
        Write schemaText to a temporary file, parse that file with
        SchemaToMeta and return the parsed structure. The temporary file is
        removed again, whether or not parsing succeeds.
        """
        (fd, fName) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as theFile:
                theFile.write(schemaText)
            return SchemaToMeta(fName).parse()
        finally:
            # mkstemp leaves deletion of the file to the caller.
            os.remove(fName)

    def testBasics(self):
        """
        Basic test: load data for two tables.
        """
        theTable = self._parseSchema("""
CREATE TABLE t1
-- <descr>This is t1 table.</descr>
(
id int,
-- <descr>the t1.id</descr>
ra double DEFAULT 1,
-- <descr>right ascention</descr>
-- <ucd>pos.eq.ra</ucd>
-- <unit>deg</unit>
decl double,
-- <ucd>pos.eq.dec</ucd>
-- <unit>deg</unit>
s char DEFAULT 'x',
-- <descr>the t1.s</descr>
v varchar(255),
PRIMARY KEY pk_t1_id (id),
INDEX idx_t1_s (s)
) ENGINE=MyISAM;

CREATE TABLE t2
-- <descr>This is
-- t2 table.</descr>
(
id2 int,
-- <descr>This is a very
-- long
-- description of the
-- t2.id2.</descr>
s2 char,
-- <descr>Description for s2.
-- </descr>
v2 varchar(255)
) ENGINE = InnoDB;
""")
        self.assertEqual(len(theTable), 2)

        t1 = theTable["t1"]
        t1Cols = t1["columns"]
        self.assertEqual(len(t1Cols), 5)
        self.assertEqual(t1Cols[0]["name"], "id")
        self.assertEqual(t1Cols[0]["description"], "the t1.id")
        self.assertEqual(t1Cols[1]["name"], "ra")
        self.assertEqual(t1Cols[1]["defaultValue"], "1")
        self.assertEqual(t1Cols[1]["description"], "right ascention")
        self.assertEqual(t1Cols[1]["ucd"], "pos.eq.ra")
        self.assertEqual(t1Cols[1]["unit"], "deg")
        self.assertEqual(t1Cols[2]["name"], "decl")
        self.assertNotIn("description", t1Cols[2])
        self.assertEqual(t1Cols[2]["ucd"], "pos.eq.dec")
        self.assertEqual(t1Cols[2]["unit"], "deg")
        self.assertEqual(t1Cols[3]["name"], "s")
        self.assertEqual(t1Cols[3]["defaultValue"], "'x'")
        self.assertEqual(t1Cols[3]["description"], "the t1.s")
        self.assertNotIn("ucd", t1Cols[3])
        self.assertEqual(t1Cols[4]["name"], "v")
        self.assertNotIn("description", t1Cols[4])
        self.assertNotIn("ucd", t1Cols[4])
        self.assertEqual(t1["description"], "This is t1 table.")
        self.assertEqual(t1["engine"], "MyISAM")
        self.assertEqual(len(t1["indexes"]), 2)
        self.assertEqual(t1["indexes"][0]["columns"], "id")
        self.assertEqual(t1["indexes"][0]["type"], "PRIMARY KEY")
        self.assertEqual(t1["indexes"][1]["columns"], "s")

        t2 = theTable["t2"]
        self.assertEqual(t2["description"], "This is t2 table.")
        self.assertEqual(t2["columns"][0]["description"],
                         "This is a very long description of the t2.id2.")
        self.assertEqual(t2["columns"][1]["description"],
                         "Description for s2.")
        self.assertEqual(t2["engine"], "InnoDB")

    def testComments(self):
        """
        Test commented block
        """
        theTable = self._parseSchema("""
--CREATE TABLE tDummy1
-- <descr>This is dummy table 1.</descr>
--(
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
--) ENGINE=MyISAM;

-- CREATE TABLE tDummy2
-- (
-- id int,
-- PRIMARY KEY pk_t1_id (id),
-- INDEX idx_t1_s (s)
-- ) ENGINE=MyISAM;

CREATE TABLE t3 (
id3 int
) ENGINE =InnoDB;
""")
        # Only the table that is not commented out may be reported.
        self.assertEqual(len(theTable), 1)
        self.assertIn("t3", theTable)

    def testIndices(self):
        """
        Test index lines.
        """
        theTable = self._parseSchema("""
CREATE TABLE t (
id int,
sId bigint,
decl DOUBLE,
ampName VARCHAR(64),
xx int,
yy int,
PRIMARY KEY (id),
KEY IDX_sId (sId ASC),
INDEX IDX_d (decl DESC),
UNIQUE UQ_AmpMap_ampName(ampName),
UNIQUE UQ_x(xx DESC, yy)
);
""")
        indexes = theTable["t"]["indexes"]
        self.assertEqual(indexes[0]["columns"], "id")
        self.assertEqual(indexes[0]["type"], "PRIMARY KEY")
        self.assertEqual(indexes[1]["columns"], "sId")
        self.assertEqual(indexes[1]["type"], "-")
        self.assertEqual(indexes[2]["columns"], "decl")
        self.assertEqual(indexes[2]["type"], "-")
        self.assertEqual(indexes[3]["columns"], "ampName")
        self.assertEqual(indexes[3]["type"], "UNIQUE")
        self.assertEqual(indexes[4]["columns"], "xx, yy")
        # Check the fifth index itself; this used to re-check the fourth.
        self.assertEqual(indexes[4]["type"], "UNIQUE")


def main():
    """
    Set up DEBUG-level logging and run the tests of this module.
    """
    log.basicConfig(
        format='%(asctime)s %(name)s %(levelname)s: %(message)s',
        # 24-hour clock: a 12-hour "%I" without "%p" cannot tell morning
        # from afternoon.
        datefmt='%m/%d/%Y %H:%M:%S',
        level=log.DEBUG)

    unittest.main()


if __name__ == "__main__":
    main()