-
Notifications
You must be signed in to change notification settings - Fork 182
fixes#47 Implement ability to query fields used on a worksheet #54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c67766e
c30d741
49077dc
4458637
156a3cc
110f34a
544cb69
c174e8c
69eae9a
a62b6b2
8822bc5
96c1e8d
3f999e1
f5711cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,17 +12,54 @@ | |
from tableaudocumentapi import Field | ||
from tableaudocumentapi.multilookup_dict import MultiLookupDict | ||
|
||
######## | ||
# This is needed in order to determine if something is a string or not. It is necessary because | ||
# of differences between python2 (basestring) and python3 (str). If python2 support is ever | ||
# dropped, remove this and change the basestring references below to str | ||
try: | ||
basestring | ||
except NameError: | ||
basestring = str | ||
######## | ||
|
||
def _mapping_from_xml(root_xml, column_xml): | ||
retval = Field.from_xml(column_xml) | ||
local_name = retval.id | ||
if "'" in local_name: | ||
local_name = sax.escape(local_name, {"'": "'"}) | ||
xpath = ".//metadata-record[@class='column'][local-name='{}']".format(local_name) | ||
metadata_record = root_xml.find(xpath) | ||
_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object']) | ||
|
||
|
||
def _get_metadata_xml_for_field(root_xml, field_name):
    """Return the column ``metadata-record`` element describing ``field_name``.

    The records are matched by comparing the text of their ``local-name``
    child with ``field_name`` directly instead of splicing the name into an
    XPath predicate. A name containing an apostrophe therefore cannot break
    out of the quoted XPath literal (which ElementTree rejects as an invalid
    predicate), and no escaping step is needed.

    :param root_xml: element (or tree) to search below.
    :param field_name: the local name of the field to look up.
    :returns: the first matching ``metadata-record`` element in document
        order, or ``None`` when no record matches.
    """
    for record in root_xml.findall(".//metadata-record[@class='column']"):
        for local_name in record.findall('local-name'):
            # Mirror the XPath ``[tag='text']`` predicate, which compares the
            # complete text content of the child, descendants included.
            if ''.join(local_name.itertext()) == field_name:
                return record
    return None
|
||
|
||
def _is_used_by_worksheet(names, field):
    """Return True when any of ``names`` appears in ``field.worksheets``.

    The membership test itself is what ``any`` evaluates, so a matching name
    counts even when the name is falsy (for example an empty string).

    :param names: iterable of worksheet names to look for.
    :param field: object exposing a ``worksheets`` container.
    """
    return any(name in field.worksheets for name in names)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can murder even more ()'s -- there's an implicit generator inside the function call to any, so it can just be |
||
|
||
|
||
class FieldDictionary(MultiLookupDict):
    """Field lookup dictionary that can also be filtered by worksheet usage."""

    def used_by_sheet(self, name):
        """Return the list of fields used by the given worksheet(s).

        ``name`` is either a single worksheet name (a string) or a collection
        of worksheet names. For a collection, a field is included when any of
        the names is found in the field's ``worksheets``.
        """
        if isinstance(name, basestring):
            # A lone name only needs a direct membership test per field.
            def is_match(field):
                return name in field.worksheets
        else:
            # Several names: defer to the shared "any of these" helper.
            def is_match(field):
                return _is_used_by_worksheet(name, field)

        return [field for field in self.values() if is_match(field)]
|
||
|
||
def _column_object_from_column_xml(root_xml, column_xml): | ||
field_object = Field.from_column_xml(column_xml) | ||
local_name = field_object.id | ||
metadata_record = _get_metadata_xml_for_field(root_xml, local_name) | ||
if metadata_record is not None: | ||
retval.apply_metadata(metadata_record) | ||
return retval.id, retval | ||
field_object.apply_metadata(metadata_record) | ||
return _ColumnObjectReturnTuple(field_object.id, field_object) | ||
|
||
|
||
def _column_object_from_metadata_xml(metadata_xml):
    """Build a field from a metadata element and pair it with its id.

    Returns a ``_ColumnObjectReturnTuple`` whose ``id`` is the new field's id
    and whose ``object`` is the field itself.
    """
    field = Field.from_metadata_xml(metadata_xml)
    return _ColumnObjectReturnTuple(id=field.id, object=field)
|
||
|
||
class ConnectionParser(object): | ||
|
@@ -73,7 +110,7 @@ def __init__(self, dsxml, filename=None): | |
|
||
@classmethod | ||
def from_file(cls, filename): | ||
"Initialize datasource from file (.tds)" | ||
"""Initialize datasource from file (.tds)""" | ||
|
||
if zipfile.is_zipfile(filename): | ||
dsxml = xfile.get_xml_from_archive(filename).getroot() | ||
|
@@ -141,6 +178,16 @@ def fields(self): | |
return self._fields | ||
|
||
def _get_all_fields(self): | ||
column_objects = (_mapping_from_xml(self._datasourceTree, xml) | ||
for xml in self._datasourceTree.findall('.//column')) | ||
return MultiLookupDict({k: v for k, v in column_objects}) | ||
column_objects = [_column_object_from_column_xml(self._datasourceTree, xml) | ||
for xml in self._datasourceTree.findall('.//column')] | ||
existing_fields = [x.id for x in column_objects] | ||
metadata_fields = (x.text | ||
for x in self._datasourceTree.findall(".//metadata-record[@class='column']/local-name")) | ||
|
||
missing_fields = (x for x in metadata_fields if x not in existing_fields) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looking very clean! It might help readability if we break this into some more functions to abstract out the metadata processing a bit. In pseudocode:
Also fine to split that into a different issue/PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll do that as a second PR since I'm about to hit a string of meetings and won't be able to get that change checked in before I have to leave. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we start using the 🚀 emoji to mean "good to merge"? |
||
column_objects.extend(( | ||
_column_object_from_metadata_xml(_get_metadata_xml_for_field(self._datasourceTree, field_name)) | ||
for field_name in missing_fields | ||
)) | ||
|
||
return FieldDictionary({k: v for k, v in column_objects}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,17 +5,12 @@ | |
############################################################################### | ||
import os | ||
import zipfile | ||
import weakref | ||
|
||
import xml.etree.ElementTree as ET | ||
|
||
from tableaudocumentapi import Datasource, xfile | ||
|
||
########################################################################### | ||
# | ||
# Utility Functions | ||
# | ||
########################################################################### | ||
|
||
|
||
class Workbook(object): | ||
""" | ||
|
@@ -33,6 +28,7 @@ def __init__(self, filename): | |
Constructor. | ||
|
||
""" | ||
|
||
self._filename = filename | ||
|
||
# Determine if this is a twb or twbx and get the xml root | ||
|
@@ -47,13 +43,26 @@ def __init__(self, filename): | |
self._datasources = self._prepare_datasources( | ||
self._workbookRoot) # self.workbookRoot.find('datasources') | ||
|
||
self._datasource_index = self._prepare_datasource_index(self._datasources) | ||
|
||
self._worksheets = self._prepare_worksheets( | ||
self._workbookRoot, self._datasource_index | ||
) | ||
|
||
########### | ||
# datasources | ||
########### | ||
@property | ||
def datasources(self): | ||
return self._datasources | ||
|
||
########### | ||
# worksheets | ||
########### | ||
@property | ||
def worksheets(self): | ||
return self._worksheets | ||
|
||
########### | ||
# filename | ||
########### | ||
|
@@ -95,12 +104,47 @@ def save_as(self, new_filename): | |
# Private API. | ||
# | ||
########################################################################### | ||
def _prepare_datasources(self, xmlRoot): | ||
@staticmethod | ||
def _prepare_datasource_index(datasources): | ||
retval = weakref.WeakValueDictionary() | ||
for datasource in datasources: | ||
retval[datasource.name] = datasource | ||
|
||
return retval | ||
|
||
@staticmethod | ||
def _prepare_datasources(xml_root): | ||
datasources = [] | ||
|
||
# loop through our datasources and append | ||
for datasource in xmlRoot.find('datasources'): | ||
datasource_elements = xml_root.find('datasources') | ||
if datasource_elements is None: | ||
return [] | ||
|
||
for datasource in datasource_elements: | ||
ds = Datasource(datasource) | ||
datasources.append(ds) | ||
|
||
return datasources | ||
|
||
@staticmethod | ||
def _prepare_worksheets(xml_root, ds_index): | ||
worksheets = [] | ||
worksheets_element = xml_root.find('.//worksheets') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But here it does return None if there's no match (shrug) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, I wrote a test for "an empty workbook should not throw errors" to catch all of the places where find returns None but we are assuming that it can't be None, and removed the extraneous check for None in the case of dependencies. The difference is that find returns None when nothing is found, but findall returns an empty list, which we don't need to check since iterating over an empty list is a no-op anyway. |
||
if worksheets_element is None: | ||
return worksheets | ||
|
||
for worksheet_element in worksheets_element: | ||
worksheet_name = worksheet_element.attrib['name'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just checking, given our XML's usual craziness, the name is actually the name? :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, this is one place where name = name = what's in the dang UI. |
||
worksheets.append(worksheet_name) # TODO: A real worksheet object, for now, only name | ||
|
||
dependencies = worksheet_element.findall('.//datasource-dependencies') | ||
|
||
for dependency in dependencies: | ||
datasource_name = dependency.attrib['datasource'] | ||
datasource = ds_index[datasource_name] | ||
for column in dependency.findall('.//column'): | ||
column_name = column.attrib['name'] | ||
datasource.fields[column_name].add_used_in(worksheet_name) | ||
|
||
return worksheets |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,22 @@ | ||
<?xml version='1.0' encoding='utf-8' ?><workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0' xmlns:user='http://www.tableausoftware.com/xml/user'><datasources><datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx' version='10.0'><connection class='federated'><named-connections><named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'><connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306' server='mysql55.test.tsi.lan' source-charset='' username='test' /></named-connection><named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'><connection authentication='sqlserver' class='sqlserver' dbname='TestV1' odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan' username='test' /></named-connection></named-connections></connection></datasource></datasources></workbook> | ||
<?xml version='1.0' encoding='utf-8' ?> | ||
<workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0' | ||
xmlns:user='http://www.tableausoftware.com/xml/user'> | ||
<datasources> | ||
<datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx' | ||
version='10.0'> | ||
<connection class='federated'> | ||
<named-connections> | ||
<named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'> | ||
<connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306' | ||
server='mysql55.test.tsi.lan' source-charset='' username='test'/> | ||
</named-connection> | ||
<named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'> | ||
<connection authentication='sqlserver' class='sqlserver' dbname='TestV1' | ||
odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan' | ||
username='test'/> | ||
</named-connection> | ||
</named-connections> | ||
</connection> | ||
</datasource> | ||
</datasources> | ||
</workbook> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Namedtuples rule