Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 60 additions & 13 deletions tableaudocumentapi/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,54 @@
from tableaudocumentapi import Field
from tableaudocumentapi.multilookup_dict import MultiLookupDict

########
# This is needed in order to determine if something is a string or not. It is necessary because
# of differences between python2 (basestring) and python3 (str). If python2 support is ever
# dropped, remove this and change the basestring references below to str
try:
basestring
except NameError:
basestring = str
########

def _mapping_from_xml(root_xml, column_xml):
retval = Field.from_xml(column_xml)
local_name = retval.id
if "'" in local_name:
local_name = sax.escape(local_name, {"'": "'"})
xpath = ".//metadata-record[@class='column'][local-name='{}']".format(local_name)
metadata_record = root_xml.find(xpath)
_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object'])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Namedtuples rule



def _get_metadata_xml_for_field(root_xml, field_name):
if "'" in field_name:
field_name = sax.escape(field_name, {"'": "'"})
xpath = ".//metadata-record[@class='column'][local-name='{}']".format(field_name)
return root_xml.find(xpath)


def _is_used_by_worksheet(names, field):
return any((y for y in names if y in field.worksheets))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can murder even more ()'s -- there's an implicit generator inside the function call to any, so it can just be any(i for i in iterable if condition)



class FieldDictionary(MultiLookupDict):
    """A MultiLookupDict of fields that can be filtered by worksheet usage."""

    def used_by_sheet(self, name):
        """Return the list of fields used by the given worksheet(s).

        :param name: a single worksheet name (string), or a collection of
            worksheet names of which any one may match.
        :return: list of the stored field values whose ``worksheets``
            contain the name (or any of the names).
        """
        if isinstance(name, basestring):
            # One name: a direct membership test against each field.
            def is_match(field):
                return name in field.worksheets
        else:
            # Several names: a field matches if any of them uses it.
            def is_match(field):
                return _is_used_by_worksheet(name, field)

        return [field for field in self.values() if is_match(field)]


def _column_object_from_column_xml(root_xml, column_xml):
    """Build a field from column XML and enrich it with matching metadata.

    :param root_xml: XML searched for a metadata record whose local-name
        equals the new field's id.
    :param column_xml: XML handed to ``Field.from_column_xml``.
    :return: ``_ColumnObjectReturnTuple(id, object)`` for the new field.
    """
    field_object = Field.from_column_xml(column_xml)
    metadata_record = _get_metadata_xml_for_field(root_xml, field_object.id)
    # Metadata is optional: only apply it when a matching record exists.
    if metadata_record is not None:
        field_object.apply_metadata(metadata_record)
    return _ColumnObjectReturnTuple(field_object.id, field_object)


def _column_object_from_metadata_xml(metadata_xml):
    """Build a field from metadata XML alone.

    :param metadata_xml: XML handed to ``Field.from_metadata_xml``.
    :return: ``_ColumnObjectReturnTuple(id, object)`` for the new field.
    """
    field = Field.from_metadata_xml(metadata_xml)
    return _ColumnObjectReturnTuple(id=field.id, object=field)


class ConnectionParser(object):
Expand Down Expand Up @@ -73,7 +110,7 @@ def __init__(self, dsxml, filename=None):

@classmethod
def from_file(cls, filename):
"Initialize datasource from file (.tds)"
"""Initialize datasource from file (.tds)"""

if zipfile.is_zipfile(filename):
dsxml = xfile.get_xml_from_archive(filename).getroot()
Expand Down Expand Up @@ -141,6 +178,16 @@ def fields(self):
return self._fields

def _get_all_fields(self):
column_objects = (_mapping_from_xml(self._datasourceTree, xml)
for xml in self._datasourceTree.findall('.//column'))
return MultiLookupDict({k: v for k, v in column_objects})
column_objects = [_column_object_from_column_xml(self._datasourceTree, xml)
for xml in self._datasourceTree.findall('.//column')]
existing_fields = [x.id for x in column_objects]
metadata_fields = (x.text
for x in self._datasourceTree.findall(".//metadata-record[@class='column']/local-name"))

missing_fields = (x for x in metadata_fields if x not in existing_fields)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking very clean!

It might help readability if we break this into some more functions to abstract out the metadata processing a bit.

In pseudocode:

_get_all_fields():
    all_the_fields = []
    all_the_fields += _get_fields_from_column_xml()
    all_the_fields += _get_weird_metadata_only_fields()

    return FieldDictionary(k: v for k, v in all_the_fields)

def _get_weird_metadata_only_fields():
    does the list comprehension

Also fine to split that into a different issue/PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll do that as a second PR since I'm about to hit a string of meetings and won't be able to get that change checked in before I have to leave.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we start using the 🚀 emoji to mean "good to merge"?

column_objects.extend((
_column_object_from_metadata_xml(_get_metadata_xml_for_field(self._datasourceTree, field_name))
for field_name in missing_fields
))

return FieldDictionary({k: v for k, v in column_objects})
63 changes: 54 additions & 9 deletions tableaudocumentapi/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
'aggregation', # The type of aggregation on the field (e.g Sum, Avg)
]

# Pairs of (element name looked up inside the metadata XML, Field attribute
# name it populates); iterated when a Field is initialized from metadata XML.
_METADATA_TO_FIELD_MAP = [
('local-name', 'id'),
('local-type', 'datatype'),
('remote-alias', 'alias')
]


def _find_metadata_record(record, attrib):
element = record.find('.//{}'.format(attrib))
Expand All @@ -25,25 +31,60 @@ def _find_metadata_record(record, attrib):
class Field(object):
""" Represents a field in a datasource """

def __init__(self, xmldata):
for attrib in _ATTRIBUTES:
self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None))
def __init__(self, column_xml=None, metadata_xml=None):
    """Create a field from column XML, metadata XML, or both.

    :param column_xml: column XML; when given, it is the primary source and
        ``metadata_xml`` (if also given) is applied on top of it.
    :param metadata_xml: metadata XML; used as the only source when
        ``column_xml`` is not given.
    :raises AttributeError: when neither argument is supplied.
    """
    # Every known attribute starts out as None, whatever the source.
    for attribute_names in (_ATTRIBUTES, _METADATA_ATTRIBUTES):
        for attrib in attribute_names:
            setattr(self, '_{}'.format(attrib), None)
    self._worksheets = set()

    if column_xml is None and metadata_xml is None:
        raise AttributeError('column_xml or metadata_xml needed to initialize field')

    if column_xml is None:
        self._initialize_from_metadata_xml(metadata_xml)
        return

    self._initialize_from_column_xml(column_xml)
    if metadata_xml is not None:
        self.apply_metadata(metadata_xml)

def _initialize_from_column_xml(self, xmldata):
    """Populate each attribute named in ``_ATTRIBUTES`` from column XML.

    Attributes without a dedicated reader fall back to the XML attribute of
    the same name, or ``None`` when the element does not carry it.
    """
    def read_xml_attribute(attribute_name):
        return xmldata.attrib.get(attribute_name)

    for attrib in _ATTRIBUTES:
        self._apply_attribute(xmldata, attrib, read_xml_attribute)

def _initialize_from_metadata_xml(self, xmldata):
    """Populate the field from metadata XML alone.

    Each pair in ``_METADATA_TO_FIELD_MAP`` is applied first, using the
    metadata name to select the reader; the regular metadata attributes are
    then applied from the same XML.
    """
    for metadata_name, field_name in _METADATA_TO_FIELD_MAP:
        # Fallback reader: text of the element named after the metadata key.
        # The attribute name passed in by _apply_attribute is not needed.
        def read_element_text(_attrib, tag=metadata_name):
            return xmldata.find('.//{}'.format(tag)).text

        self._apply_attribute(xmldata, field_name, read_element_text,
                              read_name=metadata_name)
    self.apply_metadata(xmldata)

########################################
# Special case methods for constructing fields from various sources
# not intended for client use
########################################
def apply_metadata(self, metadata_record):
    """Apply each attribute named in ``_METADATA_ATTRIBUTES`` from the record.

    :param metadata_record: metadata XML the attribute values are read from.
    """
    # The fallback reader is the same for every attribute, so build it once.
    find_in_record = functools.partial(_find_metadata_record, metadata_record)
    for attrib in _METADATA_ATTRIBUTES:
        self._apply_attribute(metadata_record, attrib, find_in_record)

def add_used_in(self, name):
self._worksheets.add(name)

@classmethod
def from_column_xml(cls, xmldata):
    """Alternate constructor: build a field from column XML."""
    return cls(column_xml=xmldata)

@classmethod
def from_xml(cls, xmldata):
    """Backward-compatible alias for :meth:`from_column_xml`."""
    return cls.from_column_xml(xmldata)

def _apply_attribute(self, xmldata, attrib, default_func):
if hasattr(self, '_read_{}'.format(attrib)):
value = getattr(self, '_read_{}'.format(attrib))(xmldata)
@classmethod
def from_metadata_xml(cls, xmldata):
    """Alternate constructor: build a field from metadata XML alone."""
    field = cls(metadata_xml=xmldata)
    return field

def _apply_attribute(self, xmldata, attrib, default_func, read_name=None):
if read_name is None:
read_name = attrib
if hasattr(self, '_read_{}'.format(read_name)):
value = getattr(self, '_read_{}'.format(read_name))(xmldata)
else:
value = default_func(attrib)

Expand Down Expand Up @@ -121,6 +162,10 @@ def default_aggregation(self):
""" The default type of aggregation on the field (e.g Sum, Avg)"""
return self._aggregation

@property
def worksheets(self):
return list(self._worksheets)

######################################
# Special Case handling methods for reading the values from the XML
######################################
Expand Down
60 changes: 52 additions & 8 deletions tableaudocumentapi/workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,12 @@
###############################################################################
import os
import zipfile
import weakref

import xml.etree.ElementTree as ET

from tableaudocumentapi import Datasource, xfile

###########################################################################
#
# Utility Functions
#
###########################################################################


class Workbook(object):
"""
Expand All @@ -33,6 +28,7 @@ def __init__(self, filename):
Constructor.

"""

self._filename = filename

# Determine if this is a twb or twbx and get the xml root
Expand All @@ -47,13 +43,26 @@ def __init__(self, filename):
self._datasources = self._prepare_datasources(
self._workbookRoot) # self.workbookRoot.find('datasources')

self._datasource_index = self._prepare_datasource_index(self._datasources)

self._worksheets = self._prepare_worksheets(
self._workbookRoot, self._datasource_index
)

###########
# datasources
###########
@property
def datasources(self):
return self._datasources

###########
# worksheets
###########
@property
def worksheets(self):
return self._worksheets

###########
# filename
###########
Expand Down Expand Up @@ -95,12 +104,47 @@ def save_as(self, new_filename):
# Private API.
#
###########################################################################
def _prepare_datasources(self, xmlRoot):
@staticmethod
def _prepare_datasource_index(datasources):
retval = weakref.WeakValueDictionary()
for datasource in datasources:
retval[datasource.name] = datasource

return retval

@staticmethod
def _prepare_datasources(xml_root):
    """Create a ``Datasource`` for each child of the ``datasources`` element.

    :param xml_root: XML element searched for a ``datasources`` child.
    :return: list of ``Datasource`` objects; empty when the ``datasources``
        element is missing.
    """
    datasource_elements = xml_root.find('datasources')
    # find() returns None when nothing matches, so guard before iterating.
    if datasource_elements is None:
        return []

    return [Datasource(element) for element in datasource_elements]

@staticmethod
def _prepare_worksheets(xml_root, ds_index):
worksheets = []
worksheets_element = xml_root.find('.//worksheets')
Copy link
Contributor

@t8y8 t8y8 Jul 19, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But here it does return None if there's no match (shrug)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay I wrote a test for "an empty workbook should not throw errors" to catch all of the places where find returns None but we are assuming that it can't be None, and removed the extraneous check for None in the case of dependencies.

the difference is find returns None when nothing is found, but findall returns an empty list, which we don't need to check since iterating over an empty list is a nop anyway.

if worksheets_element is None:
return worksheets

for worksheet_element in worksheets_element:
worksheet_name = worksheet_element.attrib['name']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking, given our XML's usual craziness, the name is actually the name? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, this is one place where name = name = what's in the dang ui.

worksheets.append(worksheet_name) # TODO: A real worksheet object, for now, only name

dependencies = worksheet_element.findall('.//datasource-dependencies')

for dependency in dependencies:
datasource_name = dependency.attrib['datasource']
datasource = ds_index[datasource_name]
for column in dependency.findall('.//column'):
column_name = column.attrib['name']
datasource.fields[column_name].add_used_in(worksheet_name)

return worksheets
23 changes: 22 additions & 1 deletion test/assets/TABLEAU_10_TWB.twb
Original file line number Diff line number Diff line change
@@ -1 +1,22 @@
<?xml version='1.0' encoding='utf-8' ?><workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0' xmlns:user='http://www.tableausoftware.com/xml/user'><datasources><datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx' version='10.0'><connection class='federated'><named-connections><named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'><connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306' server='mysql55.test.tsi.lan' source-charset='' username='test' /></named-connection><named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'><connection authentication='sqlserver' class='sqlserver' dbname='TestV1' odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan' username='test' /></named-connection></named-connections></connection></datasource></datasources></workbook>
<?xml version='1.0' encoding='utf-8' ?>
<workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0'
xmlns:user='http://www.tableausoftware.com/xml/user'>
<datasources>
<datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx'
version='10.0'>
<connection class='federated'>
<named-connections>
<named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'>
<connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306'
server='mysql55.test.tsi.lan' source-charset='' username='test'/>
</named-connection>
<named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'>
<connection authentication='sqlserver' class='sqlserver' dbname='TestV1'
odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan'
username='test'/>
</named-connection>
</named-connections>
</connection>
</datasource>
</datasources>
</workbook>
Loading