Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
changes to turn "Python - Single Input Output" into "ATAwP"
  • Loading branch information
nmryan committed Apr 18, 2018
1 parent d54361d commit b711bbc
Showing 1 changed file with 192 additions and 54 deletions.
246 changes: 192 additions & 54 deletions Engine.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -5,7 +5,72 @@


import AlteryxPythonSDK as Sdk import AlteryxPythonSDK as Sdk
import xml.etree.ElementTree as Et import xml.etree.ElementTree as Et

from collections import Counter
import nltk
nltk.download('punkt')

# Labels that genderize() assigns to a sentence; they are also the keys of the
# Counters built by count_gender().
MALE, FEMALE = 'male', 'female'
UNKNOWN, BOTH = 'unknown', 'both'

# Lower-case tokens that mark a sentence as referring to a male subject.
MALE_WORDS = {
    'boy', 'boyfriend', 'boyfriends', 'boys', 'brother', 'brothers',
    'chairman', 'dad', 'dads', 'dude', 'father', 'fathers', 'fiance',
    'gentleman', 'gentlemen', 'god', 'grandfather', 'grandpa', 'grandson',
    'groom', 'guy', 'he', "he's", 'him', 'himself', 'his', 'husband',
    'husbands', 'king', 'male', 'man', 'men', "men's", 'mr', 'nephew',
    'nephews', 'priest', 'prince', 'son', 'sons', 'spokesman', 'uncle',
    'uncles', 'waiter', 'widower', 'widowers',
}

# Lower-case tokens that mark a sentence as referring to a female subject.
FEMALE_WORDS = {
    'actress', 'aunt', 'aunts', 'bride', 'chairwoman', 'daughter',
    'daughters', 'female', 'fiancee', 'girl', 'girlfriend', 'girlfriends',
    'girls', 'goddess', 'granddaughter', 'grandma', 'grandmother', 'her',
    'heroine', 'herself', 'ladies', 'lady', 'mom', 'moms', 'mother',
    'mothers', 'mrs', 'ms', 'niece', 'nieces', 'priestess', 'princess',
    'queens', 'she', "she's", 'sister', 'sisters', 'spokeswoman',
    'waitress', 'widow', 'widows', 'wife', 'wives', 'woman', 'women',
    "women's",
}

def genderize(words):
    """
    Label one tokenized sentence according to the gendered words it contains.
    Tokens are compared exactly against MALE_WORDS and FEMALE_WORDS, both of
    which hold lower-case entries.
    :param words: The tokens of a single sentence.
    :return: MALE, FEMALE, BOTH or UNKNOWN.
    """

    has_male = bool(MALE_WORDS.intersection(words))
    has_female = bool(FEMALE_WORDS.intersection(words))

    # Every (male present, female present) combination maps to exactly one label.
    label_for = {
        (True, False): MALE,
        (False, True): FEMALE,
        (True, True): BOTH,
        (False, False): UNKNOWN,
    }
    return label_for[(has_male, has_female)]

def count_gender(sentences):
    """
    Tally sentences and tokens per gender label.
    :param sentences: An iterable of tokenized sentences (each a sized collection of tokens).
    :return: A pair of Counters keyed by the label from genderize(): the number of sentences
    per label, and the number of tokens in those sentences per label.
    """

    sentence_tally = Counter()
    token_tally = Counter()

    for tokens in sentences:
        label = genderize(tokens)
        sentence_tally.update([label])
        # The whole sentence length is credited to the label, not just the matching tokens.
        token_tally.update({label: len(tokens)})

    return sentence_tally, token_tally

def parse_gender(text):
    """
    Split text into lower-cased, tokenized sentences and count them per gender label.
    The share of a label can be derived by the caller as words[label] / total.
    :param text: The raw text to analyze.
    :return: A tuple (sents, words, total): the two Counters produced by count_gender()
    and the total number of tokens across all labels.
    """

    tokenized = []
    for raw_sentence in nltk.sent_tokenize(text):
        # Lower-case every token so it can match the lower-case gender word sets.
        tokenized.append([token.lower() for token in nltk.word_tokenize(raw_sentence)])

    sents, words = count_gender(tokenized)
    return sents, words, sum(words.values())


class AyxPlugin: class AyxPlugin:
""" """
Expand All @@ -28,32 +93,54 @@ def __init__(self, n_tool_id: int, alteryx_engine: object, output_anchor_mgr: ob


# Custom properties # Custom properties
self.single_input = None self.single_input = None
self.n_record_select = None
self.xml_sort_info = ''
self.do_sort = False
self.field_selection = None self.field_selection = None
self.output_anchor = None self.output_anchor = None


self.male_name = "male_score"
self.male_type = Sdk.FieldType.double
self.female_name = "female_score"
self.female_type = Sdk.FieldType.double
self.both_name = "both_score"
self.both_type = Sdk.FieldType.double
self.unknown_name = "unknown_score"
self.unknown_type = Sdk.FieldType.double

self.male_sentences_name = "male_sentences"
self.male_sentences_type = Sdk.FieldType.int64
self.female_sentences_name = "female_sentences"
self.female_sentences_type = Sdk.FieldType.int64
self.both_sentences_name = "both_sentences"
self.both_sentences_type = Sdk.FieldType.int64
self.unknown_sentences_name = "unknown_sentences"
self.unknown_sentences_type = Sdk.FieldType.int64

self.male_field = None
self.female_field = None
self.both_field = None
self.unknown_field = None

self.male_sentences_field = None
self.female_sentences_field = None
self.both_sentences_field = None
self.unknown_sentences_field = None

self.input_field = None

def pi_init(self, str_xml: str): def pi_init(self, str_xml: str):
""" """
Handles building out the sort info, to pass into pre_sort() later on, from the user configuration. Handles building out the sort info, to pass into pre_sort() later on, from the user configuration.
Called when the Alteryx engine is ready to provide the tool configuration from the GUI. Called when the Alteryx engine is ready to provide the tool configuration from the GUI.
:param str_xml: The raw XML from the GUI. :param str_xml: The raw XML from the GUI.
""" """

# Getting the user-entered selections from the GUI. # Getting the user-entered selections from the GUI.
self.n_record_select = Et.fromstring(str_xml).find('NRecords').text if 'NRecords' in str_xml else None
self.do_sort = Et.fromstring(str_xml).find('DoSort').text == 'True' if 'DoSort' in str_xml else None
if Et.fromstring(str_xml).find('FieldSelect') is not None: if Et.fromstring(str_xml).find('FieldSelect') is not None:
self.field_selection = Et.fromstring(str_xml).find('FieldSelect').text self.field_selection = Et.fromstring(str_xml).find('FieldSelect').text
order_selection = Et.fromstring(str_xml).find('OrderType').text if 'OrderType' in str_xml else None else:

self.alteryx_engine.output_message(self.n_tool_id, Sdk.EngineMessageType.error, 'Please select field to analyze')
# Letting the user know of the necessary selections, if they haven't been selected.
if self.do_sort and self.field_selection is None:
self.alteryx_engine.output_message(self.n_tool_id, Sdk.EngineMessageType.error, 'Please select field to order by')
elif self.do_sort and self.field_selection is not None:
self.build_sort_info("SortInfo", self.field_selection, order_selection) # Building out the <SortInfo> portion.


#self.alteryx_engine.output_message(self.n_tool_id, Sdk.EngineMessageType.info, self.field_selection)

self.output_anchor = self.output_anchor_mgr.get_output_anchor('Output') # Getting the output anchor from the XML file. self.output_anchor = self.output_anchor_mgr.get_output_anchor('Output') # Getting the output anchor from the XML file.


def pi_add_incoming_connection(self, str_type: str, str_name: str) -> object: def pi_add_incoming_connection(self, str_type: str, str_name: str) -> object:
Expand All @@ -65,9 +152,6 @@ def pi_add_incoming_connection(self, str_type: str, str_name: str) -> object:
:return: The IncomingInterface object(s). :return: The IncomingInterface object(s).
""" """


if self.do_sort:
self.alteryx_engine.pre_sort(str_type, str_name, self.xml_sort_info)

self.single_input = IncomingInterface(self) self.single_input = IncomingInterface(self)
return self.single_input return self.single_input


Expand Down Expand Up @@ -98,31 +182,6 @@ def pi_close(self, b_has_errors: bool):


self.output_anchor.assert_close() # Checks whether connections were properly closed. self.output_anchor.assert_close() # Checks whether connections were properly closed.


def build_sort_info(self, element: str, subelement: property, order: str):
"""
A non-interface method responsible for building out the proper XML string format for pre_sort.
:param element: SortInfo or FieldFilterList
:param subelement: The user selected field
:param order: Asc or Desc
"""

# Building the XML string to pass as an argument to pre_sort's sort info parameter.
root = Et.Element(element)
sub_element = 'Field field="{0}" order="{1}"' if order != "" else 'Field field="{0}"'
Et.SubElement(root, sub_element.format(subelement, order))
xml_string = Et.tostring(root, encoding='utf8', method='xml')
self.xml_sort_info += xml_string.decode('utf8').replace("<?xml version='1.0' encoding='utf8'?>\n", "")

def xmsg(self, msg_string: str) -> str:
"""
A non-interface, non-operational placeholder for the eventual localization of predefined user-facing strings.
:param msg_string: The user-facing string.
:return: msg_string
"""

return msg_string


class IncomingInterface: class IncomingInterface:
""" """
This optional class is returned by pi_add_incoming_connection, and it implements the incoming interface methods, to This optional class is returned by pi_add_incoming_connection, and it implements the incoming interface methods, to
Expand All @@ -140,7 +199,8 @@ def __init__(self, parent: object):
self.parent = parent self.parent = parent


# Custom properties # Custom properties
self.record_cnt = 0 self.record_copier = None
self.record_creator = None


def ii_init(self, record_info_in: object) -> bool: def ii_init(self, record_info_in: object) -> bool:
""" """
Expand All @@ -149,26 +209,104 @@ def ii_init(self, record_info_in: object) -> bool:
:return: True for success, otherwise False. :return: True for success, otherwise False.
""" """


record_info_out = record_info_in.clone() # Since no new data is being introduced, setting the outgoing layout the same as record_info_in. # Returns a new, empty RecordCreator object that is identical to record_info_in.
self.parent.output_anchor.init(record_info_out) # Lets the downstream tools know what the outgoing record metadata will look like, based on record_info_out. record_info_out = record_info_in.clone()

# Adds field to record with specified name and output type.
record_info_out.add_field(self.parent.female_name, self.parent.female_type)
record_info_out.add_field(self.parent.male_name, self.parent.male_type)
record_info_out.add_field(self.parent.both_name, self.parent.both_type)
record_info_out.add_field(self.parent.unknown_name, self.parent.unknown_type)
record_info_out.add_field(self.parent.female_sentences_name, self.parent.female_sentences_type)
record_info_out.add_field(self.parent.male_sentences_name, self.parent.male_sentences_type)
record_info_out.add_field(self.parent.both_sentences_name, self.parent.both_sentences_type)
record_info_out.add_field(self.parent.unknown_sentences_name, self.parent.unknown_sentences_type)

# Lets the downstream tools know what the outgoing record metadata will look like, based on record_info_out.
self.parent.output_anchor.init(record_info_out)

# Creating a new, empty record creator based on record_info_out's record layout.
self.record_creator = record_info_out.construct_record_creator()

# Instantiate a new instance of the RecordCopier class.
self.record_copier = Sdk.RecordCopier(record_info_out, record_info_in)

# Map each column of the input to where we want in the output.
for index in range(record_info_in.num_fields):
# Adding a field index mapping.
self.record_copier.add(index, index)

# Let record copier know that all field mappings have been added.
self.record_copier.done_adding()

# Grab the index of our new field in the record, so we don't have to do a string lookup on every push_record.
self.parent.male_field = record_info_out[record_info_out.get_field_num(self.parent.male_name)]
self.parent.female_field = record_info_out[record_info_out.get_field_num(self.parent.female_name)]
self.parent.both_field = record_info_out[record_info_out.get_field_num(self.parent.both_name)]
self.parent.unknown_field = record_info_out[record_info_out.get_field_num(self.parent.unknown_name)]
self.parent.male_sentences_field = record_info_out[record_info_out.get_field_num(self.parent.male_sentences_name)]
self.parent.female_sentences_field = record_info_out[record_info_out.get_field_num(self.parent.female_sentences_name)]
self.parent.both_sentences_field = record_info_out[record_info_out.get_field_num(self.parent.both_sentences_name)]
self.parent.unknown_sentences_field = record_info_out[record_info_out.get_field_num(self.parent.unknown_sentences_name)]

# Grab the index of our input field in the record, so we don't have to do a string lookup on every push_record.
self.parent.input_field = record_info_out[record_info_out.get_field_num(self.parent.field_selection)]

return True return True


def ii_push_record(self, in_record: object) -> bool: def ii_push_record(self, in_record: object) -> bool:
""" """
Responsible for pushing records out, under a count limit set by the user in n_record_select. Responsible for pushing records out
Called when an input record is being sent to the plugin. Called when an input record is being sent to the plugin.
:param in_record: The data for the incoming record. :param in_record: The data for the incoming record.
:return: False if method calling limit (record_cnt) is hit. :return: False if method calling limit (record_cnt) is hit.
""" """

# Copy the data from the incoming record into the outgoing record.
self.record_cnt += 1 # To keep track of the push record calls. self.record_creator.reset()

self.record_copier.copy(self.record_creator, in_record)
# Quit calling ii_push_record going forward once n_record_select limit is reached.
if self.record_cnt <= int(self.parent.n_record_select): self.parent.female_field.set_null(self.record_creator)
self.parent.output_anchor.push_record(in_record) self.parent.female_sentences_field.set_null(self.record_creator)
self.parent.output_anchor.output_record_count(False) # False: Let the Alteryx engine know of the record count self.parent.male_field.set_null(self.record_creator)
else: self.parent.male_sentences_field.set_null(self.record_creator)
self.parent.both_field.set_null(self.record_creator)
self.parent.both_sentences_field.set_null(self.record_creator)
self.parent.unknown_field.set_null(self.record_creator)
self.parent.unknown_sentences_field.set_null(self.record_creator)

if self.parent.input_field.get_as_string(in_record) is not None:
self.parent.female_field.set_from_double(self.record_creator, 0)
self.parent.female_sentences_field.set_from_int64(self.record_creator, 0)
self.parent.male_field.set_from_double(self.record_creator, 0)
self.parent.male_sentences_field.set_from_int64(self.record_creator, 0)
self.parent.both_field.set_from_double(self.record_creator, 0)
self.parent.both_sentences_field.set_from_int64(self.record_creator, 0)
self.parent.unknown_field.set_from_double(self.record_creator, 0)
self.parent.unknown_sentences_field.set_from_int64(self.record_creator, 0)
sents, words, total = parse_gender(self.parent.input_field.get_as_string(in_record))
for gender, count in words.items():
pcent = (count / total)
nsents = sents[gender]

if gender == 'female':
self.parent.female_field.set_from_double(self.record_creator, pcent)
self.parent.female_sentences_field.set_from_int64(self.record_creator, nsents)
elif gender == 'male':
self.parent.male_field.set_from_double(self.record_creator, pcent)
self.parent.male_sentences_field.set_from_int64(self.record_creator, nsents)
elif gender == 'both':
self.parent.both_field.set_from_double(self.record_creator, pcent)
self.parent.both_sentences_field.set_from_int64(self.record_creator, nsents)
elif gender == 'unknown':
self.parent.unknown_field.set_from_double(self.record_creator, pcent)
self.parent.unknown_sentences_field.set_from_int64(self.record_creator, nsents)

out_record = self.record_creator.finalize_record()

# Push the record downstream and quit if there's a downstream error.
if not self.parent.output_anchor.push_record(out_record):
return False return False

return True return True


def ii_update_progress(self, d_percent: float): def ii_update_progress(self, d_percent: float):
Expand Down

0 comments on commit b711bbc

Please sign in to comment.