In [1]:
from collections import namedtuple

# Lightweight record types describing the pieces of the scorecard document.
# Field names are spelled exactly as the XML attribute names they are later
# written out as (hence the camelCase).

# One entry of the data dictionary.
DataField = namedtuple("DataField", ["name", "dataType", "optype"])

# One entry of the mining schema; usageType may be None.
MiningField = namedtuple("MiningField", ["name", "usageType"])

# One entry of the model output section.
OutputField = namedtuple("OutputField", ["name", "feature", "dataType", "optype"])

# A scored characteristic: its name, baseline score and list of Attribute records.
Characteristic = namedtuple("Characteristic", ["name", "baselineScore", "attributes"])

# One scoring rule of a characteristic: reason code, partial score and predicate.
Attribute = namedtuple("Attribute", ["reasonCode", "partialScore", "predicate"])


In [2]:
# Data dictionary entries: every field the document declares, including the
# score field itself.
data_fields = [
    DataField(name="role", dataType="string", optype="categorical"),
    DataField(name="age", dataType="integer", optype="continuous"),
    DataField(name="wage", dataType="double", optype="continuous"),
    DataField(name="calculatedScore", dataType="double", optype="continuous"),
]

# Mining schema entries. A usageType of None means no usageType is recorded
# for that field; only the score field carries one.
schema_fields = [
    MiningField(name="role", usageType=None),
    MiningField(name="age", usageType=None),
    MiningField(name="wage", usageType=None),
    MiningField(name="calculatedScore", usageType="predicted"),
]

# Output section entries: the score plus three reason-code slots.
output_fields = [
    OutputField(name="OutScore", feature="predictedValue",
                dataType="double", optype="continuous"),
    OutputField(name="ReasonCode1", feature="reasonCode",
                dataType="string", optype="categorical"),
    OutputField(name="ReasonCode2", feature="reasonCode",
                dataType="string", optype="categorical"),
    OutputField(name="ReasonCode3", feature="reasonCode",
                dataType="string", optype="categorical"),
]

# Scoring rules, one Characteristic per input field.
# Each Attribute's predicate takes one of three shapes:
#   None            -> the field is missing
#   "<op> <value>"  -> a single comparison
#   [rule, rule...] -> several comparisons that must all hold
characteristic_details = [
    Characteristic(
        name="role",
        baselineScore="19",
        attributes=[
            Attribute(reasonCode="RoleMissing", partialScore="-9", predicate=None),
            Attribute(reasonCode="RoleMRKT", partialScore="19", predicate="== 'marketing'"),
            Attribute(reasonCode="RoleENGR", partialScore="3", predicate="== 'engineering'"),
            Attribute(reasonCode="RoleBSNS", partialScore="6", predicate="== 'business'"),
        ],
    ),
    Characteristic(
        name="age",
        baselineScore="18",
        attributes=[
            Attribute(reasonCode="AgeMissing", partialScore="-1", predicate=None),
            Attribute(reasonCode="AgeChild", partialScore="-3", predicate="<= 18"),
            Attribute(reasonCode="AgeYoungAdult", partialScore="0", predicate=["> 18", "<= 29"]),
            Attribute(reasonCode="AgeAdult", partialScore="12", predicate=["> 29", "<= 39"]),
            Attribute(reasonCode="AgeOlderAdult", partialScore="18", predicate="> 39"),
        ],
    ),
    Characteristic(
        name="wage",
        baselineScore="10",
        attributes=[
            Attribute(reasonCode="WageMissing", partialScore="5", predicate=None),
            Attribute(reasonCode="WageLow", partialScore="26", predicate="<= 1000"),
            Attribute(reasonCode="WageMedium", partialScore="5", predicate=["> 1000", "<= 2500"]),
            Attribute(reasonCode="WageHigh", partialScore="-3", predicate="> 2500"),
        ],
    ),
]

In [9]:
from lxml import etree

def _append_records(parent, tag, records):
    """Append one ``tag`` child to ``parent`` per record.

    Each record must provide ``_asdict()`` (e.g. a namedtuple); its fields
    become the attributes of the new child element.
    """
    for record in records:
        etree.SubElement(parent, tag, **record._asdict())


# Document root; version and xmlns are written as ordinary attributes.
root = etree.Element(
    "PMML",
    version="4.2",
    xmlns="http://www.dmg.org/PMML-4_2",
)

# Header (left empty)
header = etree.SubElement(root, "Header")

# Data dictionary: one <DataField> per entry of data_fields
datadict = etree.SubElement(root, "DataDictionary")
_append_records(datadict, "DataField", data_fields)

# Scorecard model element, created without attributes.
# NOTE(review): the PMML schema appears to require a functionName attribute
# on Scorecard -- confirm against the PMML 4.2 specification.
scorecard = etree.SubElement(root, "Scorecard")

# Mining schema: usageType is only written for fields that define one
schema = etree.SubElement(scorecard, "MiningSchema")
for field_name, usage in schema_fields:
    mining_field = etree.SubElement(schema, "MiningField", name=field_name)
    if usage is None:
        continue
    mining_field.set("usageType", usage)

# Output section: one <OutputField> per entry of output_fields
output = etree.SubElement(scorecard, "Output")
_append_records(output, "OutputField", output_fields)
# Characteristic scoring

# Comparison symbols accepted in rule strings -> SimplePredicate operator names.
opmap = {
    "<":"lessThan",
    "<=":"lessOrEqual",
    "==":"equal",
    ">=":"greaterOrEqual",
    ">":"greaterThan",
}


def _add_simple_predicate(parent, field_name, rule):
    """Append a <SimplePredicate> for one ``"<op> <value>"`` rule string.

    parent     -- element that receives the new predicate
    field_name -- value for the predicate's ``field`` attribute
    rule       -- text such as ``"<= 18"`` or ``"== 'marketing'"``

    Returns the new element.
    Raises ValueError if ``rule`` has no value part, and KeyError if the
    comparison symbol is not a key of ``opmap``.
    """
    # Split on the first run of whitespace only, so a value that itself
    # contains spaces stays in one piece.
    cmpop, value = rule.strip().split(None, 1)
    value = value.strip()
    # Quotes in the rule text only delimit a string literal; drop one matching
    # pair so value="marketing" is emitted rather than value="'marketing'".
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
        value = value[1:-1]
    return etree.SubElement(parent, "SimplePredicate",
                            field=field_name, operator=opmap[cmpop], value=value)


characteristics = etree.SubElement(scorecard, "Characteristics")
for name, baselineScore, attributes in characteristic_details:
    # Element name and reason code are derived from the input field's name.
    characteristic = etree.SubElement(characteristics, "Characteristic",
                                      name = name + "Score",
                                      reasonCode = name + "RC",
                                      baselineScore = baselineScore)
    for reasonCode, partialScore, predicate in attributes:
        attribute = etree.SubElement(characteristic, "Attribute",
                                     reasonCode = reasonCode,
                                     partialScore = partialScore)
        if predicate is None:
            # Predicate of None -> isMissing
            etree.SubElement(attribute, "SimplePredicate", field=name, operator="isMissing")
        elif hasattr(predicate, "strip"):
            # String-like -> single comparison predicate
            _add_simple_predicate(attribute, name, predicate)
        else:
            # Otherwise -> iterable of rule strings, and-ed together
            and_group = etree.SubElement(attribute, "CompoundPredicate", booleanOperator="and")
            for rule in predicate:
                _add_simple_predicate(and_group, name, rule)
            
            

In [10]:
# tostring() returns encoded bytes here (ASCII, per the XML declaration it
# emits). Decode before printing so Python 3 shows the document itself rather
# than a b'...' repr with escaped newlines; Python 2 output is unchanged.
print(etree.tostring(root, pretty_print=True, xml_declaration=True).decode("ascii"))

<?xml version='1.0' encoding='ASCII'?>
<PMML version="4.2" xmlns="http://www.dmg.org/PMML-4_2">
  <Header/>
  <DataDictionary>
    <DataField dataType="string" name="role" optype="categorical"/>
    <DataField dataType="integer" name="age" optype="continuous"/>
    <DataField dataType="double" name="wage" optype="continuous"/>
    <DataField dataType="double" name="calculatedScore" optype="continuous"/>
  </DataDictionary>
  <Scorecard>
    <MiningSchema>
      <MiningField name="role"/>
      <MiningField name="age"/>
      <MiningField name="wage"/>
      <MiningField name="calculatedScore" usageType="predicted"/>
    </MiningSchema>
    <Output>
      <OutputField dataType="double" feature="predictedValue" name="OutScore" optype="continuous"/>
      <OutputField dataType="string" feature="reasonCode" name="ReasonCode1" optype="categorical"/>
      <OutputField dataType="string" feature="reasonCode" name="ReasonCode2" optype="categorical"/>
      <OutputField dataType="string" featur