In [1]:
!pip install mwparserfromhell
!pip install wptools
import re
import json
import wptools
from mwparserfromhell import parse
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell.nodes.argument import Argument
from mwparserfromhell.nodes.comment import Comment
from mwparserfromhell.nodes.external_link import ExternalLink
from mwparserfromhell.nodes.heading import Heading
from mwparserfromhell.nodes.html_entity import HTMLEntity
from mwparserfromhell.nodes.tag import Tag
from mwparserfromhell.nodes.template import Template
from mwparserfromhell.nodes.text import Text
from mwparserfromhell.nodes.wikilink import Wikilink
from mwparserfromhell.nodes.extras.parameter import Parameter

Collecting mwparserfromhell
  Downloading mwparserfromhell-0.6.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (176 kB)
[?25l[K     |█▉                              | 10 kB 14.3 MB/s eta 0:00:01[K     |███▊                            | 20 kB 18.6 MB/s eta 0:00:01[K     |█████▋                          | 30 kB 22.5 MB/s eta 0:00:01[K     |███████▍                        | 40 kB 23.7 MB/s eta 0:00:01[K     |█████████▎                      | 51 kB 25.8 MB/s eta 0:00:01[K     |███████████▏                    | 61 kB 27.8 MB/s eta 0:00:01[K     |█████████████                   | 71 kB 28.9 MB/s eta 0:00:01[K     |██████████████▉                 | 81 kB 20.7 MB/s eta 0:00:01[K     |████████████████▊               | 92 kB 22.1 MB/s eta 0:00:01[K     |██████████████████▌             | 102 kB 23.3 MB/s eta 0:00:01[K     |████████████████████▍           | 112 kB 23.3 MB/s eta 0:00:01[K     |██████████████████████▎       

In [2]:
def render_template(node, name, params):
  """Translate a MediaWiki template call into an expression string.

  Args:
    node: The template node being rendered. It is not inspected here.
    name: Template name. It is lower-cased, stripped and has newlines
      removed before being matched against the supported templates.
    params: Dict mapping parameter names to already-rendered expression
      strings. For names of the form "prefix:rest" the caller stores the
      rendered "rest" under key "0".

  Returns:
    The expression string for the template. If the template is not
    supported, or a parameter it needs is missing, a message is printed
    and the rendering of an empty string is returned.
  """
  name = name.lower().strip().replace("\n", "")

  # Templates that forward a single parameter unchanged: name -> parameter key.
  passthrough = {
      "nasdaq": "1",
      "nowrap": "1",
      "country abbreviation": "1",
      "replace": "1",
      "small": "1",
      "#ifexist": "1",
      "if empty": "1",
      "delink": "1",
      "abbr": "1",
      "longitem": "1",
      "formatnum": "0",
      "lang": "2",
  }

  # Templates that concatenate every parameter with a fixed separator.
  separators = {
      "flatlist": " + ' ' + ",
      "plainlist": " + '\n' + ",
      "br separated entries": " + '\n' + ",
      "unbulleted list": " + '\n' + ",
      "hlist": " + '\n' + ",
      "comma separated entries": " + ', ' + ",
  }

  # Templates that always produce the same literal text.
  literals = {
      "increase": "▲",
      "decrease": "▼",
      "break": "\n",
      "nbsp": " ",
      "pluralize from text": "",
      "pagenamebase": "PAGENAMEBASE",
  }

  try:
    if name in passthrough:
      return params[passthrough[name]]

    if name in separators:
      return separators[name].join(params.values())

    if name in literals:
      return render(literals[name])

    if name == "convert" or name == "cvt":
      return f"{params['1']} + ' ' + {params['2']}"

    if name == "us$":
      return render("US$") + " + " + params["1"]

    if name == "lc":
      return "toLower(" + params["0"] + ")"

    if name == "url":
      url = params["1"]
      # Links without a scheme get an explicit http:// prefix in the target.
      if "http" not in url:
        return f"'[' + {url} + '](http://' + {url} + ')'"
      return f"'[' + {url} + '](' + {url} + ')'"

    if name == "coord":
      # Parameters arrive as quoted literals; drop the surrounding quotes.
      lat = float(params["1"][1:-1])
      lon = float(params["2"][1:-1])
      north_south = "N" if lat >= 0 else "S"
      # The east/west letter must follow the longitude, not the latitude.
      east_west = "E" if lon >= 0 else "W"
      return f"'{lat}' + '°{north_south} ' + '{lon}' + '°{east_west}'"

    if name == "both":
      return f"if({params['0']} != '' && {params['1']} != '', '1', '')"

    if name == "#if":
      condition = params["0"]
      when_true = params["1"]
      when_false = params["2"] if "2" in params else render("")
      return f"if(trim({condition}) != '', {when_true}, {when_false})"

    if name == "#replace":
      return f"replace({params['0']}, {params['1']}, {params['2']})"

    if name == "#ifeq":
      left = params["0"]
      right = params["1"]
      when_equal = params["2"]
      otherwise = params["3"] if "3" in params else render("")
      return "if(" + left + " == " + right + ", " + when_equal + ", " + otherwise + ")"

    if name == "infobox":
      return render(params)

    if name == "#switch":
      # Prefer the explicit default; otherwise take the first case value.
      if "#default" in params:
        return params["#default"]
      for param in params:
        if param != "0":
          return params[param]

    if name == "#invoke":
      module = params["0"].lower().replace("'", "")
      if module == "infoboximage":
        return params["image"]
      if module == "hms":
        return params["duration"]
      if module == "string":
        if params["1"] == "'replace'":
          return params["2"]
      if module == "coordinates":
        return params["2"]

  except Exception as error:
    print(f"Error ocurred in render_template({name}): {error}")

  print(f"Error: Template named '{name}' is not implemented: {params}")

  return render(None)

def render(node, preserveWhitespace=True):
  """Render a wikitext node (or plain string) as an expression string.

  Plain strings become single-quoted literals. Wikitext nodes are
  dispatched on their exact type and rendered recursively.

  Args:
    node: None, a str, or a parsed wikitext node / Wikicode object.
    preserveWhitespace: When False, a str node is stripped before being
      quoted. The flag is not forwarded to recursive calls.

  Returns:
    The expression string for the node. For unsupported node types a
    message is printed and the type plus the raw node text is returned.
  """
  if node is None:
    return render('')

  kind = type(node)

  if kind is str:
    literal = node if preserveWhitespace else node.strip()
    return "'" + literal + "'"

  # Plain text and HTML entities are rendered from their string form.
  if kind is Text or kind is HTMLEntity:
    return render(str(node))

  # Comments contribute nothing to the output.
  if kind is Comment:
    return render(None)

  if kind is Parameter:
    return render(node.value)

  if kind is Wikilink:
    return render(node.text or node.title)

  if kind is ExternalLink:
    return render(f"[{node.title or node.url}]({node.url})")

  if kind is Template:
    template_name = node.name.strip()
    leading = []
    if ":" in template_name:
      # "prefix:rest" names: the prefix is the template, the rest is parameter "0".
      template_name = node.name.split(":")[0]
      leading = [Parameter("0", node.name[len(template_name)+1:].strip())]
    rendered_params = {}
    for param in leading + node.params:
      rendered_params[param.name.strip()] = render(param.value)
    return render_template(node, template_name, rendered_params)

  if kind is Argument:
    variable = str(node.name).replace(" ", "_").replace("-", "_")
    if variable[0] in "0123456789":
      # Names starting with a digit are prefixed to form a usable identifier.
      print("Illegal variable name:", variable)
      variable = "n" + variable
    fallback = render(node.default)
    return f"if({variable} != null, {variable}, {fallback})"

  if kind is Wikicode:
    pieces = []
    for child in node.nodes:
      rendered = render(child)
      if rendered:
        pieces.append(rendered)
    if not pieces:
      return render('')
    return " + ".join(pieces)

  if kind is Tag:
    tag = node.tag
    if tag == "includeonly":
      return render(None)
    if tag == "br":
      return render("\n")
    if tag == "b":
      return render(f"**{node.contents}**")
    if tag == "i":
      return render(f"*{node.contents}*")
    if tag == "li":
      return render(f"\n * {node.contents}")
    return render(node.contents)

  print(f"Rendering for node of type {type(node)} is not implemented.")
  return str(type(node)) + " " + str(node)

def wrap_expression(node):
  """Return the string form of `node` wrapped as "${...}"."""
  return "".join(("${", str(node), "}"))
  
def _heading_container(expression):
  """Build the emphasized, centered heading container used for 'above' and 'title'."""
  return {
      "type": "Container",
      "style": "emphasis",
      "$data": expression,
      "$when": "${length(trim($data)) > 0}",
      "items": [{
        "type": "TextBlock",
        "text": "${$data}",
        "style": "heading",
        "wrap": "true",
        "horizontalAlignment": "Center"
      }]
  }

def insert_component(prop, infobox, body):
  """Append the card element(s) for one infobox property to `body`.

  Args:
    prop: Name of the infobox parameter being processed (for example
      "above", "image", "header1", "label3" or "data3").
    infobox: Dict mapping infobox parameter names to rendered expression
      strings.
    body: List of card elements; it is mutated in place.

  Returns:
    None.

  Handled properties:
    - "above" / "title": emphasized heading container.
    - "subheaderN", "imageN", "captionN", "headerN": centered elements.
    - "labelN" with a matching "dataN": one fact, appended to the FactSet
      at the end of `body` (a new FactSet is started if there is none).
      The matching "dataN" property itself adds nothing.
    - "dataN" without a label: standalone centered TextBlock.
    - "labelN" without data: an error message is printed.
  """
  if prop == "above":
    body.append(_heading_container(wrap_expression(infobox[prop])))
    return

  if prop == "title":
    body.append(_heading_container(wrap_expression(infobox[prop])))

  if re.match(r"^subheader[\d]*$", prop):
    body.append({
        "type": "Container",
        "$data": wrap_expression(infobox[prop]),
        "$when": "${length(trim($data)) > 0}",
        "items": [{
          "type": "TextBlock",
          "text": "${$data}",
          "wrap": "true",
          "horizontalAlignment": "Center"
        }]
      })

  if re.match(r"^image[\d]*$", prop):
    body.append({
        "type": "Image",
        "horizontalAlignment": "Center",
        "$data": wrap_expression(infobox[prop]),
        "url": "${$data}",
        "$when": "${length(trim($data)) > 0}",
    })

  if re.match(r"^caption[\d]*$", prop):
    body.append({
        "type": "TextBlock",
        "$data": wrap_expression(infobox[prop]),
        "text": "${$data}",
        "horizontalAlignment": "Center",
        "$when": "${length(trim($data)) > 0}",
        "wrap": "true",
    })

  if re.match(r"^header[\d]*$", prop):
    body.append({
        "type": "Container",
        "style": "emphasis",
        "wrap": "true",
        "$data": wrap_expression(infobox[prop]),
        "$when": "${length(trim($data)) > 0}",
        "items": [
          {
            "type": "TextBlock",
            "text": "${$data}",
            "weight": "Bolder",
            "horizontalAlignment": "Center",
          }
        ]
    })

  # Derive the sibling property name so labelN and dataN can be paired.
  label_prop = prop.replace("data", "label")
  data_prop = prop.replace("label", "data")
  if "data" in data_prop and "label" in label_prop and label_prop in infobox and data_prop in infobox:
    # A pair is emitted once, when the label is visited; the data visit is a no-op.
    if label_prop == prop:
      try:
        # Keep consecutive facts together in the FactSet that ends the body.
        facts = body[-1]["facts"]
      except (IndexError, KeyError, TypeError):
        facts = []
        body.append({
            "type": "FactSet",
            "facts": facts
        })

      label_expression = infobox[label_prop]
      data_expression = infobox[data_prop]
      facts.append({
          "title": "${$data.title}",
          "$data": "${{'data': " + data_expression + ", 'title': " + label_expression + "}}",
          "value": "${$data.data}",
          "$when": "${length(trim($data.data)) > 0}"
      })
  elif "data" in prop or "label" in prop:
    if prop == label_prop:
      print(f"ERROR: label {prop} does not have an associated data.")
    elif prop == data_prop:
      # Standalone data (no label) is shown as a centered text block.
      body.append({
          "$data": wrap_expression(infobox[prop]),
          "$when": "${length(trim($data)) > 0}",
          "type": "TextBlock",
          "text": "${$data}",
          "horizontalAlignment": "Center",
      })

def infobox_to_card(template, wikipedia_url=None):
  """Convert infobox template wikitext into an Adaptive Card JSON string.

  Args:
    template: Wikitext containing a template named "infobox" (matched
      case-insensitively, at any nesting depth).
    wikipedia_url: URL used for the card's "Wikipedia" open-URL action.

  Returns:
    The card serialized with json.dumps.

  Raises:
    IndexError: If the wikitext contains no "infobox" template.
  """
  wikicode = parse(template)
  infobox_candidates = [
      candidate
      for candidate in wikicode.filter_templates(recursive=True)
      if candidate.name.lower().strip() == "infobox"
  ]
  infobox_node = infobox_candidates[0]
  print(infobox_node)
  print(infobox_node.params)

  # Render every infobox parameter to its expression string.
  infobox = {}
  for param in infobox_node.params:
    infobox[param.name.strip()] = render(param)
  print(infobox)

  body = []
  for prop in infobox:
    insert_component(prop, infobox, body)

  open_wikipedia = {
      "type": "Action.OpenUrl",
      "title": "Wikipedia",
      "url": wikipedia_url
  }
  card = {
      "type": "AdaptiveCard",
      "$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
      "version": "1.5",
      "body": body,
      "actions": [open_wikipedia]
  }
  return json.dumps(card)

def get_data_and_template(title):
  """Fetch a page's infobox data and build the card template for its infobox.

  The page is fetched with wptools. Its infobox values are rendered to
  plain text, and the infobox template named in the page's wikitext is
  fetched and converted with infobox_to_card.

  Args:
    title: Wikipedia page title, as used in the page URL.

  Returns:
    A (data_json, template_json) tuple of JSON strings.
  """
  wikipedia_url = "https://en.wikipedia.org/wiki/" + title
  page = wptools.page(title).get_parse()
  box = page.data["infobox"]

  def prepare(prop):
    # Escape quotes and newlines so the rendered literals stay well-formed.
    escaped = prop.replace("'", "\\'").replace("\n", "\\n")
    expression = render(parse(escaped), preserveWhitespace=True).replace("\n", " ")
    # SECURITY(review): this evaluates an expression derived from remote
    # wiki content. eval is used to collapse the rendered string
    # concatenation into plain text; treat the input as untrusted and
    # consider a dedicated evaluator before using this outside a notebook.
    return eval(expression)

  parsed = { prop.replace(" ", "_").replace("-", "_"): prepare(box[prop]) for prop in box }

  # Replace image file names with their resolved URLs where wptools found them.
  if "image" in page.data:
    for key, value in parsed.items():
      for image in page.data["image"]:
        if image["orig"] == value:
          parsed[key] = image["url"]
  data_json = json.dumps(parsed)

  # The infobox template title is the text from the first "infobox" up to the next "|".
  text = page.data["wikitext"]
  start = text.lower().index("infobox")
  length = text[start:].index("|")
  infobox_template_title = text[start:start+length].strip()
  print(f"Fetching infobox template {infobox_template_title}")
  template_page = wptools.page('Template:' + infobox_template_title).get_parse()
  infobox_template = template_page.data["wikitext"]
  print(infobox_template)
  template_json = infobox_to_card(infobox_template, wikipedia_url)

  return data_json, template_json

In [69]:
title = "Milky Way" #@param {type: "string"}
title = title.replace(" ", "_")

data, template = get_data_and_template(title)

# Persist the card template first, then the data, next to the notebook.
for suffix, content in (("template", template), ("data", data)):
  with open(f"{title}.{suffix}.json", "w") as f:
    f.write(content)

print("==========================")

# Each label is followed by its JSON payload on the next line.
print("Adaptive Cards template:", template, sep="\n")

print("Adaptive Cards data:", data, sep="\n")

print("==========================")

en.wikipedia.org (parse) Milky_Way
Milky Way (en) data
{
  image: <list(0)> 
  infobox: <dict(6)> name, epoch, constellation name, image, type, size
  iwlinks: <list(3)> https://commons.wikimedia.org/wiki/Category:M...
  pageid: 2589714
  parsetree: <str(243067)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: Milky Way
  wikibase: Q321
  wikidata_url: https://www.wikidata.org/wiki/Q321
  wikitext: <str(191534)> {{Short description|Barred spiral galaxy...
}
en.wikipedia.org (parse) Template:Infobox galaxy
Template:Infobox galaxy (en) data
{
  image: <list(0)> 
  infobox: <dict(41)> image, abovestyle, headerstyle, belowstyle, ...
  pageid: 970028
  parsetree: <str(15964)> <root><template><title>Infobox</title><p...
  requests: <list(1)> parse
  title: Template:Infobox galaxy
  wikibase: Q5882327
  wikidata_url: https://www.wikidata.org/wiki/Q5882327
  wikitext: <str(5557)> {{Infobox| above    = {{{name<includeonly>...
}


Fetching infobox template Infobox galaxy
{{Infobox
| above    = {{{name<includeonly>|{{PAGENAME}}</includeonly>}}}
| image    = {{#invoke:InfoboxImage|InfoboxImage|image={{{image<includeonly>|</includeonly>}}}|size={{{image_size|}}}|sizedefault=frameless|upright={{{upright|}}}|alt={{{alt|}}}}}
| caption = {{{caption<includeonly>|</includeonly>}}}
| abovestyle  = background: #ddd;
| headerstyle = background: #ddd;
| belowstyle  = background: #ddd;
| labelstyle  = background: inherit;

|header1 = Observation data {{#if:{{{epoch<includeonly>|</includeonly>}}}|({{{epoch}}} [[Epoch (astronomy)|epoch]]) }}
| label2 = Pronunciation
|  data2 = {{{pronounce<includeonly>|</includeonly>}}}

| label3 = [[Constellation]]
|  data3 = {{{constellation name<includeonly>|</includeonly>}}}
| label4 = [[Right ascension]]
|  data4 = {{{ra<includeonly>|</includeonly>}}}
| label5 = [[Declination]]
|  data5 = {{{dec<includeonly>|</includeonly>}}}

| label6 = [[Redshift]]
|  data6 = {{{z<includeonly>|</include