## Importing Python Libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import bs4
from bs4 import BeautifulSoup

from pathlib import Path
import requests
import plotly.express as px
import plotly.graph_objects as go
from collections import Counter
import math
import numpy as np

# Visualisation dependencies
import textwrap
import networkx as nx
from pyvis.network import Network
import seaborn as sns

#### List of Beautiful Soup's public attributes and methods

In [3]:
# Names exposed by the BeautifulSoup class whose first character is a lowercase
# letter (this drops underscore-prefixed and capitalised names).
[
    attr_name
    for attr_name in dir(BeautifulSoup)
    if attr_name[0].islower()
]

['append',
 'childGenerator',
 'children',
 'clear',
 'css',
 'decode',
 'decode_contents',
 'decompose',
 'decomposed',
 'default',
 'descendants',
 'encode',
 'encode_contents',
 'endData',
 'extend',
 'extract',
 'fetchNextSiblings',
 'fetchParents',
 'fetchPrevious',
 'fetchPreviousSiblings',
 'find',
 'findAll',
 'findAllNext',
 'findAllPrevious',
 'findChild',
 'findChildren',
 'findNext',
 'findNextSibling',
 'findNextSiblings',
 'findParent',
 'findParents',
 'findPrevious',
 'findPreviousSibling',
 'findPreviousSiblings',
 'find_all',
 'find_all_next',
 'find_all_previous',
 'find_next',
 'find_next_sibling',
 'find_next_siblings',
 'find_parent',
 'find_parents',
 'find_previous',
 'find_previous_sibling',
 'find_previous_siblings',
 'format_string',
 'formatter_for_name',
 'get',
 'getText',
 'get_attribute_list',
 'get_text',
 'handle_data',
 'handle_endtag',
 'handle_starttag',
 'has_attr',
 'has_key',
 'index',
 'insert',
 'insert_after',
 'insert_before',
 'isSelfClosing

## Import data

In [4]:
def getXML(url, timeout=30):
    """Download the document at ``url`` and return its body as text.

    Parameters
    ----------
    url : str
        Address of the document to fetch.
    timeout : float or tuple, default 30
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    str
        The decoded response body (``response.text``). No parsing is done
        here; the caller decides how to interpret the text.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page
        is never handed on as if it were the requested document.
    requests.RequestException
        For connection problems or when ``timeout`` is exceeded.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of returning the error page's text.
    response.raise_for_status()
    return response.text

In [5]:
# Fetch the CRIM_Model_0019 .mei file and keep its raw text for parsing below.
xml_document = getXML('https://crimproject.org/mei/CRIM_Model_0019.mei')

Convert into Beautiful Soup Object:

In [6]:
# Parse the downloaded text into a navigable tree.
# NOTE(review): the source is a .mei (XML) file but the "html.parser" backend
# is selected here — confirm that its handling of tag/attribute name case is
# what later lookups expect before switching parsers.
soup = BeautifulSoup(xml_document, "html.parser")



## Visualizing XML as Network Graphs

In [9]:
def format_element(tag: "bs4.element.Tag", wrap_length=20, exclude=()):
    """Render a tag as a short, line-wrapped label for a graph node.

    The label is the tag name, followed in parentheses by the remaining
    attributes as space-separated ``name=value`` pairs. When no attribute is
    left after filtering, the label is just the tag name.

    Parameters
    ----------
    tag : bs4.element.Tag
        Element to describe; only ``tag.name`` and ``tag.attrs`` are read.
    wrap_length : int, default 20
        Maximum line width handed to ``textwrap.fill``.
    exclude : collection of str, default ()
        Attribute names to leave out of the label. The default is an
        immutable tuple rather than a shared mutable list.

    Returns
    -------
    str
        The label, with newlines inserted by ``textwrap.fill``.
    """
    attrs_list = []

    for attr_name, attr_value in tag.attrs.items():
        if attr_name in exclude:
            continue
        # Beautiful Soup stores multi-valued attributes (e.g. ``class``) as
        # lists; join them so the label reads ``class=a b`` instead of the
        # Python list repr ``class=['a', 'b']``.
        if isinstance(attr_value, (list, tuple)):
            attr_value = " ".join(map(str, attr_value))
        attrs_list.append(f"{attr_name}={attr_value}")

    if attrs_list:
        formatted_string = f"{tag.name} ({' '.join(attrs_list)})"
    else:
        formatted_string = tag.name

    return textwrap.fill(formatted_string, wrap_length)



def create_network(tag: bs4.element.Tag, with_attributes: bool = False, attrs_to_exclude=()):
    """Build a directed graph that mirrors the element tree rooted at ``tag``.

    Every element (``tag`` itself plus everything returned by
    ``tag.find_all()``) becomes a node keyed by ``id(element)``; an edge runs
    from each element to each of its named children.

    Parameters
    ----------
    tag : bs4.element.Tag
        Root element of the subtree to graph.
    with_attributes : bool, default False
        When True, node labels come from ``format_element`` (tag name plus
        attributes); otherwise the label is the bare tag name.
    attrs_to_exclude : collection of str, default ()
        Attribute names passed to ``format_element`` as ``exclude``. Only
        used when ``with_attributes`` is True. The default is an immutable
        tuple rather than a shared mutable list.

    Returns
    -------
    networkx.DiGraph
        Graph whose nodes carry ``label``, ``value`` (number of descendants),
        ``group`` and ``level`` (both the number of ancestors of the element)
        and ``scaling`` attributes, and whose edges carry ``arrows`` and
        ``id`` attributes.
    """
    all_tags = [tag] + tag.find_all()

    G = nx.DiGraph()

    for node in all_tags:
        # Depth is counted over all ancestors of the element, so it is not
        # relative to ``tag``. Counting lazily avoids building throwaway lists.
        depth = sum(1 for _ in node.parents)
        if with_attributes:
            label = format_element(node, exclude=attrs_to_exclude)
        else:
            label = node.name
        G.add_node(
            id(node),
            label=label,
            value=sum(1 for _ in node.descendants),
            group=depth,
            level=depth,
            scaling={'label': {'enabled': True}},
        )

    for node in all_tags:
        for child in node.children:
            # Children without a name are skipped (presumably text or comment
            # nodes), so edges only ever link elements.
            if child.name:
                G.add_edge(
                    id(node),
                    id(child),
                    arrows='to',
                    id=f"{id(node)}_{node.name}|{id(child)}_{child.name}",
                )

    return G



def display_network(
    network,
    filename="tmp.html",
    width=900,
    height=900,
    bgcolor="white",
    font_color="black",
    notebook=True,
):
    """Load a networkx graph into a pyvis ``Network`` and show it.

    Parameters
    ----------
    network
        Graph handed to ``Network.from_nx``.
    filename : str, default "tmp.html"
        Name passed to ``Network.show``.
    width, height : default 900
        Canvas dimensions forwarded to the ``Network`` constructor.
    bgcolor, font_color : str
        Colours forwarded to the ``Network`` constructor.
    notebook : bool, default True
        Forwarded to the ``Network`` constructor.

    Returns
    -------
    Whatever ``Network.show(filename)`` returns.
    """
    # Gather the constructor options in one place; resources are always
    # requested with cdn_resources='remote'.
    canvas_options = {
        "notebook": notebook,
        "width": width,
        "height": height,
        "bgcolor": bgcolor,
        "font_color": font_color,
        "cdn_resources": 'remote',
    }
    viewer = Network(**canvas_options)
    viewer.from_nx(network)

    rendered = viewer.show(filename)
    return rendered

Create a network for a single measure:

In [10]:
# Look up a <measure> element with n=1 via soup.find and graph its subtree;
# with_attributes keeps its default (False), so nodes are labelled by tag name.
measure_network = create_network(soup.find("measure", {"n": 1}))
# Show the graph through pyvis using simple_measure.html as the filename; as
# the cell's last expression, the call's return value is what gets displayed.
display_network(measure_network, filename="simple_measure.html")

simple_measure.html
