In [1]:
import os
import zipfile
import json
import re

In [8]:
# Opening ([0]) and closing ([1]) markup of an "expand" structured macro.
# Content placed between the two strings ends up inside <ac:rich-text-body>.
# NOTE(review): the ac: tags look like Confluence storage format - confirm the target wiki.
list_expand = ['''<ac:structured-macro ac:macro-id="f0500336-571b-4170-813e-cafd846b76da" ac:name="expand" ac:schema-version="1">
  <ac:rich-text-body>\n''', '''\n</ac:rich-text-body>
</ac:structured-macro>''']

# Opening ([0]) and closing ([1]) markup of a "code" structured macro.
# The opening string ends with "<![CDATA[" and the closing string starts with "]]>",
# so whatever is concatenated between them is emitted as a CDATA section.
list_code = ['''<ac:structured-macro ac:macro-id="52e1fb62-54c6-4b20-81a5-47d910f774ec" ac:name="code" ac:schema-version="1">
      <ac:plain-text-body><![CDATA[''', ''']]></ac:plain-text-body>
    </ac:structured-macro>''']


def create_dot_list_col(list_columns, dict_type_column = False, name_table = False):
    """Render column names as an HTML unordered list.

    Args:
        list_columns: iterable of column-name strings, one <li> per entry.
        dict_type_column: optional mapping keyed by '<table name> <column name>'
            whose values are dicts containing a 'type' entry.
        name_table: optional table name used to build the lookup key.

    Returns:
        A string '<ul>\\n ... </ul>\\n'. When both optional arguments are truthy
        and the key '<name_table> <column>' is present in dict_type_column, the
        item text is 'column(type)'; otherwise it is just the column name.
    """
    annotate = bool(dict_type_column and name_table)

    def _list_item(column):
        # Only columns registered under this table get the '(type)' suffix.
        if annotate:
            key = name_table + ' ' + column
            if key in dict_type_column.keys():
                return '  <li>' + column + '(' + dict_type_column[key]['type'] + ')' + '</li>\n'
        return '  <li>' + column + '</li>\n'

    items = [_list_item(column) for column in list_columns]
    return '<ul>\n' + ''.join(items) + '</ul>\n'

In [None]:
# Set the absolute path to 'DATA' folder and print all files in there.
# path_to_file is assigned unconditionally so that later cells can test it
# (and report "Path not found") instead of failing with a NameError.
path_to_file = os.path.join(os.getcwd(), 'DATA')
# isdir (rather than a name lookup in the listing) also rejects a plain file called DATA,
# which would make os.listdir() raise.
if os.path.isdir(path_to_file):
    print('Files in folder DATA', os.listdir(path_to_file))
else:
    print('Create a folder "DATA" in the same directiry as the script and move the .pbit file there')

In [None]:
# Set file Name (the '.pbit' extension is appended when missing)
str_file_name = 'Dashboard.pbit'
if not str_file_name.lower().endswith('.pbit'):
    str_file_name += '.pbit'

print(f'Selected "{str_file_name}"')

# Resolve the full path and stop here with a clear error when something is missing.
# Only printing a message would leave path_and_file undefined (NameError in the
# next cell) or, worse, still pointing at the file chosen in an earlier run.
if not os.path.isdir(path_to_file):
    raise FileNotFoundError(f'Path not found: {path_to_file}')

path_and_file = os.path.join(path_to_file, str_file_name)
if not os.path.isfile(path_and_file):
    raise FileNotFoundError(f'File not found: {path_and_file}')

In [4]:
# Read the "DataModelSchema" member of the .pbit archive (opened as a zip file),
# decode it as UTF-16 text and parse that text as JSON.
with zipfile.ZipFile(path_and_file, "r") as archive:
    str_DataModelSchema = str(archive.read("DataModelSchema"), encoding="utf-16")

json_DataModelSchema = json.loads(str_DataModelSchema)

# Rough size indicator: number of newline characters in the decoded schema text.
print('Number of lines in DataModelSchema:', str_DataModelSchema.count('\n'))

Number of lines in DataModelSchema: 20666


In [5]:
# Parse: Table Name, Columns Name and Source of Table
#
# Results (hidden tables and hidden columns are skipped; "hidden" means the
# 'isHidden' key is present, its value is not inspected):
#   dict_table_col    table name -> list of visible column names
#   dict_table_source table name -> source text of the first partition
#   dict_measures     table name -> {measure name: expression text}
#   dict_col_type     '<table> <column>' -> {'type': ..., 'expression': text}
#                     (only columns that carry both 'type' and 'expression')
dict_table_col = {}
dict_table_source = {}
dict_measures = {}
dict_col_type = {}


def _expression_to_text(expression):
    """Return an expression as a single string.

    A plain string is returned unchanged; a list of lines is joined with
    newlines so that multi-line expressions keep their line breaks.
    """
    if isinstance(expression, str):
        return expression
    return '\n'.join(expression)


def _partition_source(table):
    """Return the source text of the table's first partition ('' when absent)."""
    partitions = table.get('partitions') or [{}]
    expression = partitions[0].get('source', {}).get('expression', '')
    if isinstance(expression, list) and len(expression) > 1:
        # Established behaviour: only the second line of a multi-line expression is kept.
        # NOTE(review): presumably this is the "Source = ..." step - confirm that
        # dropping the remaining lines is intended.
        text = expression[1]
    else:
        # A plain string or a one-line list: indexing [1] would return a single
        # character or raise IndexError, so use the whole text instead.
        text = _expression_to_text(expression)
    return text.replace('#(lf)', '\n')


for table in json_DataModelSchema['model']['tables']:
    if 'isHidden' in table:
        continue
    table_name = table['name']

    if 'columns' in table:
        visible_columns = [col for col in table['columns'] if 'isHidden' not in col]
        dict_table_col[table_name] = [col['name'] for col in visible_columns]
        dict_table_source[table_name] = _partition_source(table)
        for col in visible_columns:
            if 'type' in col and 'expression' in col:
                dict_col_type[table_name + ' ' + col['name']] = {
                    'type': col['type'],
                    'expression': _expression_to_text(col['expression']),
                }

    # Applies both to regular tables and to measure-only tables (no 'columns' key).
    # Measures without an expression are skipped in either case, and a table with
    # neither columns nor measures no longer raises KeyError.
    if 'measures' in table:
        dict_measures[table_name] = {
            measure['name']: _expression_to_text(measure['expression'])
            for measure in table['measures']
            if 'expression' in measure
        }
    

In [9]:
# Make xml file for the wiki page
#
# Layout: a "Tables" section (per table: column list + source code), a "Measures"
# section (per table: measure name + expression) and a "Calculated columns" section.
# Plain text is XML-escaped; code goes into CDATA sections and is left unescaped,
# because entity references such as &amp; are not decoded inside CDATA.


def _escape_xml_text(text):
    """Escape &, < and > in plain text that is placed outside a CDATA section."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _code_macro(expression):
    """Wrap an expression (string or list of lines) in the code macro markup."""
    if isinstance(expression, str):
        code = expression
    else:
        # A list of lines: keep the line breaks instead of gluing the lines together.
        code = '\n'.join(expression)
    # ']]>' would end the CDATA section early; split it across two sections.
    code = code.replace(']]>', ']]]]><![CDATA[>')
    return list_code[0] + code + list_code[-1]


list_result_parts = ['<h2>\n  <strong>Tables:</strong>\n</h2>\n']
for table in dict_table_col:
    list_result_parts.append('<p>\n  <strong>' + _escape_xml_text(table) + '</strong>\n</p>\n')
    # The column list already contains markup, so only '&' can be escaped afterwards.
    str_columns = create_dot_list_col(dict_table_col[table], dict_col_type, table).replace('&', '&amp;')
    list_result_parts.append(list_expand[0] + str_columns)
    list_result_parts.append(_code_macro(dict_table_source[table]) + list_expand[-1])

list_result_parts.append('<h2>\n  <strong>Measures:</strong>\n</h2>\n')
for table_measures, dict_table_measures in dict_measures.items():
    list_result_parts.append('<p>\n  <strong>' + _escape_xml_text(table_measures) + '</strong>\n</p>\n')
    list_result_parts.append(list_expand[0])
    for measure_name, measure_expression in dict_table_measures.items():
        list_result_parts.append(_escape_xml_text(measure_name) + '\n')
        list_result_parts.append(_code_macro(measure_expression) + '\n')
    list_result_parts.append(list_expand[-1])

list_result_parts.append('<h2>\n  <strong>Calculated columns:</strong>\n</h2>\n')
list_result_parts.append(list_expand[0])
for col_name in dict_col_type:
    list_result_parts.append(_escape_xml_text(col_name) + '\n')
    list_result_parts.append(_code_macro(dict_col_type[col_name]['expression']) + '\n')
list_result_parts.append(list_expand[-1])

# Join once (instead of repeated +=) and expand the '#(tab)' escape to four spaces.
str_result_text = ''.join(list_result_parts).replace('#(tab)', '    ')

# Explicit encoding: the platform default may be unable to encode non-ASCII names.
with open('wik_page.xml', 'w', encoding='utf-8') as f:
    f.write(str_result_text)