In [1]:
# In a Pyspark kernel `spark` would normally already be defined; build (or
# reuse) a session explicitly here, with the "spark.ui.enabled" option set
# to False.
from pyspark.sql import SparkSession
spark = (
    SparkSession.builder
    .config("spark.ui.enabled", False)
    .getOrCreate()
)

In [2]:
# Initialise test environment
# DaplaDocumentationMagics is the magics class under test; it is registered
# manually further down in this cell.
from dapla.magics.documentation import DaplaDocumentationMagics
from IPython import get_ipython
# Handle to the IPython shell running this notebook; used below both as a
# constructor argument for the magics class and to register the magics.
ipython = get_ipython()

# Provide a mock template from attached json file
def doc_template_mock(ds, use_simple):
    """Return the contents of ``template.json`` as a string.

    Stand-in for the template provider passed to ``DaplaDocumentationMagics``.
    Both arguments are accepted only to match the call signature and are
    ignored; the same file content is returned on every call.

    Args:
        ds: Unused. NOTE(review): presumably the dataset being documented.
        use_simple: Unused. NOTE(review): presumably a template-variant flag.

    Returns:
        str: The raw text of ``template.json`` from the current working
        directory.

    Raises:
        OSError: If ``template.json`` cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: the template contains non-ASCII characters,
    # and the platform default encoding is not UTF-8 everywhere.
    with open('template.json', 'r', encoding='utf-8') as f:
        return f.read()

# Register dapla magics manually
# Build the magics object with the current shell and the mock template
# provider defined above, then register it on the shell.
# NOTE(review): presumably this is what makes `%document` available — confirm.
magics = DaplaDocumentationMagics(ipython, doc_template_mock)
ipython.register_magics(magics)

In [3]:
# Create a test dataframe
# Import only the type classes used below rather than `import *`, so it is
# clear where each name comes from and the notebook namespace stays clean.
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Schema of the demo dataset: one string column and two integer columns
# (kontonummer = account number, innskudd = deposit, gjeld = debt).
konto_type = StructType([
    StructField('kontonummer', StringType()),
    StructField('innskudd', IntegerType()),
    StructField('gjeld', IntegerType()),
])

# Two sample rows, with values in schema column order.
konto_data = [
    ('12345555', 25000, 0),
    ('12347777', 120000, 500),
]

# Relies on the `spark` session created earlier in the notebook.
konto = spark.createDataFrame(konto_data, konto_type)

In [6]:
# Now run the magic
# Invoke the `%document` line magic on the `konto` dataframe. In the recorded
# output it renders a widget form with "Dataset metadata", "Instance
# variables" and a "Save to file" button.
# NOTE(review): `-f result.json` presumably names the file that the button
# writes to — confirm against the magic's implementation.
%document -f result.json konto

HTML(value='<b>Dataset metadata</b>')

Box(children=(Box(children=(Label(value='Name'), Text(value='konto datasett demo')), layout=Layout(display='fl…

HTML(value='<b>Instance variables</b>')

Accordion(children=(Box(children=(Box(children=(Label(value='Description'), Textarea(value='vilkårlig lang sek…

Button(description='Save to file', icon='file-code', style=ButtonStyle())

Output()

In [5]:
# Display the `doc` attribute of the dataframe (a dict in the recorded output).
# NOTE(review): `doc` is presumably attached by the `%document` magic; this
# cell's execution count (5) is lower than the magic cell's (6), so re-run the
# notebook top to bottom to confirm it still works on a fresh kernel.
konto.doc

{'name': 'konto datasett demo',
 'description': 'Inneholder kontoer av forskjellig art.',
 'unitType': {'concept-type': 'UnitType',
  'selected-id': 'UnitType_DUMMY',
  'candidates': [{'id': 'some-id-could-be-guid', 'name': 'Heltall'},
   {'id': 'UnitType_DUMMY', 'name': 'UnitType_default'}]},
 'instanceVariables': [{'name': 'kontonummer',
   'description': 'vilkårlig lang sekvens av tegn inkludert aksenter og spesielle tegn fra standardiserte tegnsett',
   'identifierComponentIsComposite': False,
   'identifierComponentIsUnique': False,
   'dataStructureComponentRole': {'selected-enum': 'ENTITY',
    'enums': ['ENTITY', 'IDENTITY', 'COUNT', 'TIME', 'GEO']},
   'dataStructureComponentType': {'selected-enum': 'MEASURE',
    'enums': ['IDENTIFIER', 'MEASURE', 'ATTRIBUTE']},
   'population': {'concept-type': 'Population',
    'selected-id': 'Population_DUMMY',
    'candidates': [{'id': 'some-id-could-be-guid',
      'name': 'All families 2018-01-01'},
     {'id': 'Population_DUMMY', 'name