In [1]:
# The `spark` session should normally be set by Pyspark; create it explicitly here
from pyspark.sql import SparkSession
# Session is obtained with the "spark.ui.enabled" setting switched off.
spark = (
    SparkSession.builder
    .config("spark.ui.enabled", False)
    .getOrCreate()
)

In [2]:
# Initialise test environment
from dapla.magics import DaplaDocumentationMagics
from dapla.magics import DaplaLineageMagics
from IPython import get_ipython
ipython = get_ipython()

# Provide a mock template from attached json file
import json
def doc_template_mock(ds, use_simple, path):
    """Return the documentation template stored in the attached JSON fixture.

    The three parameters are accepted but ignored: this mock always reads
    ``doc_template_with_smart_match.json`` relative to the current working
    directory. (Presumably the signature mirrors the template-loader callback
    expected by DaplaDocumentationMagics -- confirm against that class.)

    Returns:
        The parsed JSON content of the fixture file.

    Raises:
        FileNotFoundError: if the fixture file is not present.
        json.JSONDecodeError: if the fixture does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding (the fixture may hold non-ASCII text).
    with open('doc_template_with_smart_match.json', 'r', encoding='utf-8') as f:
        return json.load(f)

def doc_template_candidates_mock(type):
    """Return the hard-coded candidate list for a concept type.

    Known values of ``type`` are "unitType", "representedVariable",
    "population" and "sentinelValueDomain"; each yields a list of
    ``{'id': ..., 'name': ...}`` dicts. Any other value yields an empty list.
    The parameter name shadows the builtin ``type`` but is kept as-is so
    keyword callers keep working.
    """
    # Rebuilt on every call, so callers always receive fresh list objects.
    candidates_by_type = (
        ("unitType", [
            {'id': 'id-1', 'name': 'Test 1'},
            {'id': 'UnitType_DUMMY', 'name': 'Test 2'},
        ]),
        ("representedVariable", [
            {'id': 'RepresentedVariable_DUMMY', 'name': 'Test 1'},
            {'id': 'id-2', 'name': 'Test 2'},
            {'id': 'some-id-could-be-guid', 'name': 'All families 2018'},
        ]),
        ("population", [
            {'id': 'Population_DUMMY', 'name': 'Test 1'},
            {'id': 'id-2', 'name': 'Test 2'},
        ]),
        ("sentinelValueDomain", [
            {'id': 'EnumeratedValueDomain_DUMMY-id', 'name': 'EnumeratedValueDomain Test 1'},
            {'id': 'DescribedValueDomain-id1', 'name': 'DescribedValueDomain Test 2'},
            {'id': 'DescribedValueDomain_DUMMY-id', 'name': 'DescribedValueDomain DUMMY'},
        ]),
    )

    # Compare with == in declaration order; the first match wins.
    for known_type, candidates in candidates_by_type:
        if type == known_type:
            return candidates

    return []

def doc_enums_mock(enumType):
    """Return the hard-coded enum mapping for the requested enum type.

    Args:
        enumType: "dataStructureComponentType" or "valuation".

    Returns:
        A dict of string keys to string labels for a known enum type,
        or None for any other value.
    """
    if enumType == "dataStructureComponentType":
        return {
            "START_TIME": "START_TID",
            "ATTRIBUTE": "EGENSKAP",
            "MEASURE": "MÅL",
            "IDENTIFIER": "IDENTIFIKASJON",
            "STOP_TIME": "STOP_TID",
        }
    if enumType == "valuation":
        return {
            "tec-SENSITIVE": "SENSITIVE",
            # Previously "SHIELDED, " -- a separator had slipped inside the label.
            "tec-SHIELDED": "SHIELDED",
            "tec-INTERNAL": "INTERNAL",
            "tec-OPEN": "OPEN",
        }
    # Unknown enum types yield None, exactly as the implicit fall-through did.
    return None

def doc_translation_mock(concept_type):
    """Return mock translated labels for a concept type.

    "RepresentedVariable" gets a fixed Norwegian name and description; every
    other concept type is echoed back as its own name, with no description.
    """
    if concept_type != "RepresentedVariable":
        # No translation available: echo the concept type itself.
        return {"name": concept_type}

    return {
        "name": "Representert variabel",
        "description": "Beskrivelse av representert variabel",
    }


# Register dapla magics manually, wiring in the mock callbacks defined above
magics_doc = DaplaDocumentationMagics(
    ipython,
    doc_template_mock,
    doc_template_candidates_mock,
    doc_enums_mock,
    doc_translation_mock,
)
magics_lineage = DaplaLineageMagics(ipython, doc_template_candidates_mock)
# register_magics accepts several magics objects and registers them in order.
ipython.register_magics(magics_doc, magics_lineage)

In [3]:
# Create a test dataframe
from pyspark.sql.types import *
# Schema for the konto test data, built from (column name, Spark type) pairs
konto_type = StructType([
    StructField(column_name, spark_type())
    for column_name, spark_type in (
        ('kontonummer', StringType),
        ('innskudd', IntegerType),
        ('gjeld', IntegerType),
    )
])
# Two sample rows, in schema order: (kontonummer, innskudd, gjeld)
konto_data = [('12345555', 25000, 0), ('12347777', 120000, 500)]

konto = spark.createDataFrame(konto_data, konto_type)

In [4]:
%%output
/skatt/konto

In [5]:
# Now run the %document magic on the konto dataframe
%document --nofile konto

VBox(children=(HTML(value='<b style="font-size:14px">Dataset metadata</b>'), Box(children=(Box(children=(Label…

In [6]:
# Run the %validate magic on konto; the rendered output shows one status per variable
%validate konto

HTML(value='<b>Kontonummer</b> <i>smart</i>')

HTML(value='<b>Innskudd</b> <i>not selected</i>')

HTML(value='<b>Gjeld</b> <i>ok</i>')

In [7]:
# Display the documentation dict attached to the konto dataframe
konto.doc

{'name': 'konto datasett demo',
 'description': 'Inneholder kontoer av forskjellig art.',
 'unitType': {'concept-type': 'UnitType',
  'selected-id': 'UnitType_DUMMY',
  'candidates': [{'id': 'id-1', 'name': 'Test 1'},
   {'id': 'UnitType_DUMMY', 'name': 'Test 2'}]},
 'instanceVariables': [{'name': 'kontonummer',
   'description': '',
   'smart-description': 'smart-description',
   'dataStructureComponentType': {'selected-enum': 'IDENTIFIER',
    'smart-enum': 'IDENTIFIER',
    'enums': True},
   'valuation': {'selected-enum': '', 'enums': True},
   'population': {'concept-type': 'Population',
    'selected-id': 'Population_DUMMY',
    'smart-match-id': 'Population_DUMMY',
    'candidates': [{'id': 'Population_DUMMY', 'name': 'Test 1'},
     {'id': 'id-2', 'name': 'Test 2'}]},
   'representedVariable': {'concept-type': 'RepresentedVariable',
    'selected-id': 'id-2',
    'smart-match-id': 'id-2',
    'candidates': [{'id': 'RepresentedVariable_DUMMY', 'name': 'Test 1'},
     {'id': 'id-