In [1]:
# This should be set by Pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.config("spark.ui.enabled", False).getOrCreate()

In [2]:
# Initialise test environment
from dapla.magics import DaplaLineageMagics
from IPython import get_ipython
ipython = get_ipython()
from dapla.jupyterextensions.authextension import AuthClient
import requests

# Provide a mock template from attached json file
import json
def lineage_template_mock(output_schema, input_schema_map):
    def mapper(x):
        return (x[0], {
            "schema": x[1]['schema'],
            "schemaType": "SPARK",
            "timestamp": x[1]['timestamp'],
        })
    request = {
        "schema": output_schema['schema'],
        "timestamp": output_schema['timestamp'],
        "schemaType": "SPARK",
        "simpleLineage": False,
        "dependencies": [dict(map(mapper, input_schema_map.items()))],
    }
#    with open('lineage-template.json', 'r') as f:
#        return json.load(f)
    response = requests.post('http://localhost:10190/lineage/template', json=request,
                             headers={
                             }, allow_redirects=False)
    return response.json()
    
# Register dapla magics manually
magics = DaplaLineageMagics(ipython, lineage_template_mock)
ipython.register_magics(magics)

In [3]:
from pyspark.sql.types import *

# Create 3 test dataframe
person_type = StructType([
    StructField('personidentifikator', StringType()),
    StructField('kontonummer', StringType())])
person_data = [
    ('1234', '12345555'),
    ('1235', '12347777'),
]
person = spark.createDataFrame(person_data, person_type)

unrelated_type = StructType([
    StructField('weird', StringType()),
    StructField('stuff', StringType())])
unrelated = spark.createDataFrame([], unrelated_type)


konto_type = StructType([
    StructField('kontonummer', StringType()),
    StructField('innskudd', IntegerType())])
konto_data = [
    ('12345555', 25000),
    ('12347777', 120000),
]
konto = spark.createDataFrame(konto_data, konto_type)

# Create a 3rd dataframe based on the other two
innskudd = person.join(konto, 'kontonummer', how='inner')

In [4]:
# This will be done automatically by spark.read.path

%lineage_input /skatt/person {person.schema.json()}
%lineage_input /skatt/konto {konto.schema.json()}
%lineage_input /skatt/unrelated {unrelated.schema.json()}

In [5]:
# This will be done automatically by spark.write.path

%lineage_output /skatt/innskudd {innskudd.schema.json()}

In [6]:
%lineage_tree

Input datasets:
 |-- /skatt/person
 |-- /skatt/konto
 |-- /skatt/unrelated
Output datasets:
 |-- /skatt/innskudd


In [7]:
# Show GUI for mapping lineage fields
%lineage_fields innskudd

Accordion(children=(VBox(children=(HTML(value='<style>.widget-checkbox-label-bold > label > span {font-weight:…

In [18]:
innskudd.lineage

{'lineage': {'name': 'spark_schema',
  'type': 'structure',
  'fields': [{'name': 'kontonummer',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'kontonummer',
      'path': '/skatt/person',
      'version': 1600771266157},
     {'field': 'kontonummer',
      'path': '/skatt/konto',
      'version': 1600771266158}],
    'selected': {'/skatt/person': ['kontonummer'],
     '/skatt/konto': ['kontonummer']}},
   {'name': 'personidentifikator',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'personidentifikator',
      'path': '/skatt/person',
      'version': 1600771266157}],
    'selected': {'/skatt/person': ['personidentifikator'],
     '/skatt/unrelated': ['weird']}},
   {'name': 'innskudd',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'innskudd',
      'path': '/skatt/konto',
      'version': 1600771266158}]}],
  'sources': [{'path': '/skatt/person', 'version': 1600771266157},
   {'path': '/skatt/konto', 

In [9]:
# Return simple lineage template by path
%lineage_json innskudd

{'lineage': {'name': 'spark_schema',
  'type': 'structure',
  'fields': [{'name': 'kontonummer',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'kontonummer',
      'path': '/skatt/person',
      'version': 1600771266157},
     {'field': 'kontonummer',
      'path': '/skatt/konto',
      'version': 1600771266158}]},
   {'name': 'personidentifikator',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'personidentifikator',
      'path': '/skatt/person',
      'version': 1600771266157}]},
   {'name': 'innskudd',
    'type': 'inherited',
    'confidence': 0.9,
    'sources': [{'field': 'innskudd',
      'path': '/skatt/konto',
      'version': 1600771266158}]}],
  'sources': [{'path': '/skatt/person', 'version': 1600771266157},
   {'path': '/skatt/konto', 'version': 1600771266158},
   {'path': '/skatt/unrelated', 'version': 1600771266158}]}}

In [10]:

import ipywidgets as widgets
form_item_layout = widgets.Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)
form_items = []

for field in innskudd.lineage['lineage']['fields']:
    form_items.append(widgets.Box([widgets.Label(value=field['name']), 
                                   widgets.Dropdown()],
                                layout=form_item_layout))
    
    

In [11]:
innskudd.lineage['lineage']['fields']

[{'name': 'kontonummer',
  'type': 'inherited',
  'confidence': 0.9,
  'sources': [{'field': 'kontonummer',
    'path': '/skatt/person',
    'version': 1600771266157},
   {'field': 'kontonummer',
    'path': '/skatt/konto',
    'version': 1600771266158}]},
 {'name': 'personidentifikator',
  'type': 'inherited',
  'confidence': 0.9,
  'sources': [{'field': 'personidentifikator',
    'path': '/skatt/person',
    'version': 1600771266157}]},
 {'name': 'innskudd',
  'type': 'inherited',
  'confidence': 0.9,
  'sources': [{'field': 'innskudd',
    'path': '/skatt/konto',
    'version': 1600771266158}]}]

In [12]:
import pandas as pd

In [13]:

widgets.Combobox(
    # value='John',
    placeholder='Choose Someone',
    options=['Paul', 'John', 'George', 'Ringo'],
    description='Combobox:',
    ensure_option=True,
    disabled=False
)

Combobox(value='', description='Combobox:', ensure_option=True, options=('Paul', 'John', 'George', 'Ringo'), p…

In [14]:
person.schema.fields

[StructField(personidentifikator,StringType,true),
 StructField(kontonummer,StringType,true)]

In [15]:
innskudd.schema.fields[0].keys()

AttributeError: 'StructField' object has no attribute 'keys'

In [None]:
for k,v in input_datasets.items():
    print(v)

In [None]:
w = widgets.Checkbox(description='description')

In [None]:
w = widgets.Checkbox(description="Title")
w.add_class("mytext")
w

In [None]:
%%html
<style>
.mytext > .widget-label {
    font-style: italic;
    color: blue;
    font-size: 30px;
}
.mytext span {
    font-size: 50px;
    font-weight: bold;
}
</style>

In [None]:
%%html
<style>
.widget-label_bold > label > span {
    font-weight: bold;
}
</style>

In [None]:
def on_value_change(change):
    print(change)
        

w.observe(on_value_change, names='value')
w