### Create xcom directory

In [None]:
# Make sure the xcom output directory exists before return.json is written to it
# at the end of the notebook; `-p` creates missing parent directories and does
# not fail if the directory is already present.
! mkdir -p /airflow/xcom

### Imports

In [1]:
import great_expectations as ge
import requests
import json
from pprint import pprint as pp
from io import StringIO

### Fetch STYRK codes and load them into a Great Expectations dataframe

In [2]:
# Download the STYRK code list as CSV text.
# The timeout keeps the notebook from hanging forever on an unresponsive server,
# and raise_for_status() aborts on an HTTP error instead of handing an error
# page to the CSV parser in the following cells.
res = requests.get(
    "https://data.adeo.no/api/nav-opendata/styrk-koder/styrk_s3.csv",
    timeout=30,
)
res.raise_for_status()

In [3]:
# Wrap the downloaded text in an in-memory text stream so it can be read like a file.
buffer = StringIO(initial_value=res.text)

In [4]:
# Parse the CSV stream with great_expectations; the resulting object is used
# below both for a dataframe-style preview (.head()) and for the expect_* checks.
ge_df = ge.read_csv(buffer)

In [5]:
# Preview the first rows to sanity-check the parsed columns before validating.
ge_df.head()

Unnamed: 0,code,parentCode,level,name,shortName,notes
0,0,,1,Militære yrker og uoppgitt,,
1,1,,1,Ledere,,
2,2,,1,Akademiske yrker,,
3,3,,1,Høyskoleyrker,,
4,4,,1,Kontoryrker,,


### Run validation tests

In [6]:
# Collects the JSON-serialisable result of every expectation that fails.
validation_errors = list()

In [7]:
# Range check 1-4 for level column
valid_range = list(range(1, 5))
res = ge_df.expect_column_values_to_be_in_set('level', valid_range)
if res["success"]:
    print("Successful range check 1-4 for level column")
else:
    # Keep the full result so the failure is included in the xcom output file.
    validation_errors.append(res.to_json_dict())

Successful range check 1-4 for level columnRange check 1-4 for level column


In [8]:
# Regex check for name column
# Raw string: "\w" inside a normal string literal is an invalid escape sequence
# (a warning today, an error in future Python versions). The pattern value is unchanged.
# NOTE(review): the pattern is unanchored, so this presumably only requires one
# matching character somewhere in each name — confirm that is the intended strictness.
valid_regex = r"[\wæøåÆØÅ]"
res = ge_df.expect_column_values_to_match_regex('name', regex=valid_regex)
if res["success"]:
    print("Successful regex check for name column")
else:
    # Keep the full result so the failure is included in the xcom output file.
    validation_errors.append(res.to_json_dict())

Successful regex check for name column


In [9]:
# Check that every value in the code column lies within [styrk_min, styrk_max].
styrk_min, styrk_max = 0, 9000
res = ge_df.expect_column_values_to_be_between(
    'code',
    min_value=styrk_min,
    max_value=styrk_max,
)
if res["success"]:
    print("Successful check for valid max/min styrk code column")
else:
    # Record the failed expectation for the error report.
    validation_errors.append(res.to_json_dict())

### Print validation errors to stdout

In [10]:
# Show every collected validation failure, not just the first one.
# Indexing [0] directly would raise IndexError when all checks pass.
print("Validation errors")
if validation_errors:
    for validation_error in validation_errors:
        pp(validation_error)
else:
    print("No validation errors")

{'exception_info': {'exception_message': None,
                    'exception_traceback': None,
                    'raised_exception': False},
 'expectation_config': {'expectation_type': 'expect_column_values_to_be_between',
                        'kwargs': {'column': 'code',
                                   'max_value': 9000,
                                   'min_value': 0,
                                   'result_format': 'BASIC'},
                        'meta': {}},
 'meta': {},
 'result': {'element_count': 582,
            'missing_count': 0,
            'missing_percent': 0.0,
            'partial_unexpected_list': [9111,
                                        9112,
                                        9122,
                                        9123,
                                        9129,
                                        9211,
                                        9212,
                                        9213,
                                  

### Write validation errors to xcom output file

In [None]:
# Persist the collected validation failures as JSON in the xcom directory.
with open("/airflow/xcom/return.json", "w") as f:
    # Serialise straight into the open file handle.
    json.dump(validation_errors, f)