## **Parameters/inputs/imports**

#### **Imports**

In [1]:
import pandas as pd

from ipywidgets import interact, fixed, interactive
import ipywidgets as widgets

from stadincijfers._stadincijfers import stadincijfers

from settings import UPLOAD_DIR
from util.misc import _read_excel, _read_excel_lower
from util.parse_df import Parse_df
from util.conn_pinc_data import Conn_pinc_data

#### **Parameters**

In [8]:
# Flag handed to parsed_df.determine_years() further down as its first argument.
# NOTE(review): presumably False limits the PinC query to the years present in the
# upload file and True requests every period available in PinC — confirm in util.parse_df.
all_pinc_periods = False

#### **PinC upload file**

In [9]:
# Workbook to validate; it is looked up inside UPLOAD_DIR (imported from settings).
# Alternative input kept for reference: '20210623_Upload crimi en verkeer.xlsx'
filename = 'v2707_zorgboerderijen.xlsx'

# Build the path to the workbook and echo it so the reader can see which file is used.
file = f'{UPLOAD_DIR}/{filename}'
print(file)

./upload_pinc/v2707_zorgboerderijen.xlsx


#### **Import PinC upload file for validation**

In [10]:
# Read sheet index 0 of the upload workbook through the project helper.
# NOTE(review): the helper presumably lower-cases the column labels — confirm in util.misc.
data_table = _read_excel_lower(file, sheet_name=0)

In [11]:
# Preview the first two rows of the upload table as a quick sanity check.
data_table.head(2)

Unnamed: 0,period,geolevel,geoitem,v2707_lb_lbexploi_zorg_lnt,v2707_lb_lbexploi_zorg_ander
0,2016,gemeente2018,11001,1,0
1,2016,gemeente2018,11002,2,5


#### **Parse data_table**

In [12]:
# Wrap the upload table in the project's Parse_df helper; the following cells use it
# to derive the periods, indicators and geolevel for the PinC query.
parsed_df = Parse_df(data_table)

In [13]:
# Derive the three PinC query arguments from the parsed upload table and echo each one.
# Statement order fixes the order of the printed lines, so it is left as is.
# NOTE(review): the meaning of _ind_id is not visible here — confirm in util.parse_df.
period_list_pinc_query = parsed_df.determine_years(all_pinc_periods, _ind_id = 0)
print(f'Period list for Pinc Query: {period_list_pinc_query} \n')
# Indicators to query; presumably taken from the upload table's value columns — TODO confirm.
var_list_pinc_query = parsed_df.determine_indicators()
print(f'Indicator list for PinC Query: {var_list_pinc_query} \n')
# Geographic level to query; it is also passed to Conn_pinc_data later in the notebook.
geolevel = parsed_df.determine_geolevel()
print(f'Geolevel for PinC Query: {geolevel}')

List of upload table years: ['2016', '2017', '2018', '2019', '2020', '2021', '2022']
Period list for Pinc Query: 2016, 2017, 2018, 2019, 2020, 2021, 2022 

Indicator list for PinC Query: v2707_lb_lbexploi_zorg_lnt ,v2707_lb_lbexploi_zorg_ander 

Geolevel for PinC Query: gemeente2018


#### **PinC Query**

In [14]:
# Instantiate the stadincijfers client, passing "provincies" as its constructor argument.
# NOTE(review): presumably this selects the provincies PinC site — confirm in the stadincijfers package.
pinc = stadincijfers("provincies")

In [15]:
# Fetch the values currently stored in PinC for the same indicators, geolevel and
# periods as the upload file, so that both tables can be compared further down.
pinc_table = pinc.selectiontableasDataframe(
    var_list_pinc_query,
    geolevel=geolevel,
    periodlevel='year',
    period=period_list_pinc_query,
)

In [16]:
# Preview the first two rows returned by the PinC query.
pinc_table.head(2)

Unnamed: 0,Geo,Perioden,land- en tuinbouw,gespecialiseerd in niet land-en-tuinbouw gerelateerde activiteiten
0,Aalst,2016,5.0,3
1,Aalst,2017,5.0,1


In [17]:
# Make sure the first column is labelled 'Geo', whatever label the query returned for it.
# NOTE(review): presumably Conn_pinc_data relies on that label — confirm in util.conn_pinc_data.
# rename() without inplace returns a new frame that is bound back to pinc_table, so the
# object returned by the query is not mutated and re-running this cell stays harmless.
if pinc_table.columns[0] != 'Geo':
    pinc_table = pinc_table.rename(columns={pinc_table.columns[0]: 'Geo'})

In [18]:
# Display the geolevel that is handed to Conn_pinc_data in the next code cell.
geolevel

'gemeente2018'

## ***Interactive check framework:***

In [19]:
# Pair the PinC query result with the upload table at the chosen geolevel; the
# resulting object provides the plotting and outlier helpers used in the cells below.
connection = Conn_pinc_data(pinc_table, data_table, geolevel)

#### **Plot**

In [20]:
# Alphabetically sorted option lists for the interactive widgets below.
# sorted() already returns a list, so the key views are passed to it directly.
list_vars = sorted(connection.cols_to_dict().keys())
list_geos = sorted(connection.reversed_level_code_dict().keys())

In [21]:
# Interactive plot: one dropdown per option list; `constant` is pinned to 0 via fixed()
# so no widget is generated for it. The trailing semicolon suppresses the cell's return value.
interact(
    connection.draw_figure,
    var=list_vars,
    geo=list_geos,
    constant=fixed(0),
);

interactive(children=(Dropdown(description='var', options=('gespecialiseerd in niet land-en-tuinbouw gerelatee…

#### **Overall outlier analysis**

In [22]:
# Interactive outlier overview across all indicators: pick the PinC year and the upload
# year to compare; the two boolean arguments start as True and are exposed as widgets.
interact(
    connection.overall_outlier_analysis,
    pinc_year=parsed_df.pinc_q_years,
    upload_year=parsed_df.years,
    absolute=True,
    ignore_inf=True,
);

interactive(children=(Dropdown(description='pinc_year', options=('2016', '2017', '2018', '2019', '2020', '2021…

#### **Univariate outlier analysis:**

In [23]:
# Interactive per-indicator outlier view: choose the indicator plus the upload year
# and the PinC year that should be compared.
interact(
    connection.show_outliers,
    var=list_vars,
    upload_year=parsed_df.years,
    pinc_year=parsed_df.pinc_q_years,
);

interactive(children=(Dropdown(description='var', options=('gespecialiseerd in niet land-en-tuinbouw gerelatee…