In [1]:
import os
import json
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import zscore

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc

In [2]:
# Read the voivodeship boundaries (GeoJSON) from disk.
with open('./geo/wojewodztwa-min.geojson', mode='r', encoding="utf8") as json_file:
    geojson = json.load(json_file)

# One row per GeoJSON feature, built from each feature's `properties` mapping;
# the `nazwa` column is upper-cased (presumably to match the spelling used in
# the business register data -- confirm against the CSV).
v_id = (
    pd.DataFrame([feature['properties'] for feature in geojson['features']])
    .assign(nazwa=lambda props: props['nazwa'].str.upper())
)

In [3]:
# Load the cleaned CEIDG classification dataset; the path is relative to the
# notebook's working directory.
df = pd.read_csv('./data/ceidg_data_classif_cleaned.csv')

In [4]:
# Peek at four randomly chosen rows. No random_state is passed, so the rows
# shown differ from run to run.
df.sample(4)

Unnamed: 0.1,Unnamed: 0,RandomDate,MonthOfStartingOfTheBusiness,QuarterOfStartingOfTheBusiness,MainAddressVoivodeship,MainAddressCounty,MainAddressTERC,CorrespondenceAddressVoivodeship,CorrespondenceAddressCounty,CorrespondenceAddressTERC,...,ShareholderInOtherCompanies,PKDMainSection,PKDMainDivision,PKDMainGroup,PKDMainClass,NoOfUniquePKDSections,NoOfUniquePKDDivsions,NoOfUniquePKDGroups,NoOfUniquePKDClasses,Target
1679543,1679543,2018-06-01,May,2,LUBELSKIE,RADZYŃSKI,615022.0,LUBELSKIE,RADZYŃSKI,615022.0,...,False,I,55.0,552.0,5520.0,1,1,1,1,False
1697563,1697563,2018-09-05,May,2,MAZOWIECKIE,WĘGROWSKI,1433062.0,MAZOWIECKIE,WĘGROWSKI,1433062.0,...,False,C,16.0,162.0,1623.0,2,2,6,12,False
1392693,1392693,2018-04-02,July,3,MAZOWIECKIE,PRZASNYSKI,1422042.0,MAZOWIECKIE,PRZASNYSKI,1422042.0,...,False,F,41.0,411.0,4110.0,1,2,4,9,False
2171667,2171667,2018-10-05,September,3,,,,ŚLĄSKIE,BIELSKO-BIAŁA,2461011.0,...,False,G,47.0,477.0,4779.0,1,1,1,1,True


In [5]:
# Narrow the frame to the voivodeship and main PKD section columns.
data = df[['MainAddressVoivodeship', 'PKDMainSection']]

In [6]:
# Row count per voivodeship, restricted to rows whose main PKD section is 'M'.
(
    data.loc[data['PKDMainSection'] == 'M']
    .groupby('MainAddressVoivodeship')
    .size()
    .to_frame('size')
    .reset_index()
)

Unnamed: 0,MainAddressVoivodeship,size
0,DOLNOŚLĄSKIE,23281
1,KUJAWSKO-POMORSKIE,10904
2,LUBELSKIE,10328
3,LUBUSKIE,5303
4,MAZOWIECKIE,70494
5,MAŁOPOLSKIE,24967
6,OPOLSKIE,4875
7,PODKARPACKIE,10179
8,PODLASKIE,6224
9,POMORSKIE,19736


In [7]:
# Contingency table: one row per voivodeship, one column per main PKD section,
# cells hold the number of rows (0 where a combination never occurs).
data = df[['MainAddressVoivodeship', 'MainAddressCounty', 'PKDMainSection']]
matrix = data.groupby(['MainAddressVoivodeship','PKDMainSection']).size().unstack(fill_value=0)

# Share of each section within its voivodeship (rows sum to 1), then z-scored
# column by column so every section is compared across voivodeships.
matrix_proportions = matrix.div(matrix.sum(axis=1), axis=0)
normalized = matrix_proportions.apply(zscore)
# Section with the highest z-score in each voivodeship.
normalized['Max'] = normalized.idxmax(axis=1)
print(normalized['Max'])

# Same procedure on the raw counts instead of the within-voivodeship shares.
normalized_absolute = matrix.apply(zscore)
# Section T has very few businesses, so any non-zero count sends its z-score
# through the roof; it is therefore left out of the comparison. The column is
# dropped by name (not by position) so that the exclusion still targets T if
# the set of sections present in the data changes.
normalized_absolute['Max'] = normalized_absolute.drop(columns='T', errors='ignore').idxmax(axis=1)
print(normalized_absolute['Max'])

MainAddressVoivodeship
DOLNOŚLĄSKIE           K
KUJAWSKO-POMORSKIE     D
LUBELSKIE              B
LUBUSKIE               S
MAZOWIECKIE            M
MAŁOPOLSKIE            R
OPOLSKIE               K
PODKARPACKIE           B
PODLASKIE              A
POMORSKIE              O
WARMIŃSKO-MAZURSKIE    Q
WIELKOPOLSKIE          N
ZACHODNIOPOMORSKIE     I
ŁÓDZKIE                G
ŚLĄSKIE                K
ŚWIĘTOKRZYSKIE         E
Name: Max, dtype: object
MainAddressVoivodeship
DOLNOŚLĄSKIE           F
KUJAWSKO-POMORSKIE     D
LUBELSKIE              B
LUBUSKIE               O
MAZOWIECKIE            J
MAŁOPOLSKIE            F
OPOLSKIE               O
PODKARPACKIE           B
PODLASKIE              A
POMORSKIE              O
WARMIŃSKO-MAZURSKIE    A
WIELKOPOLSKIE          A
ZACHODNIOPOMORSKIE     I
ŁÓDZKIE                B
ŚLĄSKIE                C
ŚWIĘTOKRZYSKIE         E
Name: Max, dtype: object


In [None]:
# Boundary geometries for the two map granularities offered by the dashboard.
with open('./geo/wojewodztwa-min.geojson', 'r', encoding="utf8") as json_file:
    geojson_voivodeships = json.load(json_file)

with open('./geo/powiaty-min.geojson', 'r', encoding="utf8") as json_file:
    geojson_counties = json.load(json_file)

# Columns read by the figure callback.
data = df[['MainAddressVoivodeship', 'MainAddressCounty', 'PKDMainSection']]
# Distinct section codes in sorted order. Missing values are removed with
# dropna() instead of comparing each value's string form with 'nan'.
sections = sorted(data['PKDMainSection'].dropna().unique())
# Dropdown entries: each section code is both the label and the value.
section_options = [dict(label=x, value=x) for x in sections]

# Dash application using the Bootstrap stylesheet from dash-bootstrap-components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


# Dropdown choosing the territorial division; voivodeships are preselected.
division_dropdown = dcc.Dropdown(
    id="Podział",
    value='voivodeships',
    options=[
        dict(label='Województwa', value='voivodeships'),
        dict(label='Powiaty', value='counties'),
    ],
)

# Dropdown choosing the PKD section; it starts with no selection.
section_dropdown = dcc.Dropdown(id="Sekcja", options=section_options)

# Card that groups both labelled dropdowns.
controls = dbc.Card(
    [
        dbc.FormGroup([dbc.Label("Podział"), division_dropdown]),
        dbc.FormGroup([dbc.Label("Sekcja"), section_dropdown]),
    ],
    body=True,
)


# Page layout: heading, a row holding the control card, then a full-width row
# with the map.
app.layout = dbc.Container(
    [
        html.H1("Charakterystyka przestrzenna działalności gospodarczej"),
        html.Hr(),
        dbc.Row(
            [
                dbc.Col(controls, md=4),
            ],
            # Row's `align` sets vertical alignment and does not accept "left";
            # horizontal start-alignment is expressed with `justify`.
            justify="start"
        ),
        dbc.Row(
            [
                dbc.Col(dcc.Graph(id="graph"), md=12),
            ],
            align="center"
        )
    ]
)


# Number of leading characters cut from each county name in the GeoJSON before
# matching. Presumably the length of a "powiat " prefix -- TODO confirm against
# the contents of powiaty-min.geojson.
_COUNTY_NAME_PREFIX_LEN = 7


def _build_choropleth(section, region_column, geojson, id_label, name_offset=0):
    """Count rows of `data` per region and draw the counts as a choropleth.

    Parameters
    ----------
    section : str or None
        PKD section to filter on; None keeps every row.
    region_column : str
        Column of `data` holding the region name to group by.
    geojson : dict
        Parsed GeoJSON whose features carry `id` and `nazwa` properties.
    id_label : str
        Display label used for the `id` field in the figure.
    name_offset : int
        Number of leading characters removed from each `nazwa` before it is
        upper-cased and matched against `region_column`.

    Returns
    -------
    The figure produced by `px.choropleth`.
    """
    selected = data if section is None else data[data['PKDMainSection'] == section]
    counts = selected.groupby(region_column).size().to_frame('size').reset_index()

    # Lookup from upper-cased region name to the feature properties (incl. `id`).
    regions = pd.DataFrame([feature['properties'] for feature in geojson['features']])
    regions['nazwa'] = regions['nazwa'].str[name_offset:].str.upper()

    # Inner join on the name: a region present in only one of the two frames is
    # left out of the map.
    # NOTE(review): the name is the only join key, so if two features share a
    # name they would both receive the same combined count -- verify that the
    # names in the GeoJSON are unique.
    map_data = pd.merge(counts, regions.set_index('nazwa'), left_on=region_column, right_index=True)

    return px.choropleth(
        map_data,
        geojson=geojson,
        color="size",
        locations="id",
        featureidkey="properties.id",
        projection="mercator",
        color_continuous_scale="peach",
        labels={'id': id_label, 'size': 'liczba zarejestrowanych działalności'},
        hover_name=region_column,
        hover_data=['size'],
        height=800,
    )


@app.callback(Output("graph", "figure"), [Input("Sekcja", "value"), Input("Podział", "value")])
def make_figure(section, division):
    """Dash callback: rebuild the map whenever either dropdown changes.

    `section` is the value of the "Sekcja" dropdown (None when nothing is
    selected, in which case all sections are counted). `division` is the value
    of the "Podział" dropdown; None or "voivodeships" draws voivodeships, any
    other value draws counties. Returns the figure for the "graph" component.
    """
    if division is None or division == "voivodeships":
        fig = _build_choropleth(
            section, 'MainAddressVoivodeship', geojson_voivodeships, 'id województwa'
        )
    else:
        fig = _build_choropleth(
            section, 'MainAddressCounty', geojson_counties, 'id powiatu',
            name_offset=_COUNTY_NAME_PREFIX_LEN,
        )
    # Hide the base map and zoom to the drawn regions.
    # NOTE(review): fitbounds="locations" likely overrides the explicit axis
    # ranges passed here -- confirm whether they are still needed.
    fig.update_geos(fitbounds="locations", visible=False, lataxis_range=[50,60], lonaxis_range=[0, 30])
    return fig

# Start the Dash server with default settings. The call does not return while
# the server is running (the recorded output shows it listening on
# http://127.0.0.1:8050/ until interrupted).
app.run_server()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [11/May/2020 20:09:50] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-component-suites/dash_core_components/dash_core_components-shared.js.map HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-component-suites/dash_core_components/dash_core_components.min.js.map HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-component-suites/dash_html_components/dash_html_components.min.js.map HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_favicon.ico?v=1.12.0 HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "GET /_dash-component-suites/dash_core_components/async-graph.js.map HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2020 20:09:50] "[37mGET 