In [1]:
from bokeh.plotting import figure, show

In [2]:
from bokeh.io import output_notebook
# Configure Bokeh to send its output to the notebook, so the later show(p)
# calls render the figures inline in the cell output.
output_notebook()

In [3]:
import requests
from pyjstat import pyjstat
from collections import OrderedDict
import json
import pandas as pd
from bokeh.models import ColumnDataSource

In [4]:
# Endpoint on data.ssb.no (API v0, table 09114) that the Gini query is POSTed to.
GINI_URL = "http://data.ssb.no/api/v0/no/table/09114"

In [5]:
# Query body for the Gini table. One entry per dimension:
#   Region       - "all" filter with the "*" wildcard (every region)
#   ContentsCode - the single item "Ginikoeffisient"
#   Tid          - "top" filter with 1 (presumably the most recent period only)
# The response is requested in json-stat format.
GINI_PAYLOAD = {
    "query": [
        {"code": "Region", "selection": {"filter": "all", "values": ["*"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["Ginikoeffisient"]}},
        {"code": "Tid", "selection": {"filter": "top", "values": [1]}},
    ],
    "response": {"format": "json-stat"},
}


In [6]:
# POST the Gini query to the API.
# - timeout: without one, requests waits indefinitely on a stalled connection,
#   which would hang the kernel (30 s is a generous upper bound for one table).
# - raise_for_status(): surfaces HTTP 4xx/5xx responses here, instead of as an
#   opaque JSON/parsing error in the cell that decodes the response.
ginidata = requests.post(GINI_URL, json=GINI_PAYLOAD, timeout=30)
ginidata.raise_for_status()

In [7]:
# Decode the response body (keeping key order via OrderedDict) and convert the
# json-stat document to DataFrames; only the first returned frame is kept.
# naming='id' presumably selects id-based (rather than label-based) naming.
gini_json = ginidata.json(object_pairs_hook=OrderedDict)
ginidf = pyjstat.from_json_stat(gini_json, naming='id')[0]

In [8]:
# Endpoint on data.ssb.no (API v0, table 01222) used for the population query.
POP_URL = "http://data.ssb.no/api/v0/no/table/01222"

# Query body for the population table. One entry per dimension:
#   Region       - "all" filter with the "*" wildcard (every region)
#   ContentsCode - the single item "Folketallet1"
#   Tid          - "top" filter with 1 (presumably the most recent period only)
# The response is requested in json-stat format.
POP_PAYLOAD = {
    "query": [
        {"code": "Region", "selection": {"filter": "all", "values": ["*"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["Folketallet1"]}},
        {"code": "Tid", "selection": {"filter": "top", "values": [1]}},
    ],
    "response": {"format": "json-stat"},
}


In [9]:
# POST the population query to the API.
# - timeout: without one, requests waits indefinitely on a stalled connection,
#   which would hang the kernel (30 s is a generous upper bound for one table).
# - raise_for_status(): surfaces HTTP 4xx/5xx responses here, instead of as an
#   opaque JSON/parsing error in the cell that decodes the response.
popdata = requests.post(POP_URL, json=POP_PAYLOAD, timeout=30)
popdata.raise_for_status()

In [10]:
# Decode the response body (keeping key order via OrderedDict) and convert the
# json-stat document to DataFrames; only the first returned frame is kept.
# naming='id' presumably selects id-based (rather than label-based) naming.
pop_json = popdata.json(object_pairs_hook=OrderedDict)
popdf = pyjstat.from_json_stat(pop_json, naming='id')[0]

In [11]:
# Remove the ContentsCode and Tid columns from both frames, leaving the region
# key and the value column.
# The frames are rebound rather than mutated in place, and errors='ignore'
# makes the cell idempotent: re-running it after the columns are already gone
# no longer raises KeyError.
popdf = popdf.drop(["ContentsCode", "Tid"], axis=1, errors="ignore")
ginidf = ginidf.drop(["ContentsCode", "Tid"], axis=1, errors="ignore")

In [12]:
# Give each frame's generic 'value' column a descriptive name so the two can
# be merged side by side: 'folketall' (Norwegian for population count) for the
# population frame and 'gini' for the Gini frame.
popdf = popdf.rename(columns={'value': 'folketall'})
ginidf = ginidf.rename(columns={'value': 'gini'})

In [13]:
# Join population and Gini on the region code. The join is an inner join
# (pandas' default, spelled out here), so only regions present in both frames
# are kept.
ad2 = pd.merge(popdf, ginidf, how='inner', on='Region')

In [14]:
# Scatter plot of population ('folketall', log-scaled x-axis) against the Gini
# coefficient, one marker per row of the merged frame.
# NOTE(review): both queries select every Region ("all" / "*"), so the merged
# frame may contain aggregate regions besides municipalities - confirm whether
# those should be filtered out before plotting.
source = ColumnDataSource(data=ad2)
p = figure(
    title="Municipality size and income inequality in Norway",
    x_axis_label="Municipality size",
    y_axis_label="Gini coefficient",
    x_axis_type='log',
)
p.circle(x='folketall', y='gini', source=source)
show(p)

In [15]:
# Endpoint on data.ssb.no (API v0, table 05212); both payloads below target it.
D_URL = "http://data.ssb.no/api/v0/no/table/05212"

# Query 1: "Folkemengde" for every region, restricted to TettSpredt item "10"
# (presumably the densely populated category - the result is later stored in a
# column named 'tettbygd'), top 1 Tid value, json-stat response.
D_PAYLOAD1 = {
    "query": [
        {"code": "Region", "selection": {"filter": "all", "values": ["*"]}},
        {"code": "TettSpredt", "selection": {"filter": "item", "values": ["10"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["Folkemengde"]}},
        {"code": "Tid", "selection": {"filter": "top", "values": [1]}},
    ],
    "response": {"format": "json-stat"},
}

# Query 2: identical to query 1 except that it carries no TettSpredt entry.
D_PAYLOAD2 = {
    "query": [
        {"code": "Region", "selection": {"filter": "all", "values": ["*"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["Folkemengde"]}},
        {"code": "Tid", "selection": {"filter": "top", "values": [1]}},
    ],
    "response": {"format": "json-stat"},
}


In [16]:
# POST the TettSpredt-filtered query (D_PAYLOAD1) to the API.
# - timeout: without one, requests waits indefinitely on a stalled connection,
#   which would hang the kernel (30 s is a generous upper bound for one table).
# - raise_for_status(): surfaces HTTP 4xx/5xx responses here, instead of as an
#   opaque JSON/parsing error in the cell that decodes the response.
densedata = requests.post(D_URL, json=D_PAYLOAD1, timeout=30)
densedata.raise_for_status()

In [17]:
# POST the unfiltered query (D_PAYLOAD2) to the API.
# - timeout: without one, requests waits indefinitely on a stalled connection,
#   which would hang the kernel (30 s is a generous upper bound for one table).
# - raise_for_status(): surfaces HTTP 4xx/5xx responses here, instead of as an
#   opaque JSON/parsing error in the cell that decodes the response.
totaldata = requests.post(D_URL, json=D_PAYLOAD2, timeout=30)
totaldata.raise_for_status()

In [18]:
# Decode both responses (keeping key order via OrderedDict) and convert each
# json-stat document to DataFrames, keeping only the first returned frame.
# naming='id' presumably selects id-based (rather than label-based) naming.
dense_json = densedata.json(object_pairs_hook=OrderedDict)
densedf = pyjstat.from_json_stat(dense_json, naming='id')[0]
total_json = totaldata.json(object_pairs_hook=OrderedDict)
totaldf = pyjstat.from_json_stat(total_json, naming='id')[0]

In [19]:
# Preview the first five rows of the unfiltered population frame.
totaldf.head(5)

Unnamed: 0,Region,ContentsCode,Tid,value
0,0,Folkemengde,2016,5213985
1,1,Folkemengde,2016,289867
2,101,Folkemengde,2016,30544
3,102,Folkemengde,2016,0
4,103,Folkemengde,2016,0


In [20]:
# Reduce both frames to the region key plus one descriptively named value
# column: 'tettbygd' for the TettSpredt-filtered frame, 'totalt' for the
# unfiltered one.
# The frames are rebound rather than mutated in place, and errors='ignore'
# makes the cell idempotent: re-running it after the columns are already gone
# no longer raises KeyError.
densedf = (
    densedf
    .drop(["ContentsCode", "Tid", "TettSpredt"], axis=1, errors="ignore")
    .rename(columns={'value': 'tettbygd'})
)
totaldf = (
    totaldf
    .drop(["ContentsCode", "Tid"], axis=1, errors="ignore")
    .rename(columns={'value': 'totalt'})
)

In [21]:
# Join the two population frames on the region code (inner join, pandas'
# default) and append 'denseshare' = tettbygd / totalt.
# Rows where totalt is 0 produce NaN (0 / 0), as the head() output shows.
denseshare_df = (
    pd.merge(densedf, totaldf, how='inner', on='Region')
    .assign(denseshare=lambda frame: frame['tettbygd'] / frame['totalt'])
)

In [22]:
# Preview the first five rows, including the computed 'denseshare' column.
denseshare_df.head(5)

Unnamed: 0,Region,tettbygd,totalt,denseshare
0,0,4229849,5213985,0.811251
1,1,247968,289867,0.855454
2,101,26491,30544,0.867306
3,102,0,0,
4,103,0,0,


In [23]:
# Attach the Gini coefficient to the dense-share frame. The join is an inner
# join (pandas' default, spelled out here), so regions with no row in ginidf
# drop out of the result.
dense_gini_df = pd.merge(denseshare_df, ginidf, how='inner', on='Region')

In [24]:
# Preview the first five rows of the merged dense-share / Gini frame.
dense_gini_df.head(5)

Unnamed: 0,Region,tettbygd,totalt,denseshare,gini
0,0,4229849,5213985,0.811251,0.263
1,1,247968,289867,0.855454,0.239
2,101,26491,30544,0.867306,0.232
3,104,31634,32182,0.982972,0.253
4,105,49584,54678,0.906836,0.231


In [26]:
# Scatter plot of the share of population in dense areas ('denseshare',
# default linear axes) against the Gini coefficient, one marker per row of the
# merged frame.
source = ColumnDataSource(data=dense_gini_df)
p = figure(
    title="Population density and income inequality in Norway",
    x_axis_label="Share of population living in dense areas",
    y_axis_label="Gini coefficient",
)
p.circle(x='denseshare', y='gini', source=source)
show(p)