In [5]:
import pandas as pd
import requests
import json
import numpy as np
import geopandas as gpd

### Wskaźnik 1: lekarze (personel pracujący ogółem) na 10 tys. ludności

In [2]:
page_0 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/454185?unit-level=5&page-size=100&page=0'
page_1 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/454185?unit-level=5&page-size=100&page=1'
page_2 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/454185?unit-level=5&page-size=100&page=2'
page_3 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/454185?unit-level=5&page-size=100&page=3'

In [59]:
def get_df (page_url):
    response_API = requests.get(page_url)
    population_summary = json.loads(response_API.text)
    rows = []

    for data in population_summary['results']:
        for value in data['values']:
            row = {'id': data['id'],
                'name': data['name'],
                'year': value['year'],
                'value': value['val'],
                'powiat_numer': data['id'][2:4] + data['id'][7:9],
                'woj_numer': data['id'][2:4]
                }
            rows.append(row)
    df = pd.DataFrame(rows)
    return df

In [11]:
df_0 = get_df(page_0)
df_1 = get_df(page_1)
df_2 = get_df(page_2)
df_3 = get_df(page_3)

In [12]:
dfs = [df_0, df_1, df_2, df_3]

In [13]:
doctors_df = pd.concat(dfs, axis=0)

In [14]:
doctors_df

Unnamed: 0,id,name,year,value,powiat_numer
0,011212001000,Powiat bocheński,2010,26.7,1201
1,011212001000,Powiat bocheński,2011,28.8,1201
2,011212001000,Powiat bocheński,2013,33.7,1201
3,011212001000,Powiat bocheński,2014,36.3,1201
4,011212001000,Powiat bocheński,2015,37.6,1201
...,...,...,...,...,...
875,071427338000,Powiat żyrardowski,2017,31.3,1438
876,071427338000,Powiat żyrardowski,2018,29.0,1438
877,071427338000,Powiat żyrardowski,2019,27.7,1438
878,071427338000,Powiat żyrardowski,2020,29.2,1438


In [15]:
len(doctors_df['powiat_numer'].unique())

380

In [17]:
doctors_df.to_csv('../../population_data/other-measures/doctors_per_1000.csv', index=False)

### Wskaźnik 2: Łoźka w szpitalach ogólnych w powiatach

In [18]:
page_0 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/152354?unit-level=5&page-size=100&page=0'
page_1 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/152354?unit-level=5&page-size=100&page=1'
page_2 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/152354?unit-level=5&page-size=100&page=2'
page_3 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/152354?unit-level=5&page-size=100&page=3'

In [19]:
df_0 = get_df(page_0)
df_1 = get_df(page_1)
df_2 = get_df(page_2)
df_3 = get_df(page_3)

In [20]:
dfs = [df_0, df_1, df_2, df_3]

In [21]:
bed_df = pd.concat(dfs, axis=0)

In [22]:
bed_df

Unnamed: 0,id,name,year,value,powiat_numer
0,011212001000,Powiat bocheński,2005,230,1201
1,011212001000,Powiat bocheński,2006,191,1201
2,011212001000,Powiat bocheński,2007,190,1201
3,011212001000,Powiat bocheński,2008,215,1201
4,011212001000,Powiat bocheński,2009,211,1201
...,...,...,...,...,...
1355,071427338000,Powiat żyrardowski,2017,297,1438
1356,071427338000,Powiat żyrardowski,2018,272,1438
1357,071427338000,Powiat żyrardowski,2019,247,1438
1358,071427338000,Powiat żyrardowski,2020,247,1438


In [23]:
bed_df.to_csv('../../population_data/other-measures/beds.csv', index=False)

### Wskaźnik 3: Liczba szpitali ogólnych w województwach

In [26]:
page_0 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/63127?unit-level=2&page-size=20'

In [27]:
hospitals_df = get_df(page_0)

In [28]:
hospitals_df

Unnamed: 0,id,name,year,value,woj_numer
0,011200000000,MAŁOPOLSKIE,2004,73,12
1,011200000000,MAŁOPOLSKIE,2005,75,12
2,011200000000,MAŁOPOLSKIE,2006,61,12
3,011200000000,MAŁOPOLSKIE,2007,63,12
4,011200000000,MAŁOPOLSKIE,2008,61,12
...,...,...,...,...,...
283,071400000000,MAZOWIECKIE,2017,118,14
284,071400000000,MAZOWIECKIE,2018,121,14
285,071400000000,MAZOWIECKIE,2019,112,14
286,071400000000,MAZOWIECKIE,2020,118,14


In [29]:
hospitals_df.to_csv('../../population_data/other-measures/hospitals.csv', index=False)

### Wskaźnik 4: Oczekiwane trwanie zycia w zdrowiu w momencie narodzin - Healthy Life Years 0 w województwach

In [30]:
page_0_males = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/1608279?unit-level=2&page-size=16'

In [31]:
HLY_0_males = get_df(page_0_males)

In [32]:
HLY_0_males['gender'] = 'MALE'

In [67]:
HLY_0_males.query("name == 'LUBELSKIE'")

Unnamed: 0,id,name,year,value,woj_numer,gender
156,60600000000,LUBELSKIE,2009,58.7,6,MALE
157,60600000000,LUBELSKIE,2010,58.2,6,MALE
158,60600000000,LUBELSKIE,2011,57.9,6,MALE
159,60600000000,LUBELSKIE,2012,57.7,6,MALE
160,60600000000,LUBELSKIE,2013,57.7,6,MALE
161,60600000000,LUBELSKIE,2014,57.6,6,MALE
162,60600000000,LUBELSKIE,2015,57.9,6,MALE
163,60600000000,LUBELSKIE,2016,58.5,6,MALE
164,60600000000,LUBELSKIE,2017,59.4,6,MALE
165,60600000000,LUBELSKIE,2018,59.5,6,MALE


In [34]:
page_0_females = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/1608285?unit-level=2&page-size=16'

In [35]:
HLY_0_females = get_df(page_0_females)
HLY_0_females['gender'] = 'FEMALE'

In [68]:
HLY_0_females.query("name == 'LUBELSKIE'")

Unnamed: 0,id,name,year,value,woj_numer,gender
156,60600000000,LUBELSKIE,2009,63.2,6,FEMALE
157,60600000000,LUBELSKIE,2010,63.2,6,FEMALE
158,60600000000,LUBELSKIE,2011,62.8,6,FEMALE
159,60600000000,LUBELSKIE,2012,62.4,6,FEMALE
160,60600000000,LUBELSKIE,2013,61.7,6,FEMALE
161,60600000000,LUBELSKIE,2014,61.5,6,FEMALE
162,60600000000,LUBELSKIE,2015,61.3,6,FEMALE
163,60600000000,LUBELSKIE,2016,62.3,6,FEMALE
164,60600000000,LUBELSKIE,2017,62.6,6,FEMALE
165,60600000000,LUBELSKIE,2018,62.8,6,FEMALE


In [37]:
HLY_0 = pd.concat([HLY_0_males, HLY_0_females])

In [38]:
HLY_0

Unnamed: 0,id,name,year,value,woj_numer,gender
0,011200000000,MAŁOPOLSKIE,2009,56.4,12,MALE
1,011200000000,MAŁOPOLSKIE,2010,59.5,12,MALE
2,011200000000,MAŁOPOLSKIE,2011,59.4,12,MALE
3,011200000000,MAŁOPOLSKIE,2012,59.6,12,MALE
4,011200000000,MAŁOPOLSKIE,2013,60.1,12,MALE
...,...,...,...,...,...,...
203,071400000000,MAZOWIECKIE,2017,63.5,14,FEMALE
204,071400000000,MAZOWIECKIE,2018,63.2,14,FEMALE
205,071400000000,MAZOWIECKIE,2019,63.1,14,FEMALE
206,071400000000,MAZOWIECKIE,2020,62.7,14,FEMALE


In [39]:
len(HLY_0['id'].unique())

16

In [40]:
HLY_0.to_csv('../../population_data/other-measures/HLY.csv', index=False)

In [71]:
HLY_0.query("year == '2019'").sort_values(by=['value'])

Unnamed: 0,id,name,year,value,woj_numer,gender
140,51000000000,ŁÓDZKIE,2019,58.7,10,MALE
23,12400000000,ŚLĄSKIE,2019,59.3,24,MALE
75,30200000000,DOLNOŚLĄSKIE,2019,59.3,2,MALE
127,42800000000,WARMIŃSKO-MAZURSKIE,2019,59.3,28,MALE
166,60600000000,LUBELSKIE,2019,59.4,6,MALE
153,52600000000,ŚWIĘTOKRZYSKIE,2019,59.4,26,MALE
205,71400000000,MAZOWIECKIE,2019,59.6,14,MALE
192,62000000000,PODLASKIE,2019,59.6,20,MALE
101,40400000000,KUJAWSKO-POMORSKIE,2019,59.8,4,MALE
88,31600000000,OPOLSKIE,2019,59.9,16,MALE


### Wskaznik 5: Przeciętne trwanie zycia e_0. 

Podzielenie HLY_0/e_0 daje procent zycia w zdrowiu - ciekawy wskaznik

In [41]:
page_0_males = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/101554?unit-level=2&page-size=16'

In [42]:
E_0_males = get_df(page_0_males)
E_0_males['gender'] = 'MALE'

In [43]:
E_0_males

Unnamed: 0,id,name,year,value,woj_numer,gender
0,011200000000,MAŁOPOLSKIE,1995,69.2,12,MALE
1,011200000000,MAŁOPOLSKIE,1996,69.4,12,MALE
2,011200000000,MAŁOPOLSKIE,1997,69.7,12,MALE
3,011200000000,MAŁOPOLSKIE,1998,70.6,12,MALE
4,011200000000,MAŁOPOLSKIE,1999,70.5,12,MALE
...,...,...,...,...,...,...
427,071400000000,MAZOWIECKIE,2017,74.0,14,MALE
428,071400000000,MAZOWIECKIE,2018,74.0,14,MALE
429,071400000000,MAZOWIECKIE,2019,74.3,14,MALE
430,071400000000,MAZOWIECKIE,2020,72.8,14,MALE


In [65]:
E_0_males.query('name == "LUBELSKIE"')

Unnamed: 0,id,name,year,value,woj_numer,gender
324,60600000000,LUBELSKIE,1995,67.5,6,MALE
325,60600000000,LUBELSKIE,1996,68.0,6,MALE
326,60600000000,LUBELSKIE,1997,68.0,6,MALE
327,60600000000,LUBELSKIE,1998,68.3,6,MALE
328,60600000000,LUBELSKIE,1999,68.6,6,MALE
329,60600000000,LUBELSKIE,2000,69.1,6,MALE
330,60600000000,LUBELSKIE,2001,69.6,6,MALE
331,60600000000,LUBELSKIE,2002,69.6,6,MALE
332,60600000000,LUBELSKIE,2003,69.9,6,MALE
333,60600000000,LUBELSKIE,2004,70.0,6,MALE


In [45]:
page_0_females = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/101555?unit-level=2&page-size=16'

In [46]:
E_0_females = get_df(page_0_females)
E_0_females['gender'] = 'FEMALE'

In [66]:
E_0_females.query('name == "LUBELSKIE"')

Unnamed: 0,id,name,year,value,woj_numer,gender
324,60600000000,LUBELSKIE,1995,77.2,6,FEMALE
325,60600000000,LUBELSKIE,1996,77.2,6,FEMALE
326,60600000000,LUBELSKIE,1997,77.4,6,FEMALE
327,60600000000,LUBELSKIE,1998,77.7,6,FEMALE
328,60600000000,LUBELSKIE,1999,78.2,6,FEMALE
329,60600000000,LUBELSKIE,2000,78.5,6,FEMALE
330,60600000000,LUBELSKIE,2001,78.8,6,FEMALE
331,60600000000,LUBELSKIE,2002,79.3,6,FEMALE
332,60600000000,LUBELSKIE,2003,79.4,6,FEMALE
333,60600000000,LUBELSKIE,2004,79.5,6,FEMALE


In [48]:
E_0_df = pd.concat([E_0_males, E_0_females])

In [49]:
E_0_df

Unnamed: 0,id,name,year,value,woj_numer,gender
0,011200000000,MAŁOPOLSKIE,1995,69.2,12,MALE
1,011200000000,MAŁOPOLSKIE,1996,69.4,12,MALE
2,011200000000,MAŁOPOLSKIE,1997,69.7,12,MALE
3,011200000000,MAŁOPOLSKIE,1998,70.6,12,MALE
4,011200000000,MAŁOPOLSKIE,1999,70.5,12,MALE
...,...,...,...,...,...,...
427,071400000000,MAZOWIECKIE,2017,82.1,14,FEMALE
428,071400000000,MAZOWIECKIE,2018,82.0,14,FEMALE
429,071400000000,MAZOWIECKIE,2019,82.1,14,FEMALE
430,071400000000,MAZOWIECKIE,2020,80.9,14,FEMALE


In [50]:
E_0_df.to_csv('../../population_data/other-measures/E0.csv', index=False)

In [73]:
E_0_df.query("year == '2019'").sort_values(by=['value'])

Unnamed: 0,id,name,year,value,woj_numer,gender
294,51000000000,ŁÓDZKIE,2019,72.5,10,MALE
78,20800000000,LUBUSKIE,2019,72.9,8,MALE
267,42800000000,WARMIŃSKO-MAZURSKIE,2019,73.0,28,MALE
159,30200000000,DOLNOŚLĄSKIE,2019,73.5,2,MALE
132,23200000000,ZACHODNIOPOMORSKIE,2019,73.6,32,MALE
213,40400000000,KUJAWSKO-POMORSKIE,2019,73.7,4,MALE
51,12400000000,ŚLĄSKIE,2019,73.8,24,MALE
321,52600000000,ŚWIĘTOKRZYSKIE,2019,73.8,26,MALE
348,60600000000,LUBELSKIE,2019,73.9,6,MALE
429,71400000000,MAZOWIECKIE,2019,74.3,14,MALE


### Wskaźnik 6 : procent zycia w zdrowiu

In [51]:
health_precentage_df = pd.merge(HLY_0, E_0_df, on=['id', 'year', 'gender'], how='left')

In [52]:
health_precentage_df

Unnamed: 0,id,name_x,year,value_x,woj_numer_x,gender,name_y,value_y,woj_numer_y
0,011200000000,MAŁOPOLSKIE,2009,56.4,12,MALE,MAŁOPOLSKIE,73.1,12
1,011200000000,MAŁOPOLSKIE,2010,59.5,12,MALE,MAŁOPOLSKIE,73.7,12
2,011200000000,MAŁOPOLSKIE,2011,59.4,12,MALE,MAŁOPOLSKIE,73.9,12
3,011200000000,MAŁOPOLSKIE,2012,59.6,12,MALE,MAŁOPOLSKIE,74.0,12
4,011200000000,MAŁOPOLSKIE,2013,60.1,12,MALE,MAŁOPOLSKIE,74.8,12
...,...,...,...,...,...,...,...,...,...
411,071400000000,MAZOWIECKIE,2017,63.5,14,FEMALE,MAZOWIECKIE,82.1,14
412,071400000000,MAZOWIECKIE,2018,63.2,14,FEMALE,MAZOWIECKIE,82.0,14
413,071400000000,MAZOWIECKIE,2019,63.1,14,FEMALE,MAZOWIECKIE,82.1,14
414,071400000000,MAZOWIECKIE,2020,62.7,14,FEMALE,MAZOWIECKIE,80.9,14


In [53]:
health_percentage_copy = health_precentage_df.copy()

In [54]:
health_percentage_copy = health_percentage_copy.rename(columns={'value_x':'HLY', 'value_y' : 'E'})

In [55]:
health_percentage_copy['health_percentage'] = health_percentage_copy['HLY']/health_percentage_copy['E'] * 100

In [56]:
health_percentage_copy

Unnamed: 0,id,name_x,year,HLY,woj_numer_x,gender,name_y,E,woj_numer_y,health_percentage
0,011200000000,MAŁOPOLSKIE,2009,56.4,12,MALE,MAŁOPOLSKIE,73.1,12,77.154583
1,011200000000,MAŁOPOLSKIE,2010,59.5,12,MALE,MAŁOPOLSKIE,73.7,12,80.732700
2,011200000000,MAŁOPOLSKIE,2011,59.4,12,MALE,MAŁOPOLSKIE,73.9,12,80.378890
3,011200000000,MAŁOPOLSKIE,2012,59.6,12,MALE,MAŁOPOLSKIE,74.0,12,80.540541
4,011200000000,MAŁOPOLSKIE,2013,60.1,12,MALE,MAŁOPOLSKIE,74.8,12,80.347594
...,...,...,...,...,...,...,...,...,...,...
411,071400000000,MAZOWIECKIE,2017,63.5,14,FEMALE,MAZOWIECKIE,82.1,14,77.344702
412,071400000000,MAZOWIECKIE,2018,63.2,14,FEMALE,MAZOWIECKIE,82.0,14,77.073171
413,071400000000,MAZOWIECKIE,2019,63.1,14,FEMALE,MAZOWIECKIE,82.1,14,76.857491
414,071400000000,MAZOWIECKIE,2020,62.7,14,FEMALE,MAZOWIECKIE,80.9,14,77.503090


In [57]:
health_percentage_copy.to_csv('../../population_data/other-measures/health_percentage.csv', index=False)

### Mediana zycia mieszkanca

In [58]:
page_0 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/746289?unit-level=5&page-size=100&page=0'
page_1 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/746289?unit-level=5&page-size=100&page=1'
page_2 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/746289?unit-level=5&page-size=100&page=2'
page_3 = 'https://bdl.stat.gov.pl/api/v1/data/by-variable/746289?unit-level=5&page-size=100&page=3'

In [60]:
df_0 = get_df(page_0)
df_1 = get_df(page_1)
df_2 = get_df(page_2)
df_3 = get_df(page_3)

In [61]:
dfs = [df_0, df_1, df_2, df_3]

In [62]:
age_df = pd.concat(dfs, axis=0)

In [63]:
age_df

Unnamed: 0,id,name,year,value,powiat_numer,woj_numer
0,011212001000,Powiat bocheński,2018,37.9,1201,12
1,011212001000,Powiat bocheński,2019,38.3,1201,12
2,011212001000,Powiat bocheński,2020,38.9,1201,12
3,011212001000,Powiat bocheński,2021,39.4,1201,12
4,011212001000,Powiat bocheński,2022,39.8,1201,12
...,...,...,...,...,...,...
395,071427338000,Powiat żyrardowski,2018,41.0,1438,14
396,071427338000,Powiat żyrardowski,2019,41.4,1438,14
397,071427338000,Powiat żyrardowski,2020,41.9,1438,14
398,071427338000,Powiat żyrardowski,2021,42.3,1438,14


In [64]:
age_df.to_csv('../../population_data/other-measures/age_median.csv', index=False)