In [1]:
import tempfile

import pandas as pd
import geopandas as gpd
import pyogrio
import httpx

In [3]:
# Load the `sldb2021_vek10_pohlavi` table straight from csu.gov.cz.
# Every execution of this cell downloads the file again.
vek_pohlavi = pd.read_csv(
    filepath_or_buffer="https://csu.gov.cz/docs/107508/36f9eebd-00ff-5fb3-3ccf-de910ab1ef22/sldb2021_vek10_pohlavi.csv?version=1.0",
)

In [9]:
# Show the column labels of the freshly loaded table; the cells below select
# `uzemi_cis`, `uzemi_kod`, `pohlavi_txt`, `vek_txt` and `hodnota` from it.
vek_pohlavi.columns

Index(['idhod', 'hodnota', 'ukaz_kod', 'vek_cis', 'vek_kod', 'pohlavi_cis',
       'pohlavi_kod', 'uzemi_cis', 'uzemi_kod', 'sldb_rok', 'sldb_datum',
       'ukaz_txt', 'vek_txt', 'pohlavi_txt', 'uzemi_txt'],
      dtype='object')

In [8]:
# Keep only the rows whose territory code list (`uzemi_cis`) equals 43.
# NOTE(review): presumably 43 is the municipality ("obec") level, going by the
# variable name — confirm against the code list.
vek_pohlavi_obec = vek_pohlavi.loc[vek_pohlavi["uzemi_cis"].eq(43)]

In [12]:
# Series of counts (`hodnota`) keyed by (territory code, sex label, age-band label).
vals = (
    vek_pohlavi_obec
    .set_index(["uzemi_kod", "pohlavi_txt", "vek_txt"])
    .loc[:, "hodnota"]
)

In [16]:
# Collapse the sex level: one summed count per (territory code, age band).
# Rows whose age-band label is missing are dropped by groupby's default NaN handling.
# NOTE(review): if the table also carries all-sex total rows (missing `pohlavi_txt`),
# they are added in here too and every count is inflated; the mean age derived later
# is a ratio of these sums, so it would not be affected — confirm against the source.
age_group_population = vals.groupby(level=["uzemi_kod", "vek_txt"]).sum()
age_group_population

uzemi_kod  vek_txt       
500011     0 - 9 let         324
           10 - 19 let       312
           100 a více let      0
           20 - 29 let       306
           30 - 39 let       484
                            ... 
599999     50 - 59 let       666
           60 - 69 let       628
           70 - 79 let       482
           80 - 89 let       164
           90 - 99 let        14
Name: hodnota, Length: 68794, dtype: int64

In [20]:
# Turn the age-band index level into an ordinary `vek_txt` column, leaving
# `uzemi_kod` as the only index level. The name is rebound from a Series to a
# DataFrame, so this cell is meant to run once after the groupby cell.
age_group_population = age_group_population.reset_index(level="vek_txt")

In [22]:
# Representative age used for each age-band label when averaging:
# the midpoint of every ten-year band, and 100 for the open-ended top band.
mean_age = {
    '0 - 9 let': 5,
    '10 - 19 let': 15,
    '20 - 29 let': 25,
    '30 - 39 let': 35,
    '40 - 49 let': 45,
    '50 - 59 let': 55,
    '60 - 69 let': 65,
    '70 - 79 let': 75,
    '80 - 89 let': 85,
    '90 - 99 let': 95,
    '100 a více let': 100,
}

In [23]:
# Translate each age-band label into its representative age.
# Labels absent from `mean_age` become NaN.
age_group_population['mean_age'] = (
    age_group_population['vek_txt']
    .map(mean_age)
)

In [27]:
# Weight every age band's head count by the representative age of that band.
age_group_population['weighted_age'] = age_group_population['hodnota'] * age_group_population['mean_age']

# Population-weighted mean age per territory code:
#     sum(count * representative age) / sum(count)
# Both sums come from one built-in grouped aggregation instead of calling a Python
# lambda once per group through `groupby.apply`; the resulting Series is the same
# (indexed by `uzemi_kod`, unnamed) but is computed in vectorized form.
mean_age_obec = (
    age_group_population
    .groupby(level=0)[['weighted_age', 'hodnota']]
    .sum()
    .pipe(lambda sums: sums['weighted_age'] / sums['hodnota'])
)

In [28]:
# Display the weighted mean age computed for each `uzemi_kod`.
mean_age_obec

uzemi_kod
500011    45.528343
500020    44.782427
500046    42.127722
500062    42.579208
500071    43.869464
            ...    
599930    41.399055
599948    43.611194
599956    40.730833
599964    41.216216
599999    41.640564
Length: 6254, dtype: float64

In [29]:
# Location of the `sldb2021_stav` CSV on csu.gov.cz; it is read in the next cell.
family_url = (
    "https://csu.gov.cz/docs/107508/b9f83996-cebb-c134-114e-54d8462e2976/sldb2021_stav.csv?version=1.0"
)

In [30]:
# Download and parse the table behind `family_url`.
family = pd.read_csv(filepath_or_buffer=family_url)

In [42]:
# Restrict the table to territory code list 43, the same level that was
# selected for the age data; the name `family` is rebound to the filtered frame.
family = family.loc[family["uzemi_cis"].eq(43)]

In [43]:
# Series of counts (`hodnota`) keyed by (territory code, `stav_txt` label).
stav = (
    family
    .set_index(["uzemi_kod", "stav_txt"])
    .loc[:, "hodnota"]
)

In [44]:
# Population per territory code, obtained by adding up the individual status categories.
# Rows with a missing `stav_txt` appear to hold the territory total already (for 500011
# the missing-label row is 1817 while the four largest categories add up to 1807).
# Including them in the sum would count every person twice and halve any share
# computed against `total_pop`, so only rows with an actual category label are summed.
total_pop = (
    stav[stav.index.get_level_values("stav_txt").notna()]
    .groupby(level=0)
    .sum()
)

In [45]:
# Display the per-territory counts by `stav_txt` label; the first row of each
# territory has a missing (NaN) label.
stav

uzemi_kod  stav_txt                                                  
500011     NaN                                                           1817
           Svobodný/svobodná                                              671
           Ženatý/vdaná                                                   823
           Rozvedený/rozvedená                                            171
           Vdovec/vdova                                                   142
                                                                         ... 
599999     Vdovec/vdova                                                   163
           Registrované partnerství trvající                                2
           Registrované partnerství zaniklé rozhodnutím soudu               0
           Registrované partnerství zaniklé úmrtím partnera/partnerky       0
           Nezjištěno                                                       8
Name: hodnota, Length: 56286, dtype: int64

In [46]:
# Share of the "Rozvedený/rozvedená" category per territory code:
# pick that category's counts (indexed by `uzemi_kod`) and divide by `total_pop`,
# aligned on the shared index.
divorces = stav.xs("Rozvedený/rozvedená", level="stav_txt").div(total_pop)

In [48]:
# Label the series so it becomes the "divorced" column once concatenated.
divorces = divorces.rename("divorced")

In [49]:
# Label the series so it becomes the "mean_age" column once concatenated.
mean_age_obec = mean_age_obec.rename("mean_age")

In [None]:
# Put the two per-territory series side by side (aligned on their shared index)
# and write the table to the CSV path below, which is relative to the working directory.
(
    pd.concat([divorces, mean_age_obec], axis="columns")
    .to_csv("../../tree_regression/data/cz_age_divorces_2021.csv")
)