In [7]:
import pandas as pd
import numpy as np

In [2]:
# per-year containers, keyed by the year of the data export:
# dfs_raw holds the tables as read from disk, dfs_clean the cleaned versions
dfs_raw, dfs_clean = {}, {}

Source of data: [Slovenská správa ciest](https://www.cdb.sk/sk/statisticke-vystupy.alej)

In [48]:
# raw yearly exports; every file is read with ';' as separator and cp1250 encoding
raw_csv_paths = {
    2021: './Data_raw/sr_co_most_dc_2021-01-01.csv',
    2022: './Data_raw/sr_co_mosty-dc_zoznam_2022-01-01.csv',
}

for year, csv_path in raw_csv_paths.items():
    dfs_raw[year] = pd.read_csv(csv_path, sep=';', encoding='cp1250')

### Cleaning dataframes

In [57]:
# 'global' variables

# lookup: verbal condition rating -> numeric condition code
# ('Neznámy' is the label used for unknown values and maps to -1)
dict_stav_slovom_na_kod = dict([
    ('Bezchybný', 1),
    ('Veľmi dobrý', 2),
    ('Dobrý', 3),
    ('Uspokojivý', 4),
    ('Zlý', 5),
    ('Veľmi zlý', 6),
    ('Havarijný', 7),
    ('Neznámy', -1),
])

#### 2021

In [14]:
# Clean the 2021 export: keep the needed columns, give them short names,
# fill unknown values, normalise road categories and store the result
# in dfs_clean[2021].

# .copy() detaches the selection from the raw frame
df = dfs_raw[2021][
        ['Trieda cesty', 'Číslo cesty', 'Správcovské číslo mostu',
         'Identifikačné číslo mostu', 'rok postavenia',
         'stavebný stav - kód', 'Stavebný stav',
         'LongitudeE', 'LatitudeN']
    ].copy()

# rename columns (same order as the selection above)
df.columns = ['ck_trieda', 'ck_cislo', 'spravcovske_cislo',
              'ID_most', 'rok_postavenia',
              'stav_kod', 'stav_slovom',
              'lon', 'lat']

# treat unknown values: a missing condition code becomes -1
df['stav_kod'] = df['stav_kod'].fillna(-1).astype(int)
df = df.sort_values(by='stav_kod', ascending=True)
# The code is kept as an integer (it was previously cast to str after sorting).
# The 2022 step derives integer codes from dict_stav_slovom_na_kod, so a string
# column here would give a mixed str/int 'stav_kod' once the years are concatenated.

# a missing verbal rating gets the 'Neznámy' label
df['stav_slovom'] = df['stav_slovom'].fillna('Neznámy')

# clean road category values: map the raw labels onto a few capitalised categories
df['ck_trieda'] = df['ck_trieda'].replace(
        {
            'diaľnica':'Diaľnica',
            'privádzač diaľničný':'Diaľnica',
            'cesta I. triedy':'Cesta I. triedy',
            'cesta II. triedy':'Cesta II. triedy',
            'II. trieda - miestna zberná (MZ)':'Cesta II. triedy',
            'cesta III. triedy':'Cesta III. triedy',
            'III. trieda - miestna obslužná (MO)':'Cesta III. triedy',
            'miestna neurčená':'Miestna cesta'
        }
    )

# tag every row with the year of the export
df['year_data'] = 2021

# save to clean dict and drop the temporary working name
dfs_clean[2021] = df.copy()
del df

#### 2022

In [59]:
# Clean the 2022 export. This file lists bridges ('Most') and their units
# ('Dilatačný celok') as separate rows, distinguished by the 'Most /DC' column.
# Bridge rows supply road and construction-year info; unit rows supply the
# condition rating and coordinates. The result is stored in dfs_clean[2022].

# .copy() detaches the selection from the raw frame (same as the 2021 step)
df = dfs_raw[2022][
        ['Trieda PK', 'Číslo PK', 'správcovské číslo',
         'ID mosta', 'ID DC', 'Rok postavenia',
         'Stavebno-technický stav',
         'Longitude', 'Latitude', 'Most /DC']
    ].copy()

# rename columns (same order as the selection above)
df.columns = ['ck_trieda', 'ck_cislo', 'spravcovske_cislo',
              'ID_most', 'ID_dc', 'rok_postavenia',
              'stav_slovom',
              'lon', 'lat', 'typ_udaju']

# split into bridges and bridge units
df_m = df[df['typ_udaju']=='Most'].copy()
df_dc = df[df['typ_udaju']=='Dilatačný celok'].copy()

# assign bridge ID to bridge unit: the part of the unit ID before the first '.'
# (vectorised; a missing unit ID stays missing instead of raising)
df_dc['ID_most'] = df_dc['ID_dc'].str.split('.').str[0]

# merge data: every bridge row is kept (left join) and receives the condition
# and coordinates of its units
# NOTE(review): a bridge whose units differ in condition or coordinates yields
# several rows here, and repeated bridge rows in the export are kept as well.
# Confirm that one row per bridge is not required downstream.
df = pd.merge(
        df_m[['ck_trieda', 'ck_cislo', 'spravcovske_cislo', 'rok_postavenia', 'ID_most']],
        df_dc[['ID_most', 'stav_slovom', 'lon', 'lat']].drop_duplicates(),
        how='left',
        on='ID_most'
    )

# treat unknown: bridges without a rating get the 'Neznámy' label
df['stav_slovom'] = df['stav_slovom'].fillna('Neznámy')

# clean road category values: map the raw labels onto a few capitalised categories
df['ck_trieda'] = df['ck_trieda'].replace(
        {
            'diaľnica':'Diaľnica',
            'cesta I. triedy':'Cesta I. triedy',
            'cesta II. triedy':'Cesta II. triedy',
            'II. trieda - miestna zberná (MZ)':'Cesta II. triedy',
            'cesta III. triedy':'Cesta III. triedy',
            'III. trieda - miestna obslužná (MO)':'Cesta III. triedy',
            'miestna neurčená':'Miestna cesta',
            'účelová cesta (UK)':'Miestna cesta'
        }
    )

# year to int
df['rok_postavenia'] = df['rok_postavenia'].astype(int)

# technical condition code, looked up from the verbal rating;
# a label missing from the lookup maps to NaN, which makes the integer cast fail loudly
df['stav_kod'] = df['stav_slovom'].map(dict_stav_slovom_na_kod).astype('int64')

# tag every row with the year of the export
df['year_data'] = 2022

# save to clean dict and drop the temporary working name
dfs_clean[2022] = df.copy()
del df

In [56]:
# display the cleaned 2021 table as the cell output
dfs_clean[2021]

Unnamed: 0,ck_trieda,ck_cislo,spravcovske_cislo,ID_most,rok_postavenia,stav_kod,stav_slovom,lon,lat,year_data
7555,Diaľnica,D00001,224,M9754,2014,-1,Neznámy,18.979470,49.107550,2021
8107,Diaľnica,R00002,119,M9605,2015,-1,Neznámy,19.400380,48.537860,2021
8108,Diaľnica,R00002,122,M9603,2015,-1,Neznámy,19.401590,48.540080,2021
7425,Diaľnica,D00001,126,M382,2002,-1,Neznámy,17.991579,48.904434,2021
7478,Diaľnica,D00001,SO 201-20,M9932,2021,-1,Neznámy,18.627110,49.226470,2021
...,...,...,...,...,...,...,...,...,...,...
6500,Cesta III. triedy,3354,4,M4959,1964,7,Havarijný,21.184798,48.858030,2021
238,Cesta I. triedy,15,13,M6548,1960,7,Havarijný,21.661445,49.161305,2021
1668,Cesta I. triedy,74,43,M7607,1959,7,Havarijný,22.371678,48.915007,2021
139,Cesta I. triedy,10,238,M1576,1928,7,Havarijný,18.433049,49.379726,2021


In [60]:
# display the cleaned 2022 table as the cell output
dfs_clean[2022]

Unnamed: 0,ck_trieda,ck_cislo,spravcovske_cislo,rok_postavenia,ID_most,stav_slovom,lon,lat,stav_kod,year_data
0,Cesta I. triedy,10,235,1937,M262,Uspokojivý,18.409047,49.395757,4,2022
1,Cesta I. triedy,10,236,1920,M1915,Zlý,18.425421,49.379733,5,2022
2,Cesta I. triedy,10,237,1925,M4803,Veľmi dobrý,18.431719,49.380404,2,2022
3,Cesta I. triedy,10,238,1928,M1576,Havarijný,18.433049,49.379726,7,2022
4,Cesta I. triedy,10,239,1937,M2309,Dobrý,18.475029,49.369646,3,2022
...,...,...,...,...,...,...,...,...,...,...
9224,Miestna cesta,UK,4,2006,M83,Bezchybný,18.827060,49.208220,1,2022
9225,Miestna cesta,UK,3,2006,M6982,Bezchybný,18.825870,49.208570,1,2022
9226,Miestna cesta,UK,5,2006,M6329,Bezchybný,18.832590,49.206580,1,2022
9227,Miestna cesta,UK,3,2006,M6982,Bezchybný,18.825870,49.208570,1,2022


### Join dataframes


dict_values([              ck_trieda ck_cislo spravcovske_cislo ID_most  rok_postavenia  \
7555           Diaľnica   D00001               224   M9754            2014   
8107           Diaľnica   R00002               119   M9605            2015   
8108           Diaľnica   R00002               122   M9603            2015   
7425           Diaľnica   D00001               126    M382            2002   
7478           Diaľnica   D00001         SO 201-20   M9932            2021   
...                 ...      ...               ...     ...             ...   
6500  Cesta III. triedy     3354                 4   M4959            1964   
238     Cesta I. triedy       15                13   M6548            1960   
1668    Cesta I. triedy       74                43   M7607            1959   
139     Cesta I. triedy       10               238   M1576            1928   
5092  Cesta III. triedy     2300                 1   M3200            1945   

     stav_kod stav_slovom        lon        lat  y

In [63]:
# stack the cleaned yearly tables (in the order they were stored) into one
# frame; ignore_index=True discards the per-year indexes and renumbers the rows
yearly_frames = dfs_clean.values()
df_all = pd.concat(yearly_frames, ignore_index=True)

In [64]:
# write the combined table to disk as CSV
# NOTE(review): the target path has no file extension and sits under '../Data/',
# whereas the raw files are read from './Data_raw/'. Confirm the intended
# output name and location.
df_all.to_csv('../Data/sr')

Unnamed: 0,ck_trieda,ck_cislo,spravcovske_cislo,ID_most,rok_postavenia,stav_kod,stav_slovom,lon,lat,year_data
0,Diaľnica,D00001,224,M9754,2014,-1,Neznámy,18.979470,49.107550,2021
1,Diaľnica,R00002,119,M9605,2015,-1,Neznámy,19.400380,48.537860,2021
2,Diaľnica,R00002,122,M9603,2015,-1,Neznámy,19.401590,48.540080,2021
3,Diaľnica,D00001,126,M382,2002,-1,Neznámy,17.991579,48.904434,2021
4,Diaľnica,D00001,SO 201-20,M9932,2021,-1,Neznámy,18.627110,49.226470,2021
...,...,...,...,...,...,...,...,...,...,...
17448,Miestna cesta,UK,4,M83,2006,1,Bezchybný,18.827060,49.208220,2022
17449,Miestna cesta,UK,3,M6982,2006,1,Bezchybný,18.825870,49.208570,2022
17450,Miestna cesta,UK,5,M6329,2006,1,Bezchybný,18.832590,49.206580,2022
17451,Miestna cesta,UK,3,M6982,2006,1,Bezchybný,18.825870,49.208570,2022
