In [25]:
import pandas as pd
import numpy as np


pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

## Loading main dataset of primary metrics

In [26]:
df = pd.read_csv('../../DATA/MODEL/primary_features_df.csv')

priamry_processed_values = ['CA','CASH','CL','EQ','NCL','TA','TL','CC','INV','CF_NETTO','CF_SELFFIN','EAT','EBIT','SAL','COST','INT','YIE']

df = df[['financial_report_id', 'financial_report_template_id'] + priamry_processed_values]

display(df.iloc[:, 2:].describe().T)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CA,1763209.0,324555.334765,5085188.0,-14784391.0,7136.0,24607.0,96218.0,3288340000.0
CASH,1763209.0,72274.69928,2585032.0,-14841975.0,3521.0,9068.0,30022.0,2654427000.0
CL,1763209.0,241501.590665,2632965.0,-6092623.0,1621.0,14372.0,80965.0,1076414000.0
EQ,1763209.0,239314.768814,3998296.0,-435636281.0,2309.0,9493.0,42976.0,655240700.0
NCL,1763209.0,284897.494519,75160930.0,-16335478.0,0.0,211.0,13968.0,99599410000.0
TA,1763209.0,793599.922333,77061350.0,-5099469.0,10080.0,40150.0,176720.0,99599560000.0
TL,1763209.0,526399.085185,75234970.0,-3861110.0,2384.0,22308.0,121890.0,99599540000.0
CC,1763209.0,182021.779505,3389468.0,-9825583.0,109.0,5072.0,33283.0,1400580000.0
INV,1763209.0,70258.855979,837520.5,-5750264.0,0.0,0.0,1790.0,221500000.0
CF_NETTO,1763209.0,52056.263993,866239.4,-185606218.0,-116.0,2270.0,16529.0,299504100.0


## Computing secondary metrics - financial ratios

In [27]:
def get_ratios(row):
    row['L3'] = ( row['CA'] ) / ( row['CL'] ) if row['CL'] > 0 else None
    row['L2'] = ( row['CASH']+row['CC'] ) / ( row['CL'] ) if row['CL'] > 0 else None
    row['L1'] = ( row['CASH'] ) / ( row['CL'] ) if row['CL'] > 0 else None
    row['CF_CL'] = ( row['CF_NETTO'] ) / ( row['CL'] ) if row['CL'] > 0 else None
    row['CASH_TA'] = ( row['CASH'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['SAL_TA'] = ( row['SAL'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['TL_SAL'] = ( row['TL'] ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['INV_COST'] = ( row['INV'] * 360 ) / ( row['COST'] ) if row['COST'] > 0 else None
    row['INV_SAL'] = ( row['INV'] * 360 ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['CC_SAL'] = ( row['CC'] ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['TA_SAL'] = ( row['TA'] * 360 ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['TL_TA'] = ( row['TL'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['CF_TL'] = ( row['CF_NETTO'] ) / ( row['TL'] ) if row['TL'] > 0 else None
    row['CL_TA'] = ( row['CL'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['NCL_TA'] = ( row['NCL'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['EQ_TL'] = ( row['EQ'] ) / ( row['TL'] ) if row['TL'] > 0 else None
    row['EQ_TA'] = ( row['EQ'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['EBIT_INT'] = ( row['EBIT']+row['INT'] ) / ( row['INT'] ) if row['INT'] > 0 else None
    row['CL_CC'] = ( row['CL'] ) / ( row['CC'] ) if row['CC'] > 0 else None
    row['ROE'] = ( row['EAT'] ) / ( row['EQ'] ) if row['EQ'] > 0 else None
    row['EAT_TA'] = ( row['EAT'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['ROA_BRUTTO'] = ( row['EBIT'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['CF_TA'] = ( row['CF_NETTO'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['CF_SAL'] = ( row['CF_NETTO'] ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['ROS'] = ( row['EAT'] ) / ( row['SAL'] ) if row['SAL'] > 0 else None
    row['EAT_YIE'] = ( row['EAT'] ) / ( row['YIE'] ) if row['YIE'] > 0 else None
    row['ROI'] = ( row['EAT']+row['INT'] ) / ( row['TA'] ) if row['TA'] > 0 else None
    row['ROA_NETTO'] = ( row['EAT'] ) / ( row['TA'] ) if row['TA'] > 0 else None

    return row


records = []
k = 0
size = df.shape[0]

for row_dict in df.to_dict(orient="records"):
    # if k >= 100:
    #     break

    if (k + 1) % 50000 == 0:
        print(f'{k + 1} / {size}')

    k += 1

    records.append(get_ratios(row_dict))


df_extended = pd.DataFrame(records)

ratios = ['CF_NETTO', 'CF_SELFFIN', 'EAT', 'EBIT', 'SAL', 'COST', 'INT', 'YIE',
       'L3', 'L2', 'L1', 'CF_CL', 'CASH_TA', 'SAL_TA', 'TL_SAL', 'INV_COST',
       'INV_SAL', 'CC_SAL', 'TA_SAL', 'TL_TA', 'CF_TL', 'CL_TA', 'NCL_TA',
       'EQ_TL', 'EQ_TA', 'EBIT_INT', 'CL_CC', 'ROE', 'EAT_TA', 'ROA_BRUTTO',
       'CF_TA', 'CF_SAL', 'ROS', 'EAT_YIE', 'ROI', 'ROA_NETTO']


df_extended[ratios].describe().T

50000 / 1763209
100000 / 1763209
150000 / 1763209
200000 / 1763209
250000 / 1763209
300000 / 1763209
350000 / 1763209
400000 / 1763209
450000 / 1763209
500000 / 1763209
550000 / 1763209
600000 / 1763209
650000 / 1763209
700000 / 1763209
750000 / 1763209
800000 / 1763209
850000 / 1763209
900000 / 1763209
950000 / 1763209
1000000 / 1763209
1050000 / 1763209
1100000 / 1763209
1150000 / 1763209
1200000 / 1763209
1250000 / 1763209
1300000 / 1763209
1350000 / 1763209
1400000 / 1763209
1450000 / 1763209
1500000 / 1763209
1550000 / 1763209
1600000 / 1763209
1650000 / 1763209
1700000 / 1763209
1750000 / 1763209


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CF_NETTO,1763209.0,52056.263993,866239.4,-185606200.0,-116.0,2270.0,16529.0,299504100.0
CF_SELFFIN,1763209.0,45755.347963,847378.5,-185606200.0,-480.0,1780.0,14476.0,299501200.0
EAT,1763209.0,18692.317924,754162.2,-185606200.0,-1130.0,493.0,6913.0,299501200.0
EBIT,1763209.0,26329.236008,794822.1,-185606200.0,-780.0,888.0,8730.0,299504100.0
SAL,1763209.0,683802.999623,7299065.0,-30496930.0,4000.0,36797.0,160424.0,1708491000.0
COST,1763209.0,712780.732116,7804105.0,-75998730.0,5612.0,36862.0,164869.0,1645877000.0
INT,1763209.0,9067.491379,275814.0,-453189.0,0.0,0.0,343.0,87204480.0
YIE,1763209.0,739110.216345,7979590.0,-30496930.0,5010.0,39318.0,171748.0,1708499000.0
L3,1609284.0,36.243925,5865.823,-19398.07,0.77943,1.744192,5.782889,6632828.0
L2,1609284.0,34.187229,5814.287,-19398.07,0.540963,1.499037,5.348945,6632828.0


## Joining meta information (dimensions) about financial statements

In [28]:
statements_reports_table = pd.read_csv('../../DATA/TRANSFORM/financial_statements/financial_statements_reports_table.csv')
statements_table = pd.read_csv('../../DATA/TRANSFORM/financial_statements/financial_statements_table.csv')
entity_details_table = pd.read_csv('../../DATA/TRANSFORM/entity_details/financial_statements_register_entity_details.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [29]:
entity_details_table = entity_details_table[['id', 'establishment_date', 'sknace_code', 'entity_state']].rename(columns={'id': 'entity_id'})

statements_table = statements_table[['financial_statement_id', 'entity_id', 'entity_ico', 'year', 'month', 'period_length', 'consolidated', 'approved_date', 'type', 'entity_name']]

display(df_extended.shape[0])

df_statements_extended = (df_extended
                          .drop_duplicates('financial_report_id')
                          .merge(
                              statements_reports_table.drop_duplicates('financial_report_id'),
                              on = 'financial_report_id',
                              how = 'left')
                          .merge(
                              statements_table,
                              on = 'financial_statement_id',
                              how = 'left')
                          .merge(
                              entity_details_table.drop_duplicates('entity_id'),
                              on = 'entity_id',
                              how = 'left'))

df_statements_extended = df_statements_extended

display(df_statements_extended.shape[0])

statements_reports_table = None
statements_table = None
entity_details_table = None

1763209

1763149

In [33]:
df_statements_extended.query('period_length == 12', inplace=True)
df_statements_extended.query('SAL != 0', inplace=True)

In [34]:
display(df_statements_extended.shape)

df_statements_extended = (df_statements_extended
                          .query('2014 <= year <= 2021')
                          .sort_values(['approved_date', 'TA', 'SAL'], ascending=[False, False, False])
                          .groupby(['entity_ico', 'year']).first()
                          .reset_index())

df_statements_extended

display(df_statements_extended.shape)

(1384486, 61)

(1328531, 61)

In [35]:
df_statements_extended = df_statements_extended[
    ['financial_report_id', 'financial_statement_id',
     'financial_report_order_number', 'entity_id', 'approved_date',
     'entity_ico', 'year', 'month', 'financial_report_template_id',
     'entity_name', 'establishment_date', 'sknace_code', 'entity_state',
     
     
     'CA', 'CASH', 'CL', 'EQ', 'NCL', 'TA', 'TL',
     'CC', 'INV', 'CF_NETTO', 'CF_SELFFIN', 'EAT', 'EBIT', 'SAL', 'COST',
     'INT', 'YIE',
     
     'L3', 'L2', 'L1', 'CF_CL', 'CASH_TA', 'SAL_TA', 'TL_SAL',
     'INV_COST', 'INV_SAL', 'CC_SAL', 'TA_SAL', 'TL_TA', 'CF_TL', 'CL_TA',
     'NCL_TA', 'EQ_TL', 'EQ_TA', 'EBIT_INT', 'CL_CC', 'ROE', 'EAT_TA',
     'ROA_BRUTTO', 'CF_TA', 'CF_SAL', 'ROS', 'EAT_YIE', 'ROI', 'ROA_NETTO'
     ]
]

df_statements_extended

Unnamed: 0,financial_report_id,financial_statement_id,financial_report_order_number,entity_id,approved_date,entity_ico,year,month,financial_report_template_id,entity_name,establishment_date,sknace_code,entity_state,CA,CASH,CL,EQ,NCL,TA,TL,CC,INV,CF_NETTO,CF_SELFFIN,EAT,EBIT,SAL,COST,INT,YIE,L3,L2,L1,CF_CL,CASH_TA,SAL_TA,TL_SAL,INV_COST,INV_SAL,CC_SAL,TA_SAL,TL_TA,CF_TL,CL_TA,NCL_TA,EQ_TL,EQ_TA,EBIT_INT,CL_CC,ROE,EAT_TA,ROA_BRUTTO,CF_TA,CF_SAL,ROS,EAT_YIE,ROI,ROA_NETTO
0,4890214,2575566,2,25527,2015-07-31,671.0,2014,1,699.0,"Kerametal, akciová spoločnosť,",1970-02-02,46180.0,,5174843.0,1479.0,1366130.0,3807863.0,1255.0,5175248.0,1367385.0,5173364.0,0.0,-547786.0,-548266.0,-550029.0,-549549.0,6350.0,560563.0,0.0,11014.0,3.787958,3.787958,0.001083,-0.400976,0.000286,0.001227,215.336220,0.000000,0.00000,814.702992,293399.886614,0.264216,-0.400608,0.263974,0.000243,2.784778,0.735784,,0.264070,-0.144446,-0.106281,-0.106188,-0.105847,-86.265512,-86.618740,-49.939078,-0.106281,-0.106281
1,5392251,2976863,2,25527,2016-12-31,671.0,2015,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,5174245.0,1868.0,1693023.0,3480567.0,1255.0,5174845.0,1694278.0,5172377.0,0.0,-326822.0,-327302.0,-327302.0,-326822.0,3633.0,336975.0,0.0,10153.0,3.056217,3.056217,0.001103,-0.193040,0.000361,0.000702,466.357831,0.000000,0.00000,1423.720617,512783.980182,0.327407,-0.192898,0.327164,0.000243,2.054307,0.672593,,0.327320,-0.094037,-0.063249,-0.063156,-0.063156,-89.959262,-90.091385,-32.236974,-0.063249,-0.063249
2,5887438,3375702,2,25527,2017-12-27,671.0,2016,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14679.0,1034.0,1697982.0,-1683081.0,0.0,14901.0,1697982.0,13645.0,0.0,-5163168.0,-5163648.0,-5163648.0,-5163168.0,1994.0,5166660.0,0.0,3492.0,0.008645,0.008645,0.000609,-3.040767,0.069391,0.133817,851.545637,0.000000,0.00000,6.843029,2690.250752,113.950876,-3.040767,113.950876,0.000000,-0.991224,-112.950876,,124.439868,,-346.530300,-346.498087,-346.498087,-2589.352056,-2589.592778,-1478.707904,-346.530300,-346.530300
3,6361460,3757740,1,25527,2018-12-17,671.0,2017,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14501.0,942.0,1729112.0,-1714376.0,0.0,14736.0,1729112.0,13559.0,0.0,-30827.0,-31307.0,-31307.0,-30827.0,1993.0,33542.0,11844.0,2715.0,0.008386,0.008386,0.000545,-0.017828,0.063925,0.135247,867.592574,0.000000,0.00000,6.803312,2661.796287,117.339305,-0.017828,117.339305,0.000000,-0.991478,-116.339305,-1.602752,127.525039,,-2.124525,-2.091952,-2.091952,-15.467637,-15.708480,-11.531123,-1.320779,-2.124525
4,4593745,2340588,1,16410,2015-06-11,698.0,2014,1,699.0,"TECHNOPOL, a.s.",1970-02-18,70220.0,,1543495.0,315617.0,3796505.0,17186564.0,102542.0,21085611.0,3899047.0,1227526.0,352.0,-245840.0,-246827.0,-275229.0,-274242.0,282838.0,611268.0,51413.0,337026.0,0.406557,0.406464,0.083134,-0.064754,0.014968,0.013414,13.785443,0.207307,0.44803,4.340032,26838.048494,0.184915,-0.063051,0.180052,0.004863,4.407888,0.815085,-4.334098,3.092810,-0.016014,-0.013053,-0.013006,-0.011659,-0.869190,-0.973098,-0.816640,-0.010615,-0.013053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328526,7840332,4932355,1,2037303,2022-02-02,54313872.0,2021,1,687.0,Hanc Academy s. r. o.,2021-12-24,68200.0,,7050.0,7050.0,307.0,6743.0,0.0,7050.0,307.0,0.0,0.0,2050.0,1743.0,1743.0,2050.0,2050.0,0.0,0.0,2050.0,22.964169,22.964169,22.964169,6.677524,1.000000,0.290780,0.149756,,0.00000,0.000000,1238.048780,0.043546,6.677524,0.043546,0.000000,21.964169,0.956454,,,0.258490,0.247234,0.290780,0.290780,1.000000,0.850244,0.850244,0.247234,0.247234
1328527,8168668,5208864,1,2045087,2022-06-26,54318599.0,2021,1,687.0,SmartSolve - Quality Assurance s.r.o.,2021-12-28,82110.0,,5446.0,1946.0,67.0,5379.0,0.0,5446.0,67.0,3500.0,0.0,446.0,379.0,379.0,446.0,3500.0,3054.0,0.0,3500.0,81.283582,81.283582,29.044776,6.656716,0.357326,0.642674,0.019143,0.000000,0.00000,1.000000,560.160000,0.012303,6.656716,0.012303,0.000000,80.283582,0.987697,,0.019143,0.070459,0.069592,0.081895,0.081895,0.127429,0.108286,0.108286,0.069592,0.069592
1328528,4458450,2225050,1,289353,2015-02-27,60748940.0,2014,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,69384.0,47818.0,5008.0,-5515.0,70391.0,69884.0,75399.0,21566.0,0.0,-758.0,-1718.0,-1718.0,-758.0,174779.0,179373.0,2.0,178615.0,13.854633,13.854633,9.548323,-0.151358,0.684248,2.500987,0.431396,0.000000,0.00000,0.123390,143.943151,1.078916,-0.010053,0.071662,1.007255,-0.073144,-0.078916,-378.000000,0.232217,,-0.024584,-0.010847,-0.010847,-0.004337,-0.009830,-0.009618,-0.024555,-0.024584
1328529,5030426,2678436,1,289353,2016-03-12,60748940.0,2015,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,29950.0,25961.0,2010.0,-9614.0,37633.0,30029.0,39643.0,3989.0,0.0,-3139.0,-4099.0,-4099.0,-3139.0,120316.0,126841.0,3.0,123702.0,14.900498,14.900498,12.915920,-1.561692,0.864531,4.006660,0.329491,0.000000,0.00000,0.033154,89.850394,1.320157,-0.079182,0.066935,1.253222,-0.242514,-0.320157,-1045.333333,0.503886,,-0.136501,-0.104532,-0.104532,-0.026090,-0.034069,-0.033136,-0.136401,-0.136501


## Joining financial issues indicators

In [None]:
# pomer VI a zavazkov je mensi ako 8, t.j EQ_TL < 0.08
# 2016 : 0.04, 2017 : 0.06, 2018 : 0.08, 
# EQ < 0
# L3 < 1

In [36]:
def process_issues(row):
    EQ_TL = row.EQ_TL
    EQ = row.EQ
    L3 = row.L3
    y = row.year

    if y <= 2016: threat_criterion = 0.04
    elif y == 2017: threat_criterion = 0.06
    else: threat_criterion = 0.08

    if EQ_TL < threat_criterion or EQ < 0 or L3 < 1:
        return 1
    return 0
    

In [37]:
legal_financial_issues_criterion = []
for i, row in df_statements_extended.iterrows():
    legal_financial_issues_criterion.append(
        (row.entity_ico,
        row.year,
        process_issues(row))      
    )

financial_issues_criterion_df = pd.DataFrame(legal_financial_issues_criterion, columns=['entity_ico', 'year', 'RUZ_indirect_criterion'])
display(financial_issues_criterion_df)

Unnamed: 0,entity_ico,year,RUZ_indirect_criterion
0,671.0,2014,0
1,671.0,2015,0
2,671.0,2016,1
3,671.0,2017,1
4,698.0,2014,1
...,...,...,...
1328526,54313872.0,2021,0
1328527,54318599.0,2021,0
1328528,60748940.0,2014,1
1328529,60748940.0,2015,1


In [38]:
financial_issues_criterion_copy = financial_issues_criterion_df.copy()
financial_issues_criterion_next_year = financial_issues_criterion_copy.copy()
financial_issues_criterion_next_year['year'] -= 1
financial_issues_criterion_next_year.rename(columns={'RUZ_indirect_criterion': 'RUZ_indirect_criterion_1y_off'}, inplace=True)


financial_issues_criterion_next_next_year = financial_issues_criterion_copy.copy()
financial_issues_criterion_next_next_year['year'] -= 2
financial_issues_criterion_next_next_year.rename(columns={'RUZ_indirect_criterion': 'RUZ_indirect_criterion_2y_off'}, inplace=True)

financial_issues_criterion_copy = (financial_issues_criterion_copy
                                   .merge(financial_issues_criterion_next_year, on = ['entity_ico', 'year'], how = 'left')
                                   .merge(financial_issues_criterion_next_next_year, on = ['entity_ico', 'year'], how = 'left'))

indirect_criterion_1y_off_function = lambda x: 1 if x.RUZ_indirect_criterion == 1 else x.RUZ_indirect_criterion_1y_off
indirect_criterion_2y_off_function = lambda x: 1 if x.RUZ_indirect_criterion_1y_off == 1 else x.RUZ_indirect_criterion_2y_off

financial_issues_criterion_copy['RUZ_indirect_criterion_1y_off'] = financial_issues_criterion_copy.apply(indirect_criterion_1y_off_function, axis=1)
financial_issues_criterion_copy['RUZ_indirect_criterion_2y_off'] = financial_issues_criterion_copy.apply(indirect_criterion_2y_off_function, axis=1)

financial_issues_criterion_copy.drop_duplicates(['entity_ico', 'year'])


Unnamed: 0,entity_ico,year,RUZ_indirect_criterion,RUZ_indirect_criterion_1y_off,RUZ_indirect_criterion_2y_off
0,671.0,2014,0,0.0,1.0
1,671.0,2015,0,1.0,1.0
2,671.0,2016,1,1.0,1.0
3,671.0,2017,1,1.0,1.0
4,698.0,2014,1,1.0,1.0
...,...,...,...,...,...
1328526,54313872.0,2021,0,,
1328527,54318599.0,2021,0,,
1328528,60748940.0,2014,1,1.0,1.0
1328529,60748940.0,2015,1,1.0,1.0


In [40]:
financial_issues_criterion_copy['RUZ_indirect_criterion'].value_counts()

0    864463
1    464068
Name: RUZ_indirect_criterion, dtype: int64

In [41]:
issues_indicators_table = pd.read_csv('../../DATA/MODEL/issues_indicators.csv')

financial_ratios_table = df_statements_extended.merge(issues_indicators_table, on = ['entity_ico', 'year'], how = 'left').merge(financial_issues_criterion_copy, on = ['entity_ico', 'year'], how = 'left')

for col in issues_indicators_table.columns:
    financial_ratios_table[col].fillna(0, inplace=True)

financial_ratios_table

Unnamed: 0,financial_report_id,financial_statement_id,financial_report_order_number,entity_id,approved_date,entity_ico,year,month,financial_report_template_id,entity_name,establishment_date,sknace_code,entity_state,CA,CASH,CL,EQ,NCL,TA,TL,CC,INV,CF_NETTO,CF_SELFFIN,EAT,EBIT,SAL,COST,INT,YIE,L3,L2,L1,CF_CL,CASH_TA,SAL_TA,TL_SAL,INV_COST,INV_SAL,CC_SAL,TA_SAL,TL_TA,CF_TL,CL_TA,NCL_TA,EQ_TL,EQ_TA,EBIT_INT,CL_CC,ROE,EAT_TA,ROA_BRUTTO,CF_TA,CF_SAL,ROS,EAT_YIE,ROI,ROA_NETTO,DPHZ_vat_registration_cancelled,DPHZ_vat_registration_cancelled_1y_off,DPHZ_vat_registration_cancelled_2y_off,RO_cancelled,RO_cancelled_1y_off,RO_cancelled_2y_off,RUZ_cancelled,RUZ_cancelled_1y_off,RUZ_cancelled_2y_off,RUZ_established,RUZ_established_1y_off,RUZ_established_2y_off,RU_konkurz,RU_konkurz_1y_off,RU_konkurz_2y_off,RU_ostatne_konania,RU_ostatne_konania_1y_off,RU_ostatne_konania_2y_off,RUZ_indirect_criterion,RUZ_indirect_criterion_1y_off,RUZ_indirect_criterion_2y_off
0,4890214,2575566,2,25527,2015-07-31,671.0,2014,1,699.0,"Kerametal, akciová spoločnosť,",1970-02-02,46180.0,,5174843.0,1479.0,1366130.0,3807863.0,1255.0,5175248.0,1367385.0,5173364.0,0.0,-547786.0,-548266.0,-550029.0,-549549.0,6350.0,560563.0,0.0,11014.0,3.787958,3.787958,0.001083,-0.400976,0.000286,0.001227,215.336220,0.000000,0.00000,814.702992,293399.886614,0.264216,-0.400608,0.263974,0.000243,2.784778,0.735784,,0.264070,-0.144446,-0.106281,-0.106188,-0.105847,-86.265512,-86.618740,-49.939078,-0.106281,-0.106281,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,1.0
1,5392251,2976863,2,25527,2016-12-31,671.0,2015,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,5174245.0,1868.0,1693023.0,3480567.0,1255.0,5174845.0,1694278.0,5172377.0,0.0,-326822.0,-327302.0,-327302.0,-326822.0,3633.0,336975.0,0.0,10153.0,3.056217,3.056217,0.001103,-0.193040,0.000361,0.000702,466.357831,0.000000,0.00000,1423.720617,512783.980182,0.327407,-0.192898,0.327164,0.000243,2.054307,0.672593,,0.327320,-0.094037,-0.063249,-0.063156,-0.063156,-89.959262,-90.091385,-32.236974,-0.063249,-0.063249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1.0,1.0
2,5887438,3375702,2,25527,2017-12-27,671.0,2016,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14679.0,1034.0,1697982.0,-1683081.0,0.0,14901.0,1697982.0,13645.0,0.0,-5163168.0,-5163648.0,-5163648.0,-5163168.0,1994.0,5166660.0,0.0,3492.0,0.008645,0.008645,0.000609,-3.040767,0.069391,0.133817,851.545637,0.000000,0.00000,6.843029,2690.250752,113.950876,-3.040767,113.950876,0.000000,-0.991224,-112.950876,,124.439868,,-346.530300,-346.498087,-346.498087,-2589.352056,-2589.592778,-1478.707904,-346.530300,-346.530300,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1.0,1.0
3,6361460,3757740,1,25527,2018-12-17,671.0,2017,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14501.0,942.0,1729112.0,-1714376.0,0.0,14736.0,1729112.0,13559.0,0.0,-30827.0,-31307.0,-31307.0,-30827.0,1993.0,33542.0,11844.0,2715.0,0.008386,0.008386,0.000545,-0.017828,0.063925,0.135247,867.592574,0.000000,0.00000,6.803312,2661.796287,117.339305,-0.017828,117.339305,0.000000,-0.991478,-116.339305,-1.602752,127.525039,,-2.124525,-2.091952,-2.091952,-15.467637,-15.708480,-11.531123,-1.320779,-2.124525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1.0,1.0
4,4593745,2340588,1,16410,2015-06-11,698.0,2014,1,699.0,"TECHNOPOL, a.s.",1970-02-18,70220.0,,1543495.0,315617.0,3796505.0,17186564.0,102542.0,21085611.0,3899047.0,1227526.0,352.0,-245840.0,-246827.0,-275229.0,-274242.0,282838.0,611268.0,51413.0,337026.0,0.406557,0.406464,0.083134,-0.064754,0.014968,0.013414,13.785443,0.207307,0.44803,4.340032,26838.048494,0.184915,-0.063051,0.180052,0.004863,4.407888,0.815085,-4.334098,3.092810,-0.016014,-0.013053,-0.013006,-0.011659,-0.869190,-0.973098,-0.816640,-0.010615,-0.013053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328526,7840332,4932355,1,2037303,2022-02-02,54313872.0,2021,1,687.0,Hanc Academy s. r. o.,2021-12-24,68200.0,,7050.0,7050.0,307.0,6743.0,0.0,7050.0,307.0,0.0,0.0,2050.0,1743.0,1743.0,2050.0,2050.0,0.0,0.0,2050.0,22.964169,22.964169,22.964169,6.677524,1.000000,0.290780,0.149756,,0.00000,0.000000,1238.048780,0.043546,6.677524,0.043546,0.000000,21.964169,0.956454,,,0.258490,0.247234,0.290780,0.290780,1.000000,0.850244,0.850244,0.247234,0.247234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,,
1328527,8168668,5208864,1,2045087,2022-06-26,54318599.0,2021,1,687.0,SmartSolve - Quality Assurance s.r.o.,2021-12-28,82110.0,,5446.0,1946.0,67.0,5379.0,0.0,5446.0,67.0,3500.0,0.0,446.0,379.0,379.0,446.0,3500.0,3054.0,0.0,3500.0,81.283582,81.283582,29.044776,6.656716,0.357326,0.642674,0.019143,0.000000,0.00000,1.000000,560.160000,0.012303,6.656716,0.012303,0.000000,80.283582,0.987697,,0.019143,0.070459,0.069592,0.081895,0.081895,0.127429,0.108286,0.108286,0.069592,0.069592,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,,
1328528,4458450,2225050,1,289353,2015-02-27,60748940.0,2014,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,69384.0,47818.0,5008.0,-5515.0,70391.0,69884.0,75399.0,21566.0,0.0,-758.0,-1718.0,-1718.0,-758.0,174779.0,179373.0,2.0,178615.0,13.854633,13.854633,9.548323,-0.151358,0.684248,2.500987,0.431396,0.000000,0.00000,0.123390,143.943151,1.078916,-0.010053,0.071662,1.007255,-0.073144,-0.078916,-378.000000,0.232217,,-0.024584,-0.010847,-0.010847,-0.004337,-0.009830,-0.009618,-0.024555,-0.024584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0
1328529,5030426,2678436,1,289353,2016-03-12,60748940.0,2015,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,29950.0,25961.0,2010.0,-9614.0,37633.0,30029.0,39643.0,3989.0,0.0,-3139.0,-4099.0,-4099.0,-3139.0,120316.0,126841.0,3.0,123702.0,14.900498,14.900498,12.915920,-1.561692,0.864531,4.006660,0.329491,0.000000,0.00000,0.033154,89.850394,1.320157,-0.079182,0.066935,1.253222,-0.242514,-0.320157,-1045.333333,0.503886,,-0.136501,-0.104532,-0.104532,-0.026090,-0.034069,-0.033136,-0.136401,-0.136501,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0


## Joining SKNACE codebook

In [42]:
codebook_sknace_raw = pd.read_csv('../../DATA/RAW/register_uz_apr23/preprocessed/codebook_sk-nace.csv')
codebook_sknace_divisions_raw = pd.read_csv('../../DATA/RAW/register_uz_apr23/preprocessed/codebook_sk-nace_divisions_su.csv', sep= '|')


codebook_sknace = codebook_sknace_raw[['kod', 'nazov.sk']]
codebook_sknace.columns = ['sknace_code', 'sknace_group']

codebook_sknace['sknace_code'] = pd.to_numeric(codebook_sknace['sknace_code'])
codebook_sknace['sknace_division_code'] = codebook_sknace['sknace_code'].apply(lambda x: x//1000)

# display(codebook_sknace)
# display(codebook_sknace_divisions_raw)

codebook_sknace_divisions = codebook_sknace_divisions_raw[['code', 'officialTitle', 'note']]
codebook_sknace_divisions.columns = ['sknace_division_code', 'sknace_division_name', 'sknace_division']
codebook_sknace_divisions['sknace_division_code'] = pd.to_numeric(codebook_sknace_divisions['sknace_division_code'])

codebook_sknace = codebook_sknace.merge(codebook_sknace_divisions, on = 'sknace_division_code', how = 'left')

codebook_sknace



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  codebook_sknace['sknace_code'] = pd.to_numeric(codebook_sknace['sknace_code'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  codebook_sknace_divisions['sknace_division_code'] = pd.to_numeric(codebook_sknace_divisions['sknace_division_code'])


Unnamed: 0,sknace_code,sknace_group,sknace_division_code,sknace_division_name,sknace_division
0,1110,Pestov.obilnín,1,"Pestovanie plodín a chov zvierat, poľovníctvo ...",A
1,1120,Pestov.ryže,1,"Pestovanie plodín a chov zvierat, poľovníctvo ...",A
2,1130,"Pestov.zeleniny,melónov",1,"Pestovanie plodín a chov zvierat, poľovníctvo ...",A
3,1140,Pestov.cukrovej trstiny,1,"Pestovanie plodín a chov zvierat, poľovníctvo ...",A
4,1150,Pestov.tabaku,1,"Pestovanie plodín a chov zvierat, poľovníctvo ...",A
...,...,...,...,...,...
641,96090,Ost.osob.služby i.n.,96,Ostatné osobné služby,S
642,97000,Čin.domácn.ako zamestn.,97,Činnosti domácností ako zamestnávateľov domáce...,T
643,98100,Čin.v dom.produk.tovary,98,Nediferencované činnosti v domácnostiach produ...,T
644,98200,Čin.v dom.produk.služby,98,Nediferencované činnosti v domácnostiach produ...,T


In [43]:
merged_df = financial_ratios_table.merge(codebook_sknace[['sknace_code', 'sknace_division_name', 'sknace_division']], on = 'sknace_code', how = 'left')
merged_df['sknace_subcategory'] = merged_df['sknace_code'] // 1000

# display(merged_df['sknace_subcategory'].value_counts().to_frame())

In [44]:
merged_df

Unnamed: 0,financial_report_id,financial_statement_id,financial_report_order_number,entity_id,approved_date,entity_ico,year,month,financial_report_template_id,entity_name,establishment_date,sknace_code,entity_state,CA,CASH,CL,EQ,NCL,TA,TL,CC,INV,CF_NETTO,CF_SELFFIN,EAT,EBIT,SAL,COST,INT,YIE,L3,L2,L1,CF_CL,CASH_TA,SAL_TA,TL_SAL,INV_COST,INV_SAL,CC_SAL,TA_SAL,TL_TA,CF_TL,CL_TA,NCL_TA,EQ_TL,EQ_TA,EBIT_INT,CL_CC,ROE,EAT_TA,ROA_BRUTTO,CF_TA,CF_SAL,ROS,EAT_YIE,ROI,ROA_NETTO,DPHZ_vat_registration_cancelled,DPHZ_vat_registration_cancelled_1y_off,DPHZ_vat_registration_cancelled_2y_off,RO_cancelled,RO_cancelled_1y_off,RO_cancelled_2y_off,RUZ_cancelled,RUZ_cancelled_1y_off,RUZ_cancelled_2y_off,RUZ_established,RUZ_established_1y_off,RUZ_established_2y_off,RU_konkurz,RU_konkurz_1y_off,RU_konkurz_2y_off,RU_ostatne_konania,RU_ostatne_konania_1y_off,RU_ostatne_konania_2y_off,RUZ_indirect_criterion,RUZ_indirect_criterion_1y_off,RUZ_indirect_criterion_2y_off,sknace_division_name,sknace_division,sknace_subcategory
0,4890214,2575566,2,25527,2015-07-31,671.0,2014,1,699.0,"Kerametal, akciová spoločnosť,",1970-02-02,46180.0,,5174843.0,1479.0,1366130.0,3807863.0,1255.0,5175248.0,1367385.0,5173364.0,0.0,-547786.0,-548266.0,-550029.0,-549549.0,6350.0,560563.0,0.0,11014.0,3.787958,3.787958,0.001083,-0.400976,0.000286,0.001227,215.336220,0.000000,0.00000,814.702992,293399.886614,0.264216,-0.400608,0.263974,0.000243,2.784778,0.735784,,0.264070,-0.144446,-0.106281,-0.106188,-0.105847,-86.265512,-86.618740,-49.939078,-0.106281,-0.106281,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0
1,5392251,2976863,2,25527,2016-12-31,671.0,2015,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,5174245.0,1868.0,1693023.0,3480567.0,1255.0,5174845.0,1694278.0,5172377.0,0.0,-326822.0,-327302.0,-327302.0,-326822.0,3633.0,336975.0,0.0,10153.0,3.056217,3.056217,0.001103,-0.193040,0.000361,0.000702,466.357831,0.000000,0.00000,1423.720617,512783.980182,0.327407,-0.192898,0.327164,0.000243,2.054307,0.672593,,0.327320,-0.094037,-0.063249,-0.063156,-0.063156,-89.959262,-90.091385,-32.236974,-0.063249,-0.063249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0
2,5887438,3375702,2,25527,2017-12-27,671.0,2016,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14679.0,1034.0,1697982.0,-1683081.0,0.0,14901.0,1697982.0,13645.0,0.0,-5163168.0,-5163648.0,-5163648.0,-5163168.0,1994.0,5166660.0,0.0,3492.0,0.008645,0.008645,0.000609,-3.040767,0.069391,0.133817,851.545637,0.000000,0.00000,6.843029,2690.250752,113.950876,-3.040767,113.950876,0.000000,-0.991224,-112.950876,,124.439868,,-346.530300,-346.498087,-346.498087,-2589.352056,-2589.592778,-1478.707904,-346.530300,-346.530300,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0
3,6361460,3757740,1,25527,2018-12-17,671.0,2017,1,699.0,"Kerametal, akciová spoločnosť, Bratislava",1970-02-02,46180.0,,14501.0,942.0,1729112.0,-1714376.0,0.0,14736.0,1729112.0,13559.0,0.0,-30827.0,-31307.0,-31307.0,-30827.0,1993.0,33542.0,11844.0,2715.0,0.008386,0.008386,0.000545,-0.017828,0.063925,0.135247,867.592574,0.000000,0.00000,6.803312,2661.796287,117.339305,-0.017828,117.339305,0.000000,-0.991478,-116.339305,-1.602752,127.525039,,-2.124525,-2.091952,-2.091952,-15.467637,-15.708480,-11.531123,-1.320779,-2.124525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0
4,4593745,2340588,1,16410,2015-06-11,698.0,2014,1,699.0,"TECHNOPOL, a.s.",1970-02-18,70220.0,,1543495.0,315617.0,3796505.0,17186564.0,102542.0,21085611.0,3899047.0,1227526.0,352.0,-245840.0,-246827.0,-275229.0,-274242.0,282838.0,611268.0,51413.0,337026.0,0.406557,0.406464,0.083134,-0.064754,0.014968,0.013414,13.785443,0.207307,0.44803,4.340032,26838.048494,0.184915,-0.063051,0.180052,0.004863,4.407888,0.815085,-4.334098,3.092810,-0.016014,-0.013053,-0.013006,-0.011659,-0.869190,-0.973098,-0.816640,-0.010615,-0.013053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0,Vedenie firiem; poradenstvo v oblasti riadenia,M,70.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328526,7840332,4932355,1,2037303,2022-02-02,54313872.0,2021,1,687.0,Hanc Academy s. r. o.,2021-12-24,68200.0,,7050.0,7050.0,307.0,6743.0,0.0,7050.0,307.0,0.0,0.0,2050.0,1743.0,1743.0,2050.0,2050.0,0.0,0.0,2050.0,22.964169,22.964169,22.964169,6.677524,1.000000,0.290780,0.149756,,0.00000,0.000000,1238.048780,0.043546,6.677524,0.043546,0.000000,21.964169,0.956454,,,0.258490,0.247234,0.290780,0.290780,1.000000,0.850244,0.850244,0.247234,0.247234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,Činnosti v oblasti nehnuteľností,L,68.0
1328527,8168668,5208864,1,2045087,2022-06-26,54318599.0,2021,1,687.0,SmartSolve - Quality Assurance s.r.o.,2021-12-28,82110.0,,5446.0,1946.0,67.0,5379.0,0.0,5446.0,67.0,3500.0,0.0,446.0,379.0,379.0,446.0,3500.0,3054.0,0.0,3500.0,81.283582,81.283582,29.044776,6.656716,0.357326,0.642674,0.019143,0.000000,0.00000,1.000000,560.160000,0.012303,6.656716,0.012303,0.000000,80.283582,0.987697,,0.019143,0.070459,0.069592,0.081895,0.081895,0.127429,0.108286,0.108286,0.069592,0.069592,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,"Administratívne, pomocné kancelárske a iné obc...",N,82.0
1328528,4458450,2225050,1,289353,2015-02-27,60748940.0,2014,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,69384.0,47818.0,5008.0,-5515.0,70391.0,69884.0,75399.0,21566.0,0.0,-758.0,-1718.0,-1718.0,-758.0,174779.0,179373.0,2.0,178615.0,13.854633,13.854633,9.548323,-0.151358,0.684248,2.500987,0.431396,0.000000,0.00000,0.123390,143.943151,1.078916,-0.010053,0.071662,1.007255,-0.073144,-0.078916,-378.000000,0.232217,,-0.024584,-0.010847,-0.010847,-0.004337,-0.009830,-0.009618,-0.024555,-0.024584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0
1328529,5030426,2678436,1,289353,2016-03-12,60748940.0,2015,1,699.0,VALPEX s.r.o.,1991-07-17,46900.0,,29950.0,25961.0,2010.0,-9614.0,37633.0,30029.0,39643.0,3989.0,0.0,-3139.0,-4099.0,-4099.0,-3139.0,120316.0,126841.0,3.0,123702.0,14.900498,14.900498,12.915920,-1.561692,0.864531,4.006660,0.329491,0.000000,0.00000,0.033154,89.850394,1.320157,-0.079182,0.066935,1.253222,-0.242514,-0.320157,-1045.333333,0.503886,,-0.136501,-0.104532,-0.104532,-0.026090,-0.034069,-0.033136,-0.136401,-0.136501,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0,"Veľkoobchod, okrem motorových vozidiel a motoc...",G,46.0


In [45]:
merged_df.to_csv('../../DATA/MODEL/financial_ratios_df.csv', index = False)