In [1]:
import pandas as pd
import numpy as np

import ast


# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

In [2]:
# Template layouts and the sampled reports are loaded in full.
templates_table = pd.read_csv('../../DATA/RAW/register_uz_apr23/preprocessed/templates_dataset.csv')
reports_table = pd.read_csv('../../DATA/TRANSFORM/financial_statements/financial_reports_table_sample.csv')

# Only these statement columns are used afterwards, so parse just them instead of
# reading the whole file and discarding the rest.
statement_columns = ['financial_statement_id', 'entity_id', 'entity_ico', 'year', 'month']
statements_table = pd.read_csv(
    '../../DATA/TRANSFORM/financial_statements/financial_statements_table.csv',
    usecols=statement_columns,
)[statement_columns]  # usecols keeps the file's column order; re-select to pin ours

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Attach the statement columns to every report row; the left join keeps reports
# that have no matching statement.
# NOTE: this cell cannot be re-run on its own, because it overwrites
# reports_table and then clears statements_table.
reports_table = pd.merge(
    reports_table,
    statements_table,
    how='left',
    on='financial_statement_id',
)
statements_table = None  # drop the reference to the statements frame

In [11]:
# Show the distinct (id, name) pairs for five template ids.
templates_table.loc[
    templates_table['template_id'].isin([687, 699, 22, 21, 943]),
    ['template_id', 'template_name'],
].drop_duplicates()

Unnamed: 0,template_id,template_name
2526,21,Súvaha Úč POD 1-01
2673,22,Výkaz ziskov a strát Úč POD 2-01
4770,687,Úč MUJ
5263,699,Úč POD
7158,943,Výkaz vybraných údajov


In [12]:
def print_template(template_id):
    """Print the header grid and display the row labels of every table in a template.

    Reads the module-level ``templates_table``. For each table name that has
    header cells (``type == "column"``) it prints the table name, then one list
    per header row with ``'-'`` where no header cell exists, then displays the
    ``type == "row"`` entries of that table indexed and sorted by ``row``.

    The header grid size is the largest ``row`` / ``column`` value over all
    tables of the template, so smaller tables are padded with ``'-'``.

    Parameters
    ----------
    template_id : value compared against the ``template_id`` column.

    Returns
    -------
    None. Output goes to stdout and to ``display``.
    """
    table = templates_table.query('template_id == @template_id')[['table_name', 'type', 'row', 'column', 'text']]
    columns = table.query('type == "column"')

    # Without header cells the maxima below are NaN and int() would raise.
    if columns.empty:
        print(f'NO COLUMN HEADERS FOR TEMPLATE {template_id}')
        return

    header_width = int(columns['column'].max())
    header_height = int(columns['row'].max())
    rows = table.query('type == "row"')

    for table_name in columns['table_name'].unique():
        print(table_name)

        # Index this table's header cells once by (row, column) instead of
        # running a query per grid position; setdefault keeps the first match.
        table_columns = columns[columns['table_name'] == table_name]
        header_cells = {}
        for row_number, col_number, text in zip(table_columns['row'],
                                                table_columns['column'],
                                                table_columns['text']):
            header_cells.setdefault((row_number, col_number), text)

        for row_number in range(1, header_height + 1):
            print([header_cells.get((row_number, col_number), '-')
                   for col_number in range(1, header_width + 1)])

        display(rows[rows['table_name'] == table_name][['row', 'text']].sort_values('row').set_index('row'))
        print()


In [13]:
# Short label -> template_id. The trailing names are the template_name values
# listed for these ids in templates_table.
templates_codes = dict(
    MUJ=687,    # Úč MUJ
    POD=699,    # Úč POD
    POD_1=21,   # Súvaha Úč POD 1-01
    POD_2=22,   # Výkaz ziskov a strát Úč POD 2-01
    VVU=943,    # Výkaz vybraných údajov
)

In [14]:
# for key, value in templates_codes.items():
#     print(key)
#     print_template(value)

# print_template(templates_codes['POD'])

In [78]:
class Report:
    """Holder for one report: its template id plus title page and tables."""

    def __init__(self, template_id):
        # Only the template id is known at construction; the rest starts unset.
        self.template_id = template_id
        self.title_page = self.tables = None


class ReportContentParser:
    """Turn the raw ``content`` string of a report into a list of indicators.

    ``content`` is a Python-literal dict with a title page under
    ``'titulnaStrana'`` and a list of tables under ``'tabulky'``; each table
    carries a flat list of cell strings under ``'data'``.

    Supported template ids: 687 (Úč MUJ) and 699 (Úč POD) yield indicator
    values; 21 and 22 (the older Úč POD 1-01 / 2-01 forms) are only dumped to
    stdout and yield an empty list.

    The indicator abbreviations (ca, ta, eq, cl, ncl, tl, cash, cf, sal, eat,
    ebit, ...) presumably stand for current assets, total assets, equity,
    current / non-current / total liabilities, cash, cash flow, sales, earnings
    after tax and EBIT. The row indices they are built from are taken as given
    and should be verified against the template layout.
    """

    def parse(self, template_id, content_string):
        """Parse one report, show its title page and return its indicator values.

        Parameters
        ----------
        template_id : number identifying the report template (int or float).
        content_string : str holding a Python-literal dict.

        Returns
        -------
        list of indicator values (empty for templates without an extractor),
        or None when the content is missing or has no tables.
        """
        # Missing content is not a string (for example a NaN cell) and would make
        # ast.literal_eval fail with an opaque error.
        if not isinstance(content_string, str):
            print('DOES NOT CONTAIN CONTENT')
            return None

        content = ast.literal_eval(content_string)

        display('title page:', content.get('titulnaStrana'))

        tables_content = content.get('tabulky')
        if tables_content is None:
            print('DOES NOT CONTAIN TABLES')
            return None

        # Numeric keys hash alike, so 699.0 finds the 699 entry.
        handlers = {
            687: self.__parse_MUJ,
            699: self.__parse_POD,
            21: self.__parse_POD_1,
            22: self.__parse_POD_2,
        }
        handler = handlers.get(template_id)
        values = handler(tables_content) if handler is not None else []
        print(values)
        return values

    def __parse_title_page(self, title_page_content):
        """Placeholder: title-page parsing is not implemented; returns None."""
        return None

    @staticmethod
    def __column_as_floats(data, n_columns, column):
        """Return one column of a flat, row-major table as a float array.

        ``data`` is reshaped into rows of ``n_columns`` cells and cell
        ``column`` of every row is kept; empty strings count as 0.

        Raises ValueError when ``len(data)`` is not a multiple of ``n_columns``
        or a non-empty cell is not numeric.
        """
        cells = np.reshape(np.array(data), (len(data) // n_columns, n_columns))[:, column]
        cells[cells == ''] = '0'
        return cells.astype(float)

    @staticmethod
    def __show_indicators(indicators):
        """Display the indicators as a one-row frame and return their values."""
        # Names and values come from one dict, so they cannot get out of step.
        values = list(indicators.values())
        display(pd.DataFrame([values], columns=list(indicators)))
        return values

    def __dump_tables(self, tables_content):
        """Print the name and raw data of every table; return an empty list."""
        for table in tables_content:
            print(table.get('nazov'))
            data = table.get('data')
            if data is None:
                print('DOES NOT CONTAIN DATA')
            print(data)
        return []

    def __parse_MUJ(self, tables_content):
        """Template 687: indicators from the first three tables."""
        # Every table has two cells per row; the first cell is used
        # (presumably the current accounting period - TODO confirm).
        assets = self.__column_as_floats(tables_content[0]['data'], 2, 0)
        liab = self.__column_as_floats(tables_content[1]['data'], 2, 0)
        income = self.__column_as_floats(tables_content[2]['data'], 2, 0)

        cl = liab[14] + liab[21] + liab[20]
        ncl = liab[11] + liab[13]
        return self.__show_indicators({
            'ca': assets[14] + assets[16] + assets[20],
            'ta': assets[0],
            'eq': liab[1],
            'cl': cl,
            'ncl': ncl,
            'tl': cl + ncl,
            'cash': assets[20],
            'cf': income[37] + income[13],
            'sal': income[1] + income[2] + income[5] + income[20],
            'eat': income[37],
            'ebit': income[30] + income[35] + income[37],
        })

    def __parse_POD(self, tables_content):
        """Template 699: indicators from the first three tables."""
        # The assets table has four cells per row and the third is used; the
        # other two tables have two cells per row and the first is used.
        assets = self.__column_as_floats(tables_content[0]['data'], 4, 2)
        liab = self.__column_as_floats(tables_content[1]['data'], 2, 0)
        income = self.__column_as_floats(tables_content[2]['data'], 2, 0)

        # Balance-sheet indices below run over assets followed by liabilities.
        balance = np.concatenate([assets, liab])

        # NOTE(review): the original marked 'ca' and 'cl' with the comment
        # "vyrost"; its meaning is unclear.
        cl = balance[121]
        tl = balance[100]
        return self.__show_indicators({
            'ca': balance[70] + balance[52] + balance[33],
            'cc': balance[52],
            'ta': balance[0],
            'eq': balance[79],
            'cl': cl,
            'ncl': tl - cl,
            'tl': tl,
            'cash': balance[70],
            'cf': income[60] + income[21],
            'sal': income[2] + income[3] + income[4] + income[7] + income[29],
            'eat': income[60],
            'ebit': income[26] - income[7] + income[23] + income[12] + income[22] + income[24] - income[8],
            'ebt': income[55],
            'stock': balance[33],
            'cost': income[9] + income[44],
            'it': income[38] + income[48],
            'yie': income[1] + income[28],
        })

    def __parse_POD_1(self, tables_content):
        """Template 21: no indicators are extracted; the tables are only printed."""
        return self.__dump_tables(tables_content)

    def __parse_POD_2(self, tables_content):
        """Template 22: no indicators are extracted; the tables are only printed."""
        return self.__dump_tables(tables_content)


In [79]:
def get_random_report(query_string = None, random_state = None):
    """Pick one random report, print its identification and parse its content.

    Reads the module-level ``reports_table``.

    Parameters
    ----------
    query_string : str, optional
        ``DataFrame.query`` expression limiting the candidate reports; all
        reports are candidates when omitted.
    random_state : optional
        Passed to ``DataFrame.sample`` so that a draw can be repeated.

    Raises
    ------
    ValueError
        When no report matches ``query_string``.
    """
    candidates = reports_table if query_string is None else reports_table.query(query_string)
    if candidates.empty:
        # sample(1) would also raise ValueError here, but without naming the query.
        raise ValueError(f'no reports match query: {query_string!r}')
    report = candidates.sample(1, random_state=random_state).iloc[0, :]

    # entity_ico may be missing for a report without a matching statement;
    # int() would raise on NaN, so print the value unchanged in that case.
    ico = report.entity_ico
    print('ICO', int(ico) if pd.notna(ico) else ico)
    print(f'https://www.registeruz.sk/cruz-public/domain/accountingentity/show/{(report.entity_id)}')

    print()
    print(f'{report.month}/{report.year}')

    parser = ReportContentParser()
    parser.parse(report.template_id, report.content)



In [80]:
# Parse one random report for every template id present in reports_table.
# Missing ids are dropped first: a NaN would produce the query
# `template_id == nan`, which can never select a row and would abort the loop.
for template_id in reports_table['template_id'].dropna().unique():
    print('template:', template_id)
    qstring = f'template_id == {template_id}'
    get_random_report(qstring)
    print()

template: 699.0
ICO 48173193
https://www.registeruz.sk/cruz-public/domain/accountingentity/show/1596818

1/2017


'title page:'

{'nazovUctovnejJednotky': 'TNG s.r.o.',
 'ico': '48173193',
 'dic': '2120086463',
 'typUctovnejJednotky': 'veľká',
 'adresa': {'ulica': 'Ružová Dolina',
  'cislo': '8',
  'psc': '82109',
  'mesto': 'Bratislava'},
 'skNace': '46900',
 'typZavierky': 'Riadna',
 'obdobieOd': '2017-01',
 'obdobieDo': '2017-12',
 'predchadzajuceObdobieOd': '2016-01',
 'predchadzajuceObdobieDo': '2016-12',
 'datumSchvalenia': '2018-03-29',
 'datumZostavenia': '2018-03-29'}

Unnamed: 0,ca,cc,ta,eq,cl,ncl,tl,cash,cf,sal,eat,ebit,ebt,stock,cost,it,yie
0,18055.0,16676.0,18055.0,12740.0,5315.0,0.0,5315.0,1379.0,-551.0,2666577.0,-551.0,-629.0,-551.0,0.0,2667546.0,0.0,2666995.0


[18055.0, 16676.0, 18055.0, 12740.0, 5315.0, 0.0, 5315.0, 1379.0, -551.0, 2666577.0, -551.0, -629.0, -551.0, 0.0, 2667546.0, 0.0, 2666995.0]

template: 687.0
ICO 51126486
https://www.registeruz.sk/cruz-public/domain/accountingentity/show/1732641

1/2017


'title page:'

{'nazovUctovnejJednotky': 'Salvezza s.r.o.',
 'ico': '51126486',
 'dic': '2120611042',
 'adresa': {'ulica': 'POĽNOHOSPODÁRSKA',
  'cislo': '5',
  'psc': '82107',
  'mesto': 'Bratislava-Vrakuňa'},
 'skNace': '46900',
 'typZavierky': 'Riadna',
 'obdobieOd': '2017-01',
 'obdobieDo': '2017-12',
 'predchadzajuceObdobieOd': '2016-01',
 'predchadzajuceObdobieDo': '2016-12',
 'datumSchvalenia': '2018-03-26',
 'datumZostavenia': '2018-03-26',
 'oznacenieObchodnehoRegistra': 'OR Okresného súdu Bratislava I, Oddiel:Sro, vložka:123349/B'}

Unnamed: 0,ca,ta,eq,cl,ncl,tl,cash,cf,sal,eat,ebit
0,6639.0,6639.0,6639.0,0.0,0.0,0.0,6639.0,0.0,0.0,0.0,0.0


[6639.0, 6639.0, 6639.0, 0.0, 0.0, 0.0, 6639.0, 0.0, 0.0, 0.0, 0.0]

template: 22.0
ICO 46731865
https://www.registeruz.sk/cruz-public/domain/accountingentity/show/1228055

1/2013


'title page:'

{'nazovUctovnejJednotky': 'SWAG s. r. o.',
 'ico': '46731865',
 'dic': '2023580966',
 'adresa': {'ulica': 'Rozvodná',
  'cislo': '2960/7',
  'psc': '83101',
  'mesto': 'Bratislava 3 - Nové Mesto'},
 'skNace': '18120',
 'typZavierky': 'Riadna',
 'obdobieOd': '2013-01',
 'obdobieDo': '2013-12',
 'predchadzajuceObdobieOd': '2012-01',
 'predchadzajuceObdobieDo': '2012-12',
 'datumZostavenia': '2014-03-27'}

{'sk': 'Výkaz ziskov a strát'}
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '114', '', '', '', '114', '', '-114', '', '', '', '', '', '', '', '', '', '', '', '166', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '-280', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '-280', '', '', '', '', '', '', '', '-280', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '-280', '', '', '', '-280']
[]

template: 21.0
ICO 44739419
https://www.registeruz.sk/cruz-public/domain/accountingentity/show/769703

1/2013


'title page:'

{'nazovUctovnejJednotky': 'PE-KA-DOS spol. s r.o.',
 'ico': '44739419',
 'dic': '2022813419',
 'adresa': {'ulica': 'Šintava',
  'cislo': '14',
  'psc': '92551',
  'mesto': 'Šintava'},
 'skNace': '49410',
 'typZavierky': 'Riadna',
 'obdobieOd': '2013-01',
 'obdobieDo': '2013-12',
 'predchadzajuceObdobieOd': '2012-01',
 'predchadzajuceObdobieDo': '2012-12',
 'datumZostavenia': '2014-03-27'}

{'sk': 'Strana aktív'}
['128486', '26679', '101807', '138605', '108414', '26679', '81735', '88492', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '108414', '26679', '81735', '88492', '', '', '', '', '', '', '', '', '108414', '26679', '81735', '88492', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '20072', '', '20072', '50113', '50', '', '50', '50', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '50', '', '50', '50', '4594', '', '4594', '3678', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '4594', '', '4594', '3678', '', '', '', '', '', '', '', '', '14777', '', '14777', '41449', '14777', '', '14777', '41449', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '