In [1]:
import xml.etree.ElementTree as ET

def xml_to_dict(file_name):
    tree = ET.parse(file_name)
    root = tree.getroot()

    def recurse(node):
        if len(node) == 0:
            return node.text
        else:
            result = {}
            for child in node:
                if child.tag not in result:
                    result[child.tag] = recurse(child)
                else:
                    if type(result[child.tag]) is list:
                        result[child.tag].append(recurse(child))
                    else:
                        result[child.tag] = [result[child.tag], recurse(child)]
            return result

    return {root.tag: recurse(root)}


In [2]:
result_dict = xml_to_dict("./db/pbd.xml")

print(list(result_dict.keys()))

['DatabaseModel']


In [3]:
import re

def sql_to_dict(sql_text):
    # Wyszukuje wszystkie definicje tabel
    tables = re.findall(    r"CREATE TABLE (.*?);", sql_text, re.DOTALL)
    references = re.findall(r"ALTER TABLE (.*?);", sql_text, re.DOTALL)


    result = {}
    for table in tables:
        # Wyszukuje nazwę tabeli i definicję
        match = re.match(r"(\w*)\s*(.*)", table, re.DOTALL)
        if match:
            table_name = match.group(1)
            table_definition = match.group(2)
            result[table_name] = f'CREATE TABLE {table_name} {table_definition};'
    for ref in references:
        match = re.match(r"(\w*)\s*(.*)", ref, re.DOTALL)
        if match:
            table_name = match.group(1)
            ref_definition = match.group(2)
            result[table_name] += f'\nALTER TABLE {table_name} {ref_definition};'
    return result


In [4]:
with open('./db/db.sql', 'rt', encoding='utf-8') as f:
    d = sql_to_dict(f.read())

In [5]:

import re

def process_roles(input_text):
    # Split the text into sections based on comments
    sections = re.split(r'-- ', input_text)
    
    roles = []

    for section in sections[1:]:  # Skip the first split as it will be empty
        # Extract the comment (now the first line)
        comment = section.split('\n', 1)[0]

        # Find the role name
        role_name_match = re.search(r'CREATE ROLE \[?([^\]]+)\]?', section)
        role_name = role_name_match.group(1) if role_name_match else None

        # Store the information in the list
        roles.append({
            'name': role_name,
            'comment': comment.strip(),
            'create': section.strip()
        })

    return roles



In [6]:
with open('./db/roles.sql', 'rt', encoding='utf-8') as f:
    print(process_roles(f.read())[3])

{'name': 'HeadMasterRole;\n\nGRANT SELECT ON TotalIncomeForProducts TO HeadMasterRole;\nGRANT SELECT ON RevenueSummaryByProductType TO HeadMasterRole;\n\nGRANT EXECUTE ON AddEmployee TO HeadMasterRole;\nGRANT EXECUTE ON AddRole TO HeadMasterRole;\nGRANT EXECUTE ON ModifyRole TO HeadMasterRole;\nGRANT EXECUTE ON AddEmployeeRole TO HeadMasterRole;\nGRANT EXECUTE ON RemoveEmployeeRole TO HeadMasterRole;\n\nGRANT EXECUTE ON CreateSemesterOfStudies TO HeadMasterRole;\nGRANT EXECUTE ON ModifyStudies TO HeadMasterRole;\nGRANT EXECUTE ON AddFieldOfStudy TO HeadMasterRole;\nGRANT EXECUTE ON DeleteFieldOfStudies TO HeadMasterRole;\n\nGRANT SELECT ON EmployeeStatistics TO HeadMasterRole;\nGRANT SELECT ON EmployeeTimeTable TO HeadMasterRole;\nGRANT SELECT ON ActivityConflicts TO HeadMasterRole;\n\nGRANT EXECUTE ON EnrollUserWithoutImmediatePayment TO HeadMasterRole;\nGRANT EXECUTE ON ChangeProductPrice TO HeadMasterRole;\n\n', 'comment': 'Dyrektor - odroczenie płatności, widoki dotyczące finansów,

In [7]:
def filter_checks(string:str):
    return '\n'.join([ line for line in string.split('\n') if 'CHECK' not in line])

In [8]:
def extract_sql_entities(sql_text):
    pattern = r"(-- [^\n]+)\n(CREATE OR ALTER (?:VIEW|FUNCTION|PROCEDURE) [^\n]+) AS\n([\s\S]+?)\nGO"

    matches = re.findall(pattern, sql_text)

    extracted_data = []
    for match in matches:
        comment, create_statement, body = match
        entity_name = create_statement.split()[4]  # Assumes the name is the 5th word
        entity_type = "procedure" if "PROCEDURE" in create_statement else "function" if "FUNCTION" in create_statement else "view"
        
        extracted_data.append({
            "name": entity_name,
            "type": entity_type,
            "create": create_statement + " AS\n" + body,
            "comment": comment.lstrip('-- ').strip()

        })

    return extracted_data


def extract_actions(sql_string):
    # Regular expression to match the pattern, including full names with schema
    pattern = r"(?:--(.*?)\n)?\b(CREATE(?:\s+OR\s+ALTER)?\s+(FUNCTION|PROCEDURE|TRIGGER)\s+([\w.]+).*?;\s*GO)"
    matches = re.findall(pattern, sql_string, re.DOTALL | re.IGNORECASE)

    result = []
    for match in matches:
        comment = match[0].strip() if match[0] else ''
        result.append({
            "name": match[3],
            "type": match[2].lower(),
            "comment": comment,
            "create": match[1].strip()
        })

    return result

In [9]:
def parse_indexes(text):
    # Split the text into lines
    lines = text.split('\n')

    indexes = []
    current_index = None
    current_create = ''
    current_comment = ''

    for line in lines:
        # Check for a comment
        if line.strip().startswith('--'):
            current_comment = line.strip().lstrip('--').strip()

        # Check for a CREATE INDEX statement
        elif line.strip().startswith('CREATE INDEX'):
            if current_index is not None:
                indexes.append({
                    'name': current_index,
                    'create': current_create,
                    'comment': current_comment
                })

            # Extract the index name
            current_index = re.findall(r'CREATE INDEX (\w+) ON', line)[0]
            current_create = line
            current_comment = ''

    # Add the last index if exists
    if current_index is not None:
        indexes.append({
            'name': current_index,
            'create': current_create,
            'comment': current_comment
        })

    return indexes


In [19]:
file = open('./output.org', 'w', encoding='utf-8')
print(r'#+LATEX_HEADER: \usepackage[a4paper, left=2.25cm, right=2.25cm, top=1.25cm, bottom=1.25cm]{geometry}', file=file)

print(r'#+LATEX_HEADER: \usepackage{lmodern}', file=file)
print(r'#+LATEX_HEADER: \renewcommand{\contentsname}{Spis treści}', file=file)
print(r'#+author: Piotr Karamon, Kyrylo Iakymenko, Joanna Konieczny', file=file)

print(r'#+title: Dokumentacja', file=file)
print(r'#+date: ', file=file)
print(file=file)

with open('./wklad.org', 'rt' , encoding='utf-8') as f:
    print(f.read(), file=file)

with open('./funkcje_system.org', 'rt', encoding='utf-8') as f:
    print(f.read(),file=file)

with open('./schemat.org', 'rt', encoding='utf-8') as f:
    print(f.read(),file=file)

print('* Tabele', file=file)
for table in result_dict['DatabaseModel']['Tables']['Table']:
    print(f'** ={table["Name"]}= ', file=file)
    print(f'{table["Description"]}', file=file)

    columns = table["Columns"]["Column"]
    if isinstance(columns, dict):
        columns = [columns]
        
    for column in columns:
        description = '- ' + column["Description"] if column["Description"] is not None else ""
        print(f"+ ={column['Name']}= {description}", file=file)

    table_checks = table['TableChecks']
    table_df = filter_checks(d[table["Name"]])
    print(f'#+begin_src sql\n{table_df}\n#+end_src', file=file)

    if table_checks is None:
        continue
    print('Warunki integralnościowe:\n\n', file=file)
    for table_check_key in table_checks:
        table_check = table_checks[table_check_key]
        if isinstance(table_check, dict):
            table_check = [table_check]
        
        for tb in table_check:
            print(f'+ ={tb["Name"]}=', file=file)
            if tb["Description"] is not None:
                print(f'\n    {tb["Description"]}', file=file)

            print(f'    #+begin_src sql\nCONSTRAINT {tb["Name"]} CHECK\n({tb["CheckExpression"]})\n   #+end_src', file=file)

print('* Widoki', file=file)
# Example usage with the previous SQL content
with open('./db/views.sql', 'rt', encoding='utf-8') as f:
    extracted_entities = extract_sql_entities(f.read())

    for entity in extracted_entities:
        print(f'** ={entity["name"]}=', file=file)
        print(f'{entity["comment"]}', file=file)
        print(f"#+begin_src sql\n{entity['create']}\n#+end_src", file=file)
print('* Funkcje i procedury', file=file)
with open('./db/fp.sql', 'rt', encoding='utf-8') as f:
    extracted_fs = extract_actions(f.read())
    for f in extracted_fs:
        print(f'** ={f["name"]}=', file=file)
        print(f'{f["comment"]}', file=file)
        print(f"#+begin_src sql\n{f['create']}\n#+end_src", file=file)

print('* Triggery', file=file)
with open('./db/triggers.sql', 'rt', encoding='utf-8') as f:
    for action in extract_actions(f.read()):
        print(f'** ={action["name"]}=', file=file)
        print(f'{action["comment"]}', file=file)
        print(f"#+begin_src sql\n{action['create']}\n#+end_src", file=file)

print('* Indeksy', file=file)
with open('./db/indexes.sql', 'rt', encoding='utf-8') as f:
    extracted_indexes = parse_indexes(f.read())
    for index in extracted_indexes:
        print(f'** ={index["name"]}=', file=file)
        print(f'{index["comment"]}', file=file)
        print(f"#+begin_src sql\n{index['create']}\n#+end_src", file=file)

with open('./roles.org', 'rt', encoding='utf-8') as f:
    print(f.read(), file=file)
    print(file=file)

with open('./generowanie_danych.org', 'rt', encoding='utf-8') as f:
    print(f.read(), file=file)

file.close()
