In [1]:
import re
import csv

# Export a CSV data dictionary (one row per column) from a MySQL-style DDL file.
#
# The DDL is parsed with regular expressions plus a small quote-aware scanner;
# it is not a full SQL parser. Column definitions are expected one per line
# with back-quoted column names, which is what the original script assumed.

input_path = '../../sql/schema/database_denormalization.sql'
output_path = 'data_dictionary.csv'

CSV_HEADER = ['Table Name', 'Field Name', 'Data Type', 'Comment', 'Not Null', 'Default', 'Primary Key', 'Foreign Key']

# A single-quoted SQL string literal, allowing '' and backslash escapes.
_SQL_STRING = r"'(?:[^'\\]|''|\\.)*'"

_CREATE_TABLE_RE = re.compile(
    r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s*\(',
    re.I,
)
_FOREIGN_KEY_RE = re.compile(
    r'ALTER\s+TABLE\s+`?(\w+)`?\s+ADD\s+(?:CONSTRAINT\s+(?:`?\w+`?\s+)?)?FOREIGN\s+KEY\s*'
    r'\(\s*`?(\w+)`?\s*\)\s*REFERENCES\s+`?(\w+)`?\s*\(\s*`?(\w+)`?\s*\)',
    re.I,
)
# `name` TYPE[(args)] rest -- args may contain quoted values and spaces,
# e.g. enum('a','b') or decimal(10, 2).
_COLUMN_RE = re.compile(
    r"`(.+?)`\s+"
    r"([A-Z][A-Z0-9_]*(?:\((?:[^()']|" + _SQL_STRING + r")*\))?)"
    r"\s*(.*)",
    re.I,
)
_COMMENT_RE = re.compile(r"\bCOMMENT\s+'((?:[^'\\]|''|\\.)*)'", re.I)
# Alternatives are tried in order: quoted string, double-quoted string,
# parenthesised expression (one nesting level), then a bare token.
_DEFAULT_RE = re.compile(
    r"\bDEFAULT\s+("
    + _SQL_STRING
    + r'|"(?:[^"\\]|""|\\.)*"'
    + r"|\((?:[^()]|\([^()]*\))*\)"
    + r"|[^\s,]+)",
    re.I,
)
_NOT_NULL_RE = re.compile(r'\bNOT\s+NULL\b', re.I)
_PRIMARY_KEY_RE = re.compile(r'\bPRIMARY\s+KEY\b', re.I)
# Table-level constraint line: [CONSTRAINT [name]] PRIMARY KEY [USING ...] (cols)
_TABLE_PK_RE = re.compile(
    r'(?:CONSTRAINT\s+(?:`?\w+`?\s+)?)?PRIMARY\s+KEY\b[^(`]*\(((?:[^()]|\([^()]*\))*)\)',
    re.I,
)
_PK_PART_RE = re.compile(r'\s*(?:`([^`]+)`|(\w+))')
_QUOTE_ESCAPE_RE = re.compile(r"\\(.)|''")


def _find_closing_paren(text, open_idx):
    """Return the index of the ')' matching the '(' at ``open_idx``, or -1.

    Parentheses inside quoted strings, back-quoted identifiers and SQL
    comments are ignored, so a ')' or ');' in a COMMENT value does not end
    the table body early.
    """
    depth = 0
    quote = None
    i = open_idx
    end = len(text)
    while i < end:
        ch = text[i]
        if quote is not None:
            if ch == '\\' and quote != '`':
                i += 2  # backslash escape: skip the escaped character
                continue
            if ch == quote:
                if text[i + 1:i + 2] == quote:
                    i += 2  # doubled quote is a literal quote character
                    continue
                quote = None
        elif ch in '\'"`':
            quote = ch
        elif ch == '#' or text.startswith('--', i):
            newline = text.find('\n', i)
            if newline == -1:
                return -1
            i = newline
        elif text.startswith('/*', i):
            close = text.find('*/', i + 2)
            if close == -1:
                return -1
            i = close + 1
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def _unescape_sql_string(text):
    """Turn '' and \\' inside a SQL string body into a plain quote."""
    def _replace(match):
        escaped = match.group(1)
        if escaped is None or escaped == "'":
            return "'"
        return match.group(0)  # leave other backslash sequences untouched

    return _QUOTE_ESCAPE_RE.sub(_replace, text)


def _table_level_pk_columns(line):
    """Return the column names listed by a table-level PRIMARY KEY line."""
    match = _TABLE_PK_RE.match(line)
    if match is None:
        return []
    names = []
    for part in match.group(1).split(','):
        part_match = _PK_PART_RE.match(part)
        if part_match:
            names.append(part_match.group(1) or part_match.group(2))
    return names


def _parse_column_line(line):
    """Parse one column definition line.

    Returns ``(name, data_type, comment, not_null, default, is_pk)`` or
    ``None`` when the line is not a back-quoted column definition.
    """
    match = _COLUMN_RE.match(line)
    if match is None:
        return None
    column_name, data_type, rest = match.groups()

    comment = ''
    comment_match = _COMMENT_RE.search(rest)
    if comment_match:
        comment = _unescape_sql_string(comment_match.group(1))
        # Drop the comment text so words such as "NOT NULL" or "DEFAULT"
        # inside it cannot be mistaken for column attributes below.
        rest = rest[:comment_match.start()] + rest[comment_match.end():]

    default_match = _DEFAULT_RE.search(rest)
    default = default_match.group(1) if default_match else ''
    not_null = 'NOT NULL' if _NOT_NULL_RE.search(rest) else ''
    # AUTO_INCREMENT is kept as a primary-key hint, as in the original script.
    is_pk = 'AUTO_INCREMENT' in rest.upper() or bool(_PRIMARY_KEY_RE.search(rest))
    return column_name, data_type, comment, not_null, default, is_pk


def parse_tables(ddl):
    """Return ``[(table_name, body), ...]`` for every CREATE TABLE in ``ddl``.

    ``body`` is the text between the outer parentheses. Trailing table
    options (``) ENGINE=InnoDB ...;``) are handled because the body ends at
    the matching parenthesis rather than at the first ``);``.
    """
    tables = []
    pos = 0
    while True:
        match = _CREATE_TABLE_RE.search(ddl, pos)
        if match is None:
            break
        open_idx = match.end() - 1
        close_idx = _find_closing_paren(ddl, open_idx)
        if close_idx == -1:
            pos = match.end()  # unbalanced statement: skip it and keep going
            continue
        tables.append((match.group(1), ddl[open_idx + 1:close_idx]))
        pos = close_idx + 1
    return tables


def find_foreign_keys(ddl):
    """Return ``(src_table, src_col, ref_table, ref_col)`` tuples found in
    ``ALTER TABLE ... ADD [CONSTRAINT name] FOREIGN KEY`` statements."""
    return _FOREIGN_KEY_RE.findall(ddl)


def index_foreign_keys(foreign_keys):
    """Build ``{table: {column: 'ref_table(ref_col)'}}`` from FK tuples."""
    fk_dict = {}
    for src_table, src_col, ref_table, ref_col in foreign_keys:
        fk_dict.setdefault(src_table, {})[src_col] = f'{ref_table}({ref_col})'
    return fk_dict


def build_rows(tables, fk_dict):
    """Return one CSV row (a list matching ``CSV_HEADER``) per column."""
    rows = []
    for table, body in tables:
        lines = [line.strip().rstrip(',') for line in body.split('\n')]

        # The table-level PRIMARY KEY usually follows the columns, so collect
        # it before emitting any row.
        pk_columns = set()
        for line in lines:
            pk_columns.update(_table_level_pk_columns(line))

        table_fks = fk_dict.get(table, {})
        for line in lines:
            parsed = _parse_column_line(line)
            if parsed is None:
                continue
            column_name, data_type, comment, not_null, default, is_pk = parsed
            pk_flag = 'PK' if is_pk or column_name in pk_columns else ''
            fk_flag = table_fks.get(column_name, '')
            rows.append([table, column_name, data_type, comment, not_null, default, pk_flag, fk_flag])
    return rows


# When this cell/script is executed, __name__ is '__main__', so the export
# still runs exactly as before; the guard only keeps the helpers importable
# without touching the filesystem.
if __name__ == '__main__':
    with open(input_path, encoding='utf-8') as f:
        ddl = f.read()

    tables = parse_tables(ddl)
    foreign_keys = find_foreign_keys(ddl)
    fk_dict = index_foreign_keys(foreign_keys)
    rows = build_rows(tables, fk_dict)

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)

    print(f"Đã xuất data dictionary ra file: {output_path}")

Đã xuất data dictionary ra file: data_dictionary.csv
