From 73fef764d0f627a1674753161cd62581b5a35864 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 15 Oct 2025 16:35:16 +0300 Subject: [PATCH 1/2] use own dicts serializer instead of json.dump --- .../ast/mindsdb/create_database.py | 5 +- .../ast/mindsdb/create_predictor.py | 7 ++- mindsdb_sql_parser/ast/select/select.py | 8 +-- mindsdb_sql_parser/lexer.py | 2 +- mindsdb_sql_parser/parser.py | 6 +-- mindsdb_sql_parser/utils.py | 54 +++++++++++++++++++ tests/test_mindsdb/test_databases.py | 29 +++++++++- 7 files changed, 94 insertions(+), 17 deletions(-) diff --git a/mindsdb_sql_parser/ast/mindsdb/create_database.py b/mindsdb_sql_parser/ast/mindsdb/create_database.py index ed33998..e3c0616 100644 --- a/mindsdb_sql_parser/ast/mindsdb/create_database.py +++ b/mindsdb_sql_parser/ast/mindsdb/create_database.py @@ -1,6 +1,5 @@ -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json class CreateDatabase(ASTNode): @@ -49,6 +48,6 @@ def get_string(self, *args, **kwargs): parameters_str = '' if self.parameters: - parameters_str = f', PARAMETERS = {json.dumps(self.parameters)}' + parameters_str = f', PARAMETERS = {dump_json(self.parameters)}' out_str = f'CREATE{replace_str} DATABASE {"IF NOT EXISTS " if self.if_not_exists else ""}{self.name.to_string()} {engine_str}{parameters_str}' return out_str diff --git a/mindsdb_sql_parser/ast/mindsdb/create_predictor.py b/mindsdb_sql_parser/ast/mindsdb/create_predictor.py index 51e8bc6..9e95f4e 100644 --- a/mindsdb_sql_parser/ast/mindsdb/create_predictor.py +++ b/mindsdb_sql_parser/ast/mindsdb/create_predictor.py @@ -1,6 +1,5 @@ -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json from mindsdb_sql_parser.ast.select import Identifier from mindsdb_sql_parser.ast.select.operation import Object @@ -101,13 +100,13 @@ def 
get_string(self, *args, **kwargs): for key, value in self.using.items(): if isinstance(value, Object): args = [ - f'{k}={json.dumps(v)}' + f'{k}={dump_json(v)}' for k, v in value.params.items() ] args_str = ', '.join(args) value = f'{value.type}({args_str})' else: - value = json.dumps(value) + value = dump_json(value) using_ar.append(f'{Identifier(key).to_string()}={value}') diff --git a/mindsdb_sql_parser/ast/select/select.py b/mindsdb_sql_parser/ast/select/select.py index c5eaeb7..acda439 100644 --- a/mindsdb_sql_parser/ast/select/select.py +++ b/mindsdb_sql_parser/ast/select/select.py @@ -1,9 +1,9 @@ from typing import List, Union -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json from mindsdb_sql_parser.ast.select.operation import Object + class Select(ASTNode): def __init__(self, @@ -158,7 +158,7 @@ def get_string(self, *args, **kwargs): for key, value in self.using.items(): if isinstance(value, Object): args = [ - f'{k}={json.dumps(v)}' + f'{k}={dump_json(v)}' for k, v in value.params.items() ] args_str = ', '.join(args) @@ -166,7 +166,7 @@ def get_string(self, *args, **kwargs): if isinstance(value, Identifier): value = value.to_string() else: - value = json.dumps(value) + value = dump_json(value) using_ar.append(f'{Identifier(key).to_string()}={value}') diff --git a/mindsdb_sql_parser/lexer.py b/mindsdb_sql_parser/lexer.py index 6eba3ec..ad902e8 100644 --- a/mindsdb_sql_parser/lexer.py +++ b/mindsdb_sql_parser/lexer.py @@ -353,7 +353,7 @@ def INTEGER(self, t): def QUOTE_STRING(self, t): return t - @_(r'"(?:\\.|[^"])*"') + @_(r'"(?:\\.|[^"])*(?:""(?:\\.|[^"])*)*"') def DQUOTE_STRING(self, t): return t diff --git a/mindsdb_sql_parser/parser.py b/mindsdb_sql_parser/parser.py index fd8d099..3482ded 100644 --- a/mindsdb_sql_parser/parser.py +++ b/mindsdb_sql_parser/parser.py @@ -26,7 +26,7 @@ from mindsdb_sql_parser.exceptions import ParsingException 
def unquote(s, is_double_quoted=False):
    '''
    Undo the quote escaping inside a quoted string token.

    Backslash-escaped quotes (\\" and \\') are always collapsed to the bare
    quote character. In addition the doubled form of the *enclosing* quote
    is collapsed: '' -> ' by default, "" -> " when `is_double_quoted` is set.
    The surrounding quote characters themselves are not removed here.

    :param s: raw token text
    :param is_double_quoted: True when the token is delimited by double quotes
    :return: text with quote escapes resolved
    '''
    s = s.replace('\\"', '"').replace("\\'", "'")
    if is_double_quoted:
        s = s.replace('""', '"')
    else:
        s = s.replace("''", "'")
    return s


def dump_json(obj) -> str:
    '''
    Dump an object into a json-like string using:
    - single quotes for strings
    - the same quoting rules as the `unquote` function ( ' is written as '' )

    dicts and lists/tuples are rendered recursively, dict keys are always
    rendered as strings, None and non-finite floats become null, booleans
    become true / false. Any other type is rendered as the quoted `str()`
    of the object.

    :param obj: value to render
    :return: rendered string
    '''

    if isinstance(obj, dict):
        items = []
        for k, v in obj.items():
            # keys must be strings in JSON
            if not isinstance(k, str):
                k = str(k)
            items.append(f'{dump_json(k)}: {dump_json(v)}')
        return "{" + ", ".join(items) + "}"

    if isinstance(obj, (list, tuple)):
        return "[" + ", ".join(dump_json(i) for i in obj) + "]"

    if isinstance(obj, str):
        # a single quote inside the value is escaped by doubling it
        return "'" + obj.replace("'", "''") + "'"

    if obj is None:
        return "null"

    # bool has to be tested before int: bool is a subclass of int, so the
    # numeric branch below would otherwise render True / False
    if isinstance(obj, bool):
        return "true" if obj else "false"

    if isinstance(obj, (int, float)):
        # NaN (the only value not equal to itself) and +/-inf have no JSON form
        if obj != obj or obj in (float('inf'), float('-inf')):
            return "null"
        return str(obj)

    # unknown type: fall back to its quoted string representation
    return dump_json(str(obj))
a/tests/test_mindsdb/test_databases.py b/tests/test_mindsdb/test_databases.py index e3ddfc4..219e7f8 100644 --- a/tests/test_mindsdb/test_databases.py +++ b/tests/test_mindsdb/test_databases.py @@ -140,7 +140,7 @@ def test_alter_database(self): def test_parser_render(self): - password = "a dm\\in123\"_.,';:!@#$%^&*()\n<>`{}[]" + value = "a dm\\in123\"_.,';:!@#$%^&*()\n<>`{}[]" ''' quoting rules: @@ -151,16 +151,16 @@ def test_parser_render(self): sql = f""" CREATE DATABASE db WITH engine = 'postgres' PARAMETERS = {{ - 'password': {symbol}{password.replace(symbol, symbol * 2)}{symbol} + 'password': {symbol}{value.replace(symbol, symbol * 2)}{symbol} }} """ # check parsing query = parse_sql(sql) - assert query.parameters['password'] == password + assert query.parameters['password'] == value # check render sql2 = str(query) query2 = parse_sql(sql2) - assert query2.parameters['password'] == password + assert query2.parameters['password'] == value