diff --git a/mindsdb_sql_parser/ast/mindsdb/create_database.py b/mindsdb_sql_parser/ast/mindsdb/create_database.py index ed33998..e3c0616 100644 --- a/mindsdb_sql_parser/ast/mindsdb/create_database.py +++ b/mindsdb_sql_parser/ast/mindsdb/create_database.py @@ -1,6 +1,5 @@ -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json class CreateDatabase(ASTNode): @@ -49,6 +48,6 @@ def get_string(self, *args, **kwargs): parameters_str = '' if self.parameters: - parameters_str = f', PARAMETERS = {json.dumps(self.parameters)}' + parameters_str = f', PARAMETERS = {dump_json(self.parameters)}' out_str = f'CREATE{replace_str} DATABASE {"IF NOT EXISTS " if self.if_not_exists else ""}{self.name.to_string()} {engine_str}{parameters_str}' return out_str diff --git a/mindsdb_sql_parser/ast/mindsdb/create_predictor.py b/mindsdb_sql_parser/ast/mindsdb/create_predictor.py index 51e8bc6..9e95f4e 100644 --- a/mindsdb_sql_parser/ast/mindsdb/create_predictor.py +++ b/mindsdb_sql_parser/ast/mindsdb/create_predictor.py @@ -1,6 +1,5 @@ -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json from mindsdb_sql_parser.ast.select import Identifier from mindsdb_sql_parser.ast.select.operation import Object @@ -101,13 +100,13 @@ def get_string(self, *args, **kwargs): for key, value in self.using.items(): if isinstance(value, Object): args = [ - f'{k}={json.dumps(v)}' + f'{k}={dump_json(v)}' for k, v in value.params.items() ] args_str = ', '.join(args) value = f'{value.type}({args_str})' else: - value = json.dumps(value) + value = dump_json(value) using_ar.append(f'{Identifier(key).to_string()}={value}') diff --git a/mindsdb_sql_parser/ast/select/select.py b/mindsdb_sql_parser/ast/select/select.py index c5eaeb7..acda439 100644 --- a/mindsdb_sql_parser/ast/select/select.py +++ 
b/mindsdb_sql_parser/ast/select/select.py @@ -1,9 +1,9 @@ from typing import List, Union -import json from mindsdb_sql_parser.ast.base import ASTNode -from mindsdb_sql_parser.utils import indent +from mindsdb_sql_parser.utils import indent, dump_json from mindsdb_sql_parser.ast.select.operation import Object + class Select(ASTNode): def __init__(self, @@ -158,7 +158,7 @@ def get_string(self, *args, **kwargs): for key, value in self.using.items(): if isinstance(value, Object): args = [ - f'{k}={json.dumps(v)}' + f'{k}={dump_json(v)}' for k, v in value.params.items() ] args_str = ', '.join(args) @@ -166,7 +166,7 @@ def get_string(self, *args, **kwargs): if isinstance(value, Identifier): value = value.to_string() else: - value = json.dumps(value) + value = dump_json(value) using_ar.append(f'{Identifier(key).to_string()}={value}') diff --git a/mindsdb_sql_parser/lexer.py b/mindsdb_sql_parser/lexer.py index 6eba3ec..ad902e8 100644 --- a/mindsdb_sql_parser/lexer.py +++ b/mindsdb_sql_parser/lexer.py @@ -353,7 +353,7 @@ def INTEGER(self, t): def QUOTE_STRING(self, t): return t - @_(r'"(?:\\.|[^"])*"') + @_(r'"(?:\\.|[^"])*(?:""(?:\\.|[^"])*)*"') def DQUOTE_STRING(self, t): return t diff --git a/mindsdb_sql_parser/parser.py b/mindsdb_sql_parser/parser.py index fd8d099..3482ded 100644 --- a/mindsdb_sql_parser/parser.py +++ b/mindsdb_sql_parser/parser.py @@ -26,7 +26,7 @@ from mindsdb_sql_parser.exceptions import ParsingException from mindsdb_sql_parser.ast.mindsdb.retrain_predictor import RetrainPredictor from mindsdb_sql_parser.ast.mindsdb.finetune_predictor import FinetunePredictor -from mindsdb_sql_parser.utils import ensure_select_keyword_order, JoinType, tokens_to_string +from mindsdb_sql_parser.utils import ensure_select_keyword_order, JoinType, tokens_to_string, unquote from mindsdb_sql_parser.logger import ParserLogger from mindsdb_sql_parser.lexer import MindsDBLexer @@ -2024,11 +2024,11 @@ def integer(self, p): @_('QUOTE_STRING') def quote_string(self, p): - return 
def unquote(s, is_double_quoted=False):
    """
    Undo the quote escaping used inside a string literal of a query.

    Two escaping styles are reduced to a bare quote character:
    - a backslash followed by a single or double quote (always handled);
    - a doubled quote character: two double quotes when `is_double_quoted`
      is true, two single quotes otherwise.

    The replacements are applied to the whole of `s`; enclosing delimiters
    are not removed here.

    :param s: text of the literal
    :param is_double_quoted: True if the literal was written in double quotes
    :return: text with escaped quotes replaced by the quote character
    """
    s = s.replace('\\"', '"').replace("\\'", "'")
    quote = '"' if is_double_quoted else "'"
    return s.replace(quote * 2, quote)


def dump_json(obj) -> str:
    """
    Dump an object into a json-like string using:
     - single quotes for strings
     - the same quoting rule as `unquote`: a single quote inside a string
       is written as two single quotes

    dict, list/tuple, str, bool, int, float and None are rendered natively.
    NaN and infinite numbers are rendered as null. Any other object is
    rendered as the quoted result of `str(obj)`.

    :param obj: value to render
    :return: json-like representation of the value
    """
    if obj is None:
        return "null"

    # bool is a subclass of int, so it has to be checked before the numeric
    # branch; otherwise True/False would be rendered as 'True'/'False'
    if isinstance(obj, bool):
        return "true" if obj else "false"

    if isinstance(obj, str):
        # NOTE(review): backslashes are not escaped; a backslash placed right
        # before a quote (or at the end of the string) presumably does not
        # survive a render -> parse round trip - confirm against the lexer
        return "'" + obj.replace("'", "''") + "'"

    if isinstance(obj, (int, float)):
        # NaN is the only value not equal to itself
        if obj != obj or obj in (float('inf'), float('-inf')):
            return "null"
        return str(obj)

    if isinstance(obj, dict):
        # keys must be strings in JSON
        items = [
            f'{dump_json(k if isinstance(k, str) else str(k))}: {dump_json(v)}'
            for k, v in obj.items()
        ]
        return "{" + ", ".join(items) + "}"

    if isinstance(obj, (list, tuple)):
        return "[" + ", ".join(dump_json(i) for i in obj) + "]"

    return dump_json(str(obj))
expected_ast.to_tree() - def test_create_database_using(self): sql = "CREATE DATABASE db using ENGINE = 'mysql', PARAMETERS = {'A': 1}" @@ -130,7 +129,6 @@ def test_create_database_using(self): assert str(ast).lower() == str(expected_ast).lower() assert ast.to_tree() == expected_ast.to_tree() - def test_alter_database(self): sql = "ALTER DATABASE db PARAMETERS = {'A': 1, 'B': 2}" ast = parse_sql(sql) @@ -139,3 +137,30 @@ def test_alter_database(self): assert str(ast) == str(expected_ast) assert ast.to_tree() == expected_ast.to_tree() + + def test_parser_render(self): + + value = "a dm\\in123\"_.,';:!@#$%^&*()\n<>`{}[]" + + ''' + quoting rules: + ' => '' (in single quoted strings) + " => "" (in double quoted strings) + ''' + for symbol in ("'", '"'): + sql = f""" + CREATE DATABASE db WITH engine = 'postgres' + PARAMETERS = {{ + 'password': {symbol}{value.replace(symbol, symbol * 2)}{symbol} + }} + """ + + # check parsing + query = parse_sql(sql) + assert query.parameters['password'] == value + + # check render + sql2 = str(query) + query2 = parse_sql(sql2) + assert query2.parameters['password'] == value +