Skip to content

Commit

Permalink
Accept key value delimiter as part of value
Browse files Browse the repository at this point in the history
  • Loading branch information
yokofly committed Jan 25, 2024
1 parent 4846158 commit ae69d23
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 16 deletions.
22 changes: 21 additions & 1 deletion src/Functions/keyvaluepair/impl/NeedleFactory.h
Expand Up @@ -38,7 +38,7 @@ class NeedleFactory
return SearchSymbols {std::string{needles.data(), needles.size()}};
}

SearchSymbols getReadNeedles(const Configuration & extractor_configuration)
SearchSymbols getReadKeyNeedles(const Configuration & extractor_configuration)
{
const auto & [key_value_delimiter, quoting_character, pair_delimiters]
= extractor_configuration;
Expand All @@ -57,6 +57,26 @@ class NeedleFactory

return SearchSymbols {std::string{needles.data(), needles.size()}};
}

/// Builds the set of characters searched for while reading a value.
///
/// The resulting symbols are, in order:
///   1. the quoting character,
///   2. every pair delimiter,
///   3. a backslash, only when the factory is instantiated WITH_ESCAPING.
///
/// The key-value delimiter is deliberately absent from this set, so it is
/// not one of the characters the search stops on.
SearchSymbols getReadValueNeedles(const Configuration & extractor_configuration)
{
    // The first binding (the key-value delimiter) is unpacked only because the
    // structured binding must name every member; it is not added to the set.
    const auto & [kv_delimiter, quote, delimiters] = extractor_configuration;

    std::vector<char> symbols;
    symbols.push_back(quote);
    symbols.insert(symbols.end(), delimiters.begin(), delimiters.end());

    if constexpr (WITH_ESCAPING)
        symbols.push_back('\\');

    return SearchSymbols {std::string(symbols.begin(), symbols.end())};
}

SearchSymbols getReadQuotedNeedles(const Configuration & extractor_configuration)
{
const auto quoting_character = extractor_configuration.quoting_character;
Expand Down
19 changes: 6 additions & 13 deletions src/Functions/keyvaluepair/impl/StateHandlerImpl.h
@@ -1,4 +1,3 @@

#pragma once

#include <Functions/keyvaluepair/impl/Configuration.h>
Expand Down Expand Up @@ -42,7 +41,8 @@ class StateHandlerImpl : public StateHandler
NeedleFactory<WITH_ESCAPING> needle_factory;

wait_needles = needle_factory.getWaitNeedles(configuration);
read_needles = needle_factory.getReadNeedles(configuration);
read_key_needles = needle_factory.getReadKeyNeedles(configuration);
read_value_needles = needle_factory.getReadValueNeedles(configuration);
read_quoted_needles = needle_factory.getReadQuotedNeedles(configuration);
}

Expand Down Expand Up @@ -78,7 +78,7 @@ class StateHandlerImpl : public StateHandler

size_t pos = 0;

while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_needles))
while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_key_needles))
{
auto character_position = p - file.begin();
size_t next_pos = character_position + 1u;
Expand Down Expand Up @@ -192,10 +192,6 @@ class StateHandlerImpl : public StateHandler
{
return {pos + 1u, State::READING_QUOTED_VALUE};
}
else if (isKeyValueDelimiter(current_character))
{
return {pos, State::WAITING_KEY};
}

if constexpr (WITH_ESCAPING)
{
Expand All @@ -219,7 +215,7 @@ class StateHandlerImpl : public StateHandler

size_t pos = 0;

while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_needles))
while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_value_needles))
{
const size_t character_position = p - file.begin();
size_t next_pos = character_position + 1u;
Expand All @@ -238,10 +234,6 @@ class StateHandlerImpl : public StateHandler
}
}
}
else if (isKeyValueDelimiter(*p))
{
return {next_pos, State::WAITING_KEY};
}
else if (isPairDelimiter(*p))
{
value.append(file.begin() + pos, file.begin() + character_position);
Expand Down Expand Up @@ -301,7 +293,8 @@ class StateHandlerImpl : public StateHandler

private:
SearchSymbols wait_needles;
SearchSymbols read_needles;
SearchSymbols read_key_needles;
SearchSymbols read_value_needles;
SearchSymbols read_quoted_needles;

/*
Expand Down
Expand Up @@ -333,6 +333,18 @@ WITH
SELECT
x;
{}
-- key value delimiter should be considered valid part of value
WITH
extract_key_value_pairs('formula=1+2=3 argument1=1 argument2=2 result=3, char="=" char2== string="foo=bar"', '=') AS s_map,
CAST(
array_map(
(x) -> (x, s_map[x]), array_sort(map_keys(s_map))
),
'map(string,string)'
) AS x
SELECT
x;
{'argument1':'1','argument2':'2','char':'=','char2':'=','formula':'1+2=3','result':'3','string':'foo=bar'}
-- check str_to_map alias (it is case-insensitive)
WITH
sTr_tO_mAp('name:neymar, age:31 team:psg,nationality:brazil') AS s_map,
Expand Down
Expand Up @@ -121,9 +121,9 @@ SELECT
x;

-- semi-colon as pair delimiter
-- expected output: {'age':'31','name':'neymar','team':'psg'}
-- expected output: {'age':'31','anotherkey':'anothervalue','name':'neymar','random_key':'value_with_comma,still_part_of_value:still_part_of_value','team':'psg'}
WITH
extract_key_value_pairs('name:neymar;age:31;team:psg;invalid1:invalid1,invalid2:invalid2', ':', ';') AS s_map,
extract_key_value_pairs('name:neymar;age:31;team:psg;random_key:value_with_comma,still_part_of_value:still_part_of_value;anotherkey:anothervalue', ':', ';') AS s_map,
CAST(
array_map(
(x) -> (x, s_map[x]), array_sort(map_keys(s_map))
Expand Down Expand Up @@ -469,6 +469,18 @@ WITH
SELECT
x;

-- key value delimiter should be considered valid part of value
WITH
extract_key_value_pairs('formula=1+2=3 argument1=1 argument2=2 result=3, char="=" char2== string="foo=bar"', '=') AS s_map,
CAST(
array_map(
(x) -> (x, s_map[x]), array_sort(map_keys(s_map))
),
'map(string,string)'
) AS x
SELECT
x;

-- check str_to_map alias (it is case-insensitive)
WITH
sTr_tO_mAp('name:neymar, age:31 team:psg,nationality:brazil') AS s_map,
Expand Down

0 comments on commit ae69d23

Please sign in to comment.