-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
ob_expr_regexp_context.h
143 lines (123 loc) · 5 KB
/
ob_expr_regexp_context.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_SQL_ENGINE_REGEX_OB_POSIX_REGEX_
#define OCEANBASE_SQL_ENGINE_REGEX_OB_POSIX_REGEX_
#include "easy_define.h" // for conflict of macro likely
#include "lib/utility/ob_print_utils.h"
#include "lib/charset/ob_mysql_global.h"
#include "lib/charset/ob_ctype.h"
#include <sys/types.h>
#include <assert.h>
#include "lib/charset/ob_charset.h"
#include <icu/i18n/unicode/uregex.h>
#include "sql/engine/expr/ob_expr_operator.h"
// This regex implementation is compatible with MySQL 8.0.
namespace oceanbase
{
namespace sql
{
class ObExprRegexContext : public ObExprOperatorCtx
{
public:
ObExprRegexContext();
virtual ~ObExprRegexContext();
public:
static const char *icu_version_string() { return U_ICU_VERSION; }
inline bool is_inited() const { return inited_; }
void destroy();
void reset();
// The previous regex compile result can be used if pattern not change, if %reusable is true.
// %string_buf must be the same with previous init too if %reusable is true.
int init(ObExprStringBuf &string_buf,
ObSQLSessionInfo *session_info,
const ObString &origin_pattern,
const uint32_t cflags,
const bool reusable,
const ObCollationType cs_type);
int match(ObExprStringBuf &string_buf,
const ObString &text,
const int64_t start,
bool &result) const;
int find(ObExprStringBuf &string_buf,
const ObString &text,
const int64_t start,
const int64_t occurrence,
const int64_t return_option,
const int64_t subexpr,
int64_t &result) const;
int count(ObExprStringBuf &string_buf,
const ObString &text,
const int32_t start,
int64_t &result) const;
int substr(ObExprStringBuf &string_buf,
const ObString &text,
const int64_t start,
const int64_t occurrence,
const int64_t subexpr,
ObString &result) const;
int replace(ObExprStringBuf &string_buf,
const ObString &text_string,
const ObString &replace_string,
const int64_t start,
const int64_t occurrence,
ObString &result) const;
int append_head(ObExprStringBuf &string_buf,
const int32_t current_pos,
UChar *&replace_buff,
int32_t &buff_size,
int32_t &buff_pos) const;
int append_replace_str(ObExprStringBuf &string_buf,
const UChar *u_replace,
const int32_t u_replace_length,
UChar *&replace_buff,
int32_t &buff_size,
int32_t &buff_pos) const;
int append_tail(ObExprStringBuf &string_buf,
UChar *&replace_buff,
int32_t &buff_size,
int32_t &buff_pos) const;
static int get_regexp_flags(const ObString &match_param,
const bool is_case_sensitive,
uint32_t &flags);
static int check_need_utf8(ObRawExpr *expr, bool &is_nstring);
static inline bool is_binary_string(const ObExprResType &type) {
return CS_TYPE_BINARY == type.get_collation_type() && ob_is_string_tc(type.get_type());
}
static inline bool is_binary_compatible(const ObExprResType &type) {
return CS_TYPE_BINARY == type.get_collation_type() || !ob_is_string_or_lob_type(type.get_type());
}
TO_STRING_KV(K_(inited));
static int check_binary_compatible(const ObExprResType *types, int64_t num);
private:
int preprocess_pattern(common::ObExprStringBuf &string_buf,
const common::ObString &origin_pattern,
common::ObString &pattern);
int check_icu_regexp_status(UErrorCode u_error_code, const UParseError *parse_error = NULL) const;
int get_valid_unicode_string(ObExprStringBuf &string_buf,
const ObString &origin_str,
UChar *&u_str,
int32_t &u_str_len) const;
int get_valid_replace_string(ObIAllocator &alloc,
const ObString &origin_replace,
UChar *&u_replace,
int32_t &u_replace_len) const;
private:
bool inited_;
ObInplaceAllocator pattern_allocator_;
common::ObString pattern_;
int cflags_;
ObInplaceAllocator pattern_wc_allocator_;
URegularExpression *regexp_engine_;
};
}
}
#endif //OCEANBASE_SQL_ENGINE_REGEX_OB_POSIX_REGEX_