Skip to content

Commit

Permalink
Refactor regular expression handling in hba.c
Browse files Browse the repository at this point in the history
AuthToken gains a regular expression, and IdentLine is changed so that it
uses an AuthToken rather than separately tracking the ident user string
used for the regex compilation and its generated regex_t.  In the case
of pg_ident.conf, a set of AuthTokens is built in the pre-parsing phase
of the file, and an extra regular expression is compiled when building
the list of IdentLines, after checking the sanity of the fields in a
pre-parsed entry.

The logic in charge of compiling and executing regular expressions now
lives in a new pair of routines, regcomp_auth_token() and
regexec_auth_token(), which are wrappers around pg_regcomp() and
pg_regexec() that work on AuthTokens.  While at it, this patch adds
free_auth_token(), a routine that frees an AuthToken, to simplify the
logic around the requirement of using a specific free routine for
compiled regular expressions.  Note that this commit introduces no
functional or behavior changes.

The goal of this patch is to ease the use of regular expressions with
more items of pg_hba.conf (user list, database list, potentially
hostnames) where AuthTokens are used extensively.  This will be tackled
later in a separate patch.

Author: Bertrand Drouvot, Michael Paquier
Discussion: https://postgr.es/m/fff0d7c1-8ad4-76a1-9db3-0ab6ec338bf7@amazon.com
  • Loading branch information
michaelpq committed Oct 19, 2022
1 parent 8bf66de commit fc579e1
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 71 deletions.
160 changes: 103 additions & 57 deletions src/backend/libpq/hba.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ typedef struct check_network_data
} check_network_data;


/*
 * Convenience tests on an AuthToken pointer "t".
 *
 * token_has_regexp: true if a compiled regular expression is attached.
 * token_is_keyword: true if the token is unquoted and its string equals "k".
 * token_matches:    true if the token's string equals "k", quoted or not.
 *
 * Arguments are parenthesized so that any pointer expression (for example
 * "tok + 1" or "cond ? a : b") can be passed safely.
 */
#define token_has_regexp(t)	((t)->regex != NULL)
#define token_is_keyword(t, k)	(!(t)->quoted && strcmp((t)->string, (k)) == 0)
#define token_matches(t, k)  (strcmp((t)->string, (k)) == 0)

Expand All @@ -80,9 +81,10 @@ static MemoryContext parsed_hba_context = NULL;
* pre-parsed content of ident mapping file: list of IdentLine structs.
* parsed_ident_context is the memory context where it lives.
*
* NOTE: the IdentLine structs can contain pre-compiled regular expressions
* that live outside the memory context. Before destroying or resetting the
* memory context, they need to be explicitly free'd.
* NOTE: the IdentLine structs can contain AuthTokens with pre-compiled
* regular expressions that live outside the memory context. Before
* destroying or resetting the memory context, they need to be explicitly
* free'd.
*/
static List *parsed_ident_lines = NIL;
static MemoryContext parsed_ident_context = NULL;
Expand Down Expand Up @@ -117,6 +119,9 @@ static List *tokenize_inc_file(List *tokens, const char *outer_filename,
const char *inc_filename, int elevel, char **err_msg);
static bool parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline,
int elevel, char **err_msg);
static int regcomp_auth_token(AuthToken *token);
static int regexec_auth_token(const char *match, AuthToken *token,
size_t nmatch, regmatch_t pmatch[]);


/*
Expand Down Expand Up @@ -267,14 +272,26 @@ make_auth_token(const char *token, bool quoted)

toklen = strlen(token);
/* we copy string into same palloc block as the struct */
authtoken = (AuthToken *) palloc(sizeof(AuthToken) + toklen + 1);
authtoken = (AuthToken *) palloc0(sizeof(AuthToken) + toklen + 1);
authtoken->string = (char *) authtoken + sizeof(AuthToken);
authtoken->quoted = quoted;
authtoken->regex = NULL;
memcpy(authtoken->string, token, toklen + 1);

return authtoken;
}

/*
 * Release the resources attached to an AuthToken.
 *
 * Only the compiled regular expression, if the token carries one, is
 * cleaned up here, through pg_regfree().  Tokens without a regular
 * expression are left untouched.
 */
static void
free_auth_token(AuthToken *token)
{
	/* Nothing to clean up for a token without a regular expression */
	if (!token_has_regexp(token))
		return;

	pg_regfree(token->regex);
}

/*
* Copy an AuthToken struct into freshly palloc'd memory.
*/
Expand All @@ -286,6 +303,56 @@ copy_auth_token(AuthToken *in)
return out;
}

/*
 * Build the compiled form of a token's regular expression.
 *
 * A token whose string does not begin with a slash carries no regular
 * expression: nothing is compiled, token->regex stays NULL and 0 is
 * returned.  Otherwise the text following the slash is compiled into a
 * freshly allocated regex_t stored in token->regex, and the status code of
 * pg_regcomp() is handed back to the caller.  token->regex remains set
 * even when the compilation fails.
 */
static int
regcomp_auth_token(AuthToken *token)
{
	const char *pattern;
	size_t		patlen;
	pg_wchar   *wpattern;
	int			wpatlen;
	int			status;

	Assert(token->regex == NULL);

	/* Only strings introduced by a slash are regular expressions */
	if (token->string[0] != '/')
		return 0;

	/* Skip the leading slash, it is not part of the expression */
	pattern = token->string + 1;
	patlen = strlen(pattern);

	token->regex = (regex_t *) palloc0(sizeof(regex_t));

	/* pg_regcomp() works on wide characters, so convert the pattern */
	wpattern = palloc((patlen + 1) * sizeof(pg_wchar));
	wpatlen = pg_mb2wchar_with_len(pattern, wpattern, patlen);

	status = pg_regcomp(token->regex, wpattern, wpatlen,
						REG_ADVANCED, C_COLLATION_OID);

	pfree(wpattern);

	return status;
}

/*
 * Run the regular expression compiled in an AuthToken against the string
 * given in "match".
 *
 * The token must hold a compiled regular expression (string starting with
 * a slash and regex set).  "nmatch" and "pmatch" are passed through to
 * pg_regexec(), letting the caller optionally collect the match positions.
 * Returns the result of pg_regexec().
 */
static int
regexec_auth_token(const char *match, AuthToken *token, size_t nmatch,
				   regmatch_t pmatch[])
{
	size_t		matchlen;
	pg_wchar   *wsubject;
	int			wsubjectlen;
	int			status;

	Assert(token->string[0] == '/' && token->regex);

	/* pg_regexec() works on wide characters, so convert the input string */
	matchlen = strlen(match);
	wsubject = palloc((matchlen + 1) * sizeof(pg_wchar));
	wsubjectlen = pg_mb2wchar_with_len(match, wsubject, matchlen);

	status = pg_regexec(token->regex, wsubject, wsubjectlen, 0, NULL,
						nmatch, pmatch, 0);

	pfree(wsubject);

	return status;
}

/*
* Tokenize one HBA field from a line, handling file inclusion and comma lists.
Expand Down Expand Up @@ -2307,6 +2374,7 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
List *tokens;
AuthToken *token;
IdentLine *parsedline;
int rc;

Assert(tok_line->fields != NIL);
field = list_head(tok_line->fields);
Expand All @@ -2326,7 +2394,9 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
tokens = lfirst(field);
IDENT_MULTI_VALUE(tokens);
token = linitial(tokens);
parsedline->ident_user = pstrdup(token->string);

/* Copy the ident user token */
parsedline->token = copy_auth_token(token);

/* Get the PG rolename token */
field = lnext(tok_line->fields, field);
Expand All @@ -2336,40 +2406,27 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
token = linitial(tokens);
parsedline->pg_role = pstrdup(token->string);

if (parsedline->ident_user[0] == '/')
/*
* Now that the field validation is done, compile a regex from the user
* token, if necessary.
*/
rc = regcomp_auth_token(parsedline->token);
if (rc)
{
/*
* When system username starts with a slash, treat it as a regular
* expression. Pre-compile it.
*/
int r;
pg_wchar *wstr;
int wlen;

wstr = palloc((strlen(parsedline->ident_user + 1) + 1) * sizeof(pg_wchar));
wlen = pg_mb2wchar_with_len(parsedline->ident_user + 1,
wstr, strlen(parsedline->ident_user + 1));

r = pg_regcomp(&parsedline->re, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
if (r)
{
char errstr[100];
char errstr[100];

pg_regerror(r, &parsedline->re, errstr, sizeof(errstr));
ereport(elevel,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression \"%s\": %s",
parsedline->ident_user + 1, errstr),
errcontext("line %d of configuration file \"%s\"",
pg_regerror(rc, parsedline->token->regex, errstr, sizeof(errstr));
ereport(elevel,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression \"%s\": %s",
parsedline->token->string + 1, errstr),
errcontext("line %d of configuration file \"%s\"",
line_num, IdentFileName)));

*err_msg = psprintf("invalid regular expression \"%s\": %s",
parsedline->ident_user + 1, errstr);
*err_msg = psprintf("invalid regular expression \"%s\": %s",
parsedline->token->string + 1, errstr);

pfree(wstr);
return NULL;
}
pfree(wstr);
return NULL;
}

return parsedline;
Expand All @@ -2394,44 +2451,35 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
return;

/* Match? */
if (identLine->ident_user[0] == '/')
if (token_has_regexp(identLine->token))
{
/*
* When system username starts with a slash, treat it as a regular
* expression. In this case, we process the system username as a
* regular expression that returns exactly one match. This is replaced
* for \1 in the database username string, if present.
* Process the system username as a regular expression that returns
* exactly one match. This is replaced for \1 in the database username
* string, if present.
*/
int r;
regmatch_t matches[2];
pg_wchar *wstr;
int wlen;
char *ofs;
char *regexp_pgrole;

wstr = palloc((strlen(ident_user) + 1) * sizeof(pg_wchar));
wlen = pg_mb2wchar_with_len(ident_user, wstr, strlen(ident_user));

r = pg_regexec(&identLine->re, wstr, wlen, 0, NULL, 2, matches, 0);
r = regexec_auth_token(ident_user, identLine->token, 2, matches);
if (r)
{
char errstr[100];

if (r != REG_NOMATCH)
{
/* REG_NOMATCH is not an error, everything else is */
pg_regerror(r, &identLine->re, errstr, sizeof(errstr));
pg_regerror(r, identLine->token->regex, errstr, sizeof(errstr));
ereport(LOG,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression match for \"%s\" failed: %s",
identLine->ident_user + 1, errstr)));
identLine->token->string + 1, errstr)));
*error_p = true;
}

pfree(wstr);
return;
}
pfree(wstr);

if ((ofs = strstr(identLine->pg_role, "\\1")) != NULL)
{
Expand All @@ -2443,7 +2491,7 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
ereport(LOG,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression \"%s\" has no subexpressions as requested by backreference in \"%s\"",
identLine->ident_user + 1, identLine->pg_role)));
identLine->token->string + 1, identLine->pg_role)));
*error_p = true;
return;
}
Expand Down Expand Up @@ -2490,13 +2538,13 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
if (case_insensitive)
{
if (pg_strcasecmp(identLine->pg_role, pg_role) == 0 &&
pg_strcasecmp(identLine->ident_user, ident_user) == 0)
pg_strcasecmp(identLine->token->string, ident_user) == 0)
*found_p = true;
}
else
{
if (strcmp(identLine->pg_role, pg_role) == 0 &&
strcmp(identLine->ident_user, ident_user) == 0)
strcmp(identLine->token->string, ident_user) == 0)
*found_p = true;
}
}
Expand Down Expand Up @@ -2646,8 +2694,7 @@ load_ident(void)
foreach(parsed_line_cell, new_parsed_lines)
{
newline = (IdentLine *) lfirst(parsed_line_cell);
if (newline->ident_user[0] == '/')
pg_regfree(&newline->re);
free_auth_token(newline->token);
}
MemoryContextDelete(ident_context);
return false;
Expand All @@ -2659,8 +2706,7 @@ load_ident(void)
foreach(parsed_line_cell, parsed_ident_lines)
{
newline = (IdentLine *) lfirst(parsed_line_cell);
if (newline->ident_user[0] == '/')
pg_regfree(&newline->re);
free_auth_token(newline->token);
}
}
if (parsed_ident_context != NULL)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/adt/hbafuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
if (ident != NULL)
{
values[index++] = CStringGetTextDatum(ident->usermap);
values[index++] = CStringGetTextDatum(ident->ident_user);
values[index++] = CStringGetTextDatum(ident->token->string);
values[index++] = CStringGetTextDatum(ident->pg_role);
}
else
Expand Down
28 changes: 15 additions & 13 deletions src/include/libpq/hba.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,20 @@ typedef enum ClientCertName
clientCertDN
} ClientCertName;

/*
* A single string token lexed from an authentication configuration file
* (pg_ident.conf or pg_hba.conf), together with whether the token has
* been quoted. If "string" begins with a slash, it may optionally
* contain a regular expression (currently used for pg_ident.conf when
* building IdentLines).
*/
typedef struct AuthToken
{
/* text of the token as lexed from the configuration file */
char *string;
/* true if the token was quoted in the configuration file */
bool quoted;
/*
* compiled regular expression, or NULL if none; make_auth_token() sets
* this to NULL and regcomp_auth_token() fills it in when "string" begins
* with a slash
*/
regex_t *regex;
} AuthToken;

typedef struct HbaLine
{
int linenumber;
Expand Down Expand Up @@ -127,22 +141,10 @@ typedef struct IdentLine
int linenumber;

char *usermap;
char *ident_user;
char *pg_role;
regex_t re;
AuthToken *token;
} IdentLine;

/*
* A single string token lexed from an authentication configuration file
* (pg_ident.conf or pg_hba.conf), together with whether the token has
* been quoted.
*/
typedef struct AuthToken
{
/* text of the token as lexed from the configuration file */
char *string;
/* true if the token was quoted in the configuration file */
bool quoted;
} AuthToken;

/*
* TokenizedAuthLine represents one line lexed from an authentication
* configuration file. Each item in the "fields" list is a sub-list of
Expand Down

0 comments on commit fc579e1

Please sign in to comment.