Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add regex filtering support for domains on the Query Log #1611

Merged
merged 23 commits into from Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d37c0d1
Add regex filtering support for domains on the Query Log (new config …
DL6ER Aug 1, 2023
299850e
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Sep 29, 2023
cc795fb
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Oct 7, 2023
24bacce
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Oct 8, 2023
7487e2e
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Oct 17, 2023
aa55962
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Nov 4, 2023
c89a239
Backslashs need to be escaped to avoid invalid escape sequences in th…
DL6ER Nov 4, 2023
ee367a6
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Jan 7, 2024
23d116c
Regex filtering is filtering: We need to do full counting to get the …
DL6ER Jan 10, 2024
cff605b
Further simplify skipping logic
DL6ER Jan 10, 2024
0eb1aaa
Extend webserver.api.excludeClients and webserver.api.excludeDomains …
DL6ER Jan 11, 2024
ded6692
Clarify which API endpoints are affected by the exclusion settings
DL6ER Jan 11, 2024
4024af9
Only compare against valid filter strings
DL6ER Jan 11, 2024
aa82858
Remove excludeClients from Client activity over time (/api/history/cl…
DL6ER Jan 13, 2024
e9e4309
Remove webserver.api.excludeRegex and instead allow regex to be used …
DL6ER Jan 13, 2024
b7f49e9
Add Pi-hole v5 -> v6 regex migration for webserver.api.exclude{Domain…
DL6ER Jan 13, 2024
e35aa78
Only free API data when the API was started
DL6ER Jan 13, 2024
55339f0
Adjust webserver.api.exclude{Clients,Domains} description
DL6ER Jan 13, 2024
5c4355f
Compile exclude regexes only once, not N^2 times
DL6ER Jan 16, 2024
862d492
Merge branch 'development-v6' into new/queryLogRegex
DL6ER Jan 16, 2024
24b6df4
Reduce code duplication by factoring out filter regex compilation
DL6ER Jan 20, 2024
bafbc78
Apply review comments
DL6ER Jan 20, 2024
619a8b1
Fix pointer magic going wrong
DL6ER Jan 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/api/api.h
Expand Up @@ -15,6 +15,8 @@
// type cJSON
#include "webserver/cJSON/cJSON.h"
#include "webserver/http-common.h"
// regex_t
#include "regex_r.h"

// Common definitions
#define LOCALHOSTv4 "127.0.0.1"
Expand Down Expand Up @@ -43,6 +45,7 @@ int api_history_database_clients(struct ftl_conn *api);
// Query methods
int api_queries(struct ftl_conn *api);
int api_queries_suggestions(struct ftl_conn *api);
bool compile_filter_regex(struct ftl_conn *api, const char *path, cJSON *json, regex_t **regex, unsigned int *N_regex);

// Statistics methods (database)
int api_stats_database_top_items(struct ftl_conn *api);
Expand Down
3 changes: 3 additions & 0 deletions src/api/auth.c
Expand Up @@ -56,6 +56,9 @@ void init_api(void)

void free_api(void)
{
if(auth_data == NULL)
return;

// Store sessions in database
backup_db_sessions(auth_data, max_sessions);
max_sessions = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/api/docs/content/specs/config.yaml
Expand Up @@ -699,8 +699,8 @@ components:
pwhash: ''
totp_secret: ''
app_pwhash: ''
excludeClients: [ '1.2.3.4', 'localhost', 'fe80::345' ]
excludeDomains: [ 'google.de', 'pi-hole.net' ]
excludeClients: [ '1\.2\.3\.4', 'localhost', 'fe80::345' ]
excludeDomains: [ 'google\\.de', 'pi-hole\.net' ]
maxHistory: 86400
maxClients: 10
allow_destructive: true
Expand Down
29 changes: 4 additions & 25 deletions src/api/history.c
Expand Up @@ -91,31 +91,10 @@ int api_history_clients(struct ftl_conn *api)
if(skipclient == NULL || temparray == NULL)
{
unlock_shm();
return send_json_error(api, 500, "internal_error",
"Failed to allocate memory for client history", NULL);
}

// Check if the user wants to exclude any clients, this code path is
// only taken if the user has configured the web interface to exclude
// clients (it will most often be skipped)
unsigned int excludeClients = cJSON_GetArraySize(config.webserver.api.excludeClients.v.json);
if(excludeClients > 0)
{
for(int clientID = 0; clientID < counters->clients; clientID++)
{
// Get client pointer
const clientsData* client = getClient(clientID, true);
if(client == NULL)
continue;
// Check if this client should be skipped
for(unsigned int i = 0; i < excludeClients; i++)
{
cJSON *item = cJSON_GetArrayItem(config.webserver.api.excludeClients.v.json, i);
if(strcmp(getstr(client->ippos), item->valuestring) == 0 ||
strcmp(getstr(client->namepos), item->valuestring) == 0)
skipclient[clientID] = true;
}
}
return send_json_error(api, 500,
"internal_error",
"Failed to allocate memory for skipclient array",
NULL);
}

// Skip clients included in others (in alias-clients)
Expand Down
199 changes: 184 additions & 15 deletions src/api/queries.c
Expand Up @@ -19,7 +19,6 @@
#include "database/aliasclients.h"
// get_memdb()
#include "database/query-table.h"

// dbopen(false, ), dbclose()
#include "database/common.h"

Expand Down Expand Up @@ -438,6 +437,26 @@ int api_queries(struct ftl_conn *api)
}
}

// We use this boolean to memorize if we are filtering at all. It is used
// later to decide if we can short-circuit the query counting for
// performance reasons.
bool filtering = false;

// Regex filtering?
regex_t *regex_domains = NULL;
unsigned int N_regex_domains = 0;
if(compile_filter_regex(api, "webserver.api.excludeDomains",
config.webserver.api.excludeDomains.v.json,
&regex_domains, &N_regex_domains))
filtering = true;

regex_t *regex_clients = NULL;
unsigned int N_regex_clients = 0;
if(compile_filter_regex(api, "webserver.api.excludeClients",
config.webserver.api.excludeClients.v.json,
&regex_clients, &N_regex_clients))
filtering = true;

// Finish preparing query string
querystr_finish(querystr, sort_col, sort_dir);

Expand All @@ -462,10 +481,6 @@ int api_queries(struct ftl_conn *api)
sqlite3_errstr(rc));
}

// We use this boolean to memorize if we are filtering at all. It is used
// later to decide if we can short-circuit the query counting for
// performance reasons.
bool filtering = false;
// Bind items to prepared statement
if(api->request->query_string != NULL)
{
Expand Down Expand Up @@ -711,13 +726,74 @@ int api_queries(struct ftl_conn *api)
log_debug(DEBUG_API, " with cursor: %lu, start: %u, length: %d", cursor, start, length);

cJSON *queries = JSON_NEW_ARRAY();
unsigned int added = 0, recordsCounted = 0;
unsigned int added = 0, recordsCounted = 0, regex_skipped = 0;
bool skipTheRest = false;
while((rc = sqlite3_step(read_stmt)) == SQLITE_ROW)
{
// Increase number of records from the database
recordsCounted++;

// Apply possible domain regex filters to Query Log
const char *domain = (const char*)sqlite3_column_text(read_stmt, 4); // d.domain
if(N_regex_domains > 0)
{
bool match = false;
// Iterate over all regex filters
for(unsigned int i = 0; i < N_regex_domains; i++)
{
// Check if the domain matches the regex
if(regexec(&regex_domains[i], domain, 0, NULL, 0) == 0)
{
// Domain matches
match = true;
break;
}
}
if(match)
{
// Domain matches, we skip it and adjust the
// counter
recordsCounted--;
regex_skipped++;
continue;
}
}

// Apply possible client regex filters to Query Log
const char *client_ip = (const char*)sqlite3_column_text(read_stmt, 10); // c.ip
const char *client_name = NULL;
if(sqlite3_column_type(read_stmt, 11) == SQLITE_TEXT && sqlite3_column_bytes(read_stmt, 11) > 0)
client_name = (const char*)sqlite3_column_text(read_stmt, 11); // c.name
if(N_regex_clients > 0)
{
bool match = false;
// Iterate over all regex filters
for(unsigned int i = 0; i < N_regex_clients; i++)
{
// Check if the client's IP address or name matches the regex
if(regexec(&regex_clients[i], client_ip, 0, NULL, 0) == 0)
{
// Client IP matches
match = true;
break;
}
else if(client_name != NULL && regexec(&regex_clients[i], client_name, 0, NULL, 0) == 0)
{
// Client name matches
match = true;
break;
}
}
if(match)
{
// Client matches, we skip it and adjust the
// counter
recordsCounted--;
regex_skipped++;
continue;
}
}

// Skip all records once we have enough (but still count them)
if(skipTheRest)
continue;
Expand Down Expand Up @@ -753,7 +829,27 @@ int api_queries(struct ftl_conn *api)
{
// Skip everything AFTER we added the requested number
// of queries if length is > 0.
break;
continue;
}

// Check if we have reached the limit
if(added >= (unsigned int)length)
{
if(filtering)
{
// We are filtering, so we have to continue to
// step over the remaining rows to get the
// correct number of total records
skipTheRest = true;
continue;
}
else
{
// We are not filtering, so we can stop here
// The total number of records is the number
// of records in the database
break;
}
}

// Build item object
Expand All @@ -770,7 +866,7 @@ int api_queries(struct ftl_conn *api)
JSON_COPY_STR_TO_OBJECT(item, "type", get_query_type_str(query.type, &query, buffer));
JSON_REF_STR_IN_OBJECT(item, "status", get_query_status_str(query.status));
JSON_REF_STR_IN_OBJECT(item, "dnssec", get_query_dnssec_str(query.dnssec));
JSON_COPY_STR_TO_OBJECT(item, "domain", sqlite3_column_text(read_stmt, 4)); // d.domain
JSON_COPY_STR_TO_OBJECT(item, "domain", domain);

if(sqlite3_column_type(read_stmt, 5) == SQLITE_TEXT &&
sqlite3_column_bytes(read_stmt, 5) > 0)
Expand All @@ -784,11 +880,9 @@ int api_queries(struct ftl_conn *api)
JSON_ADD_ITEM_TO_OBJECT(item, "reply", reply);

cJSON *client = JSON_NEW_OBJECT();
JSON_COPY_STR_TO_OBJECT(client, "ip", sqlite3_column_text(read_stmt, 10)); // c.ip

if(sqlite3_column_type(read_stmt, 11) == SQLITE_TEXT &&
sqlite3_column_bytes(read_stmt, 11) > 0)
JSON_COPY_STR_TO_OBJECT(client, "name", sqlite3_column_text(read_stmt, 11)); // c.name
JSON_COPY_STR_TO_OBJECT(client, "ip", client_ip);
if(client_name != NULL)
JSON_COPY_STR_TO_OBJECT(client, "name", client_name);
else
JSON_ADD_NULL_TO_OBJECT(client, "name");
JSON_ADD_ITEM_TO_OBJECT(item, "client", client);
Expand Down Expand Up @@ -836,8 +930,8 @@ int api_queries(struct ftl_conn *api)

added++;
}
log_debug(DEBUG_API, "Sending %u of %lu in memory and %lu on disk queries (counted %u)",
added, mem_dbnum, disk_dbnum, recordsCounted);
log_debug(DEBUG_API, "Sending %u of %lu in memory and %lu on disk queries (counted %u, skipped %u)",
added, mem_dbnum, disk_dbnum, recordsCounted, regex_skipped);
cJSON *json = JSON_NEW_OBJECT();
JSON_ADD_ITEM_TO_OBJECT(json, "queries", queries);

Expand Down Expand Up @@ -866,5 +960,80 @@ int api_queries(struct ftl_conn *api)
// Finalize statements
sqlite3_finalize(read_stmt);

// Free regex memory if allocated
if(N_regex_domains > 0)
{
// Free individual regexes
for(unsigned int i = 0; i < N_regex_domains; i++)
regfree(&regex_domains[i]);

// Free array of regex pointers
free(regex_domains);
}
if(N_regex_clients > 0)
{
// Free individual regexes
for(unsigned int i = 0; i < N_regex_clients; i++)
regfree(&regex_clients[i]);

// Free array of regex pointers
free(regex_clients);
}

JSON_SEND_OBJECT(json);
}

/**
 * Compile the filter strings of a JSON array into POSIX extended regular
 * expressions.
 *
 * @param api     Connection used to report errors through send_json_error().
 * @param path    Config path of the array; only used in the warning that is
 *                logged when an entry is skipped.
 * @param json    JSON array holding the filter strings.
 * @param regex   Out: newly allocated array of compiled regexes, or NULL when
 *                this function returns false. On success the caller owns the
 *                array and has to regfree() each of the *N_regex elements
 *                before free()ing the array itself.
 * @param N_regex Out: number of successfully compiled regexes stored in
 *                *regex (0 when this function returns false).
 *
 * @return true if at least one regex was compiled (i.e. filtering is active),
 *         false if there is nothing to filter on or an error occurred.
 *
 * On failure, *regex and *N_regex are always left as NULL / 0 so the caller
 * never runs regexec() or regfree() on an element that was not compiled.
 *
 * NOTE(review): with a plain bool return value the caller cannot tell "no
 * filters configured" apart from "an error reply has already been sent".
 * Confirm how callers should abort their own reply in the error case.
 */
bool compile_filter_regex(struct ftl_conn *api, const char *path, cJSON *json, regex_t **regex, unsigned int *N_regex)
{
	// Start from a well-defined "no filters" state so every early return
	// below leaves consistent output values behind
	*regex = NULL;
	*N_regex = 0;

	const int N = cJSON_GetArraySize(json);
	if(N < 1)
		return false;

	// Allocate (zero-initialized) memory for the regex array. The array
	// is only handed to the caller once everything has been compiled.
	regex_t *compiled = calloc((size_t)N, sizeof(*compiled));
	if(compiled == NULL)
	{
		// The return value of send_json_error() is a status code, it
		// must not be forwarded as "filtering is active"
		(void)send_json_error(api, 500,
		                      "internal_error",
		                      "Internal server error, failed to allocate memory for regex array",
		                      NULL);
		return false;
	}

	// Compile regexes
	unsigned int n_compiled = 0; // next free slot == number of compiled regexes
	unsigned int index = 0;      // position within the JSON array
	cJSON *filter = NULL;
	cJSON_ArrayForEach(filter, json)
	{
		// Remember the position of this entry for the warning below,
		// it differs from n_compiled as soon as one entry was skipped
		const unsigned int item = index++;

		// Skip non-string, invalid and empty values
		if(!cJSON_IsString(filter) || filter->valuestring == NULL || filter->valuestring[0] == '\0')
		{
			log_warn("Skipping invalid regex at %s.%u", path, item);
			continue;
		}

		// Compile regex
		const int rc = regcomp(&compiled[n_compiled], filter->valuestring, REG_EXTENDED);
		if(rc != 0)
		{
			// Failed to compile regex
			char errbuf[1024] = { 0 };
			regerror(rc, &compiled[n_compiled], errbuf, sizeof(errbuf));
			log_err("Failed to compile regex \"%s\": %s",
			        filter->valuestring, errbuf);

			// Release everything compiled so far, the caller gets
			// nothing it would have to clean up
			for(unsigned int j = 0; j < n_compiled; j++)
				regfree(&compiled[j]);
			free(compiled);

			(void)send_json_error(api, 400,
			                      "bad_request",
			                      "Failed to compile regex",
			                      filter->valuestring);
			return false;
		}

		n_compiled++;
	}

	// Every entry was skipped: there is nothing to filter on
	if(n_compiled == 0)
	{
		free(compiled);
		return false;
	}

	// Hand the array over to the caller. Only the compiled entries are
	// reported so skipped (never compiled) slots are not touched later.
	*regex = compiled;
	*N_regex = n_compiled;
	return true;
}