Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LMDB - fix integration, restoring ability of use lmdb with nginx-modsecurity #2688

Merged
merged 6 commits into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
96 changes: 75 additions & 21 deletions src/collection/backend/lmdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <string>
#include <memory>

#include <pthread.h>

#include "modsecurity/variable_value.h"
#include "src/utils/regex.h"
#include "src/variables/variable.h"
Expand All @@ -36,22 +38,18 @@ namespace backend {
#ifdef WITH_LMDB

LMDB::LMDB(std::string name) :
Collection(name), m_env(NULL) {
MDB_txn *txn;
mdb_env_create(&m_env);
mdb_env_open(m_env, "./modsec-shared-collections",
MDB_WRITEMAP | MDB_NOSUBDIR, 0664);
mdb_txn_begin(m_env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, MDB_CREATE | MDB_DUPSORT, &m_dbi);
mdb_txn_commit(txn);
}

Collection(name), m_env(NULL), isOpen(false) {}

LMDB::~LMDB() {
mdb_env_close(m_env);
int LMDB::txn_begin(unsigned int flags, MDB_txn **ret) {
if (!isOpen) {
MDBEnvProvider* provider = MDBEnvProvider::GetInstance();
m_env = provider->GetEnv();
m_dbi = *(provider->GetDBI());
isOpen = true;
}
return mdb_txn_begin(m_env, NULL, flags, ret);
}


void LMDB::string2val(const std::string& str, MDB_val *val) {
val->mv_size = sizeof(char)*(str.size());
val->mv_data = const_cast<char *>(str.c_str());
Expand Down Expand Up @@ -159,7 +157,7 @@ std::unique_ptr<std::string> LMDB::resolveFirst(const std::string& var) {

string2val(var, &mdb_key);

rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn);
rc = txn_begin(MDB_RDONLY, &txn);
lmdb_debug(rc, "txn", "resolveFirst");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -192,7 +190,7 @@ bool LMDB::storeOrUpdateFirst(const std::string &key,
string2val(key, &mdb_key);
string2val(value, &mdb_value);

rc = mdb_txn_begin(m_env, NULL, 0, &txn);
rc = txn_begin(0, &txn);
lmdb_debug(rc, "txn", "storeOrUpdateFirst");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -240,7 +238,7 @@ void LMDB::resolveSingleMatch(const std::string& var,
MDB_val mdb_value_ret;
MDB_cursor *cursor;

rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn);
rc = txn_begin(MDB_RDONLY, &txn);
lmdb_debug(rc, "txn", "resolveSingleMatch");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -271,7 +269,7 @@ void LMDB::store(std::string key, std::string value) {
int rc;
MDB_stat mst;

rc = mdb_txn_begin(m_env, NULL, 0, &txn);
rc = txn_begin(0, &txn);
lmdb_debug(rc, "txn", "store");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -310,7 +308,7 @@ bool LMDB::updateFirst(const std::string &key,
MDB_val mdb_value;
MDB_val mdb_value_ret;

rc = mdb_txn_begin(m_env, NULL, 0, &txn);
rc = txn_begin(0, &txn);
lmdb_debug(rc, "txn", "updateFirst");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -364,7 +362,7 @@ void LMDB::del(const std::string& key) {
MDB_val mdb_value_ret;
MDB_stat mst;

rc = mdb_txn_begin(m_env, NULL, 0, &txn);
rc = txn_begin(0, &txn);
lmdb_debug(rc, "txn", "del");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -411,7 +409,7 @@ void LMDB::resolveMultiMatches(const std::string& var,
size_t keySize = var.size();
MDB_cursor *cursor;

rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn);
rc = txn_begin(MDB_RDONLY, &txn);
lmdb_debug(rc, "txn", "resolveMultiMatches");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -465,7 +463,7 @@ void LMDB::resolveRegularExpression(const std::string& var,

Utils::Regex r(var, true);

rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn);
rc = txn_begin(MDB_RDONLY, &txn);
lmdb_debug(rc, "txn", "resolveRegularExpression");
if (rc != 0) {
goto end_txn;
Expand Down Expand Up @@ -503,6 +501,62 @@ void LMDB::resolveRegularExpression(const std::string& var,
return;
}


MDBEnvProvider* MDBEnvProvider::provider_ = nullptr;

MDBEnvProvider* MDBEnvProvider::GetInstance() {
if (provider_==nullptr) {
provider_ = new MDBEnvProvider();
}
return provider_;
}

void MDBEnvProvider::Finalize() {
if (provider_!=nullptr) {
provider_->close();
provider_ = nullptr;
}
}

MDBEnvProvider::MDBEnvProvider() :
m_env(NULL), initialized(false) {
pthread_mutex_init(&m_lock, NULL);
Copy link
Contributor

@martinhsv martinhsv Apr 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering if the call to pthread_mutex_init might be better inside the if-block at line 538 in init().

As it is, it looks like it might at least be theoretically possible to have a thread-safety issue here where pthread_mutex_init could get called on an already-initialized m_lock.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, m_lock mutex is responsible for controlling access to flag: initialized and preventing calling mdb_env_create multiple times. If we moved it into if-block it wouldn't fulfill its aim. It's worth noticing - that the init() function is designed to be called from multiple threads simultaneously.

To my mind, it's not possible to call multiple pthread_mutex_init on the same structure via current implementation (but of course, I'm not an expert ;) ). Even if we create a few new instances directly through the constructor: new MDBEnvProvider(), there will be an independent m_lock structure in each instance, am I right?

Copy link
Contributor

@martinhsv martinhsv Apr 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. My earlier comments weren't fully thought through.

However, I do still wonder about a sequence of events like this:
Thread 1:

  • calls GetInstance and runs to line 508
  • line 508 is executed, the boolean expression is evaluated to true
  • execution advances to the beginning of 509.

Thread 2:

  • calls GetInstance and runs to line 508
  • line 508 is executed, the boolean expression is again evaluated to true
  • execution advances all the way to the end of GetInstance(); an MDBEnvProvider has been constructed -- let's call it MDBEnvProvider object A
  • MDBEnvProvider object A is returned to the caller of GetInstance

Thread 1:

  • completes executing GetInstance; a different new MDBEnvProvider is constructed ('MDBEnvProvider object B')
  • MDBEnvProvider object B is returned to the caller of GetInstance

And we have two objects instead of 1. (And now both of these separate MDBEnvProvider objects can have init() called. That would include having mdb_dbi_open called both times -- and this possibility is part of what prompted the original #2601 fix.)

It's been a while since I've done anything with Singletons, so I've had to do a bit of review.

Your mechanism (static member pointer variable, check for NULL, then create the object via new) is the Gang-of-Four mechanism, but it seems not fully thread-safe.

Since the code base is firmly in C++11, might a Meyer's Singleton be a better implementation?

(Apologies if I've mucked something up, as I said, it's been a while since I've used a this Design Pattern.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your detailed clarification, I agree entirely. I've made a silent assumption that GetInstance() is called only by the main thread. I've been biased by focusing on nginx-connector use case - where this assumption is fulfilled indeed.

I'll replace this implementation with one that is thread-safe, as You suggested.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@martinhsv I reimplemented the singleton pattern to a thread-safe version (Meyer's Singleton). Thanks a lot for all suggestions.

}

MDB_env* MDBEnvProvider::GetEnv() {
init();
return m_env;
}

MDB_dbi* MDBEnvProvider::GetDBI() {
init();
return &m_dbi;
}

void MDBEnvProvider::init() {
pthread_mutex_lock(&m_lock);
if (!initialized) {
ziollek marked this conversation as resolved.
Show resolved Hide resolved
MDB_txn *txn;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we have guaranteed that only one MDBEnvProvider can be constructed in a given process, is it perhaps worth considering moving the contents of this initialization block to the MDBEnvProvider constructor?

There's nothing unsafe about the structure right now, but making that change (I think) would allow us to omit using m_lock every time GetEnv and GetDBI are called. (Indeed we could get rid of the init() function entirely then. And, I think, get rid of the m_lock member variable entirely.)

(I'm mostly interested in the compute cycles saved by omitting this locking/unlocking on each of those calls if it's not truly necessary. It's not a huge savings, but still.)

Thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I completely agree. It could be significantly simplified and any locking is not necessary.

initialized = true;
mdb_env_create(&m_env);
mdb_env_open(m_env, "./modsec-shared-collections",
MDB_WRITEMAP | MDB_NOSUBDIR, 0664);
mdb_txn_begin(m_env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, MDB_CREATE | MDB_DUPSORT, &m_dbi);
mdb_txn_commit(txn);
}
pthread_mutex_unlock(&m_lock);
}

void MDBEnvProvider::close() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This close function isn't currently called anywhere.

I'm thinking we'd want an MDBEnvProvider destructor to call this?

(I suppose in practice it doesn't matter that much since the object won't be destructed until the process shuts down.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, now it should be moved to destructor. I will fix that

pthread_mutex_lock(&m_lock);
if (initialized) {
mdb_dbi_close(m_env, m_dbi);
mdb_env_close(m_env);
}
pthread_mutex_unlock(&m_lock);
}

#endif

} // namespace backend
Expand Down
50 changes: 49 additions & 1 deletion src/collection/backend/lmdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,57 @@ namespace modsecurity {
namespace collection {
namespace backend {


/**
* The MDBEnvProvider class defines the `GetInstance` method that serves as an
* alternative to constructor and lets clients access the same instance of this
* class over and over. Its used to provide single MDB_env instance for each collection
* that uses lmdb to store and retrieve data. That approach satisfies lmdb requirement:
*
* "LMDB uses POSIX locks on files, and these locks have issues if one process opens
* a file multiple times. Because of this, do not mdb_env_open() a file multiple
* times from a single process."
*
* Creation of MDB_env is delayed to moment when first transaction is opened.
* This approach prevents passing env object to forked processes.
* In that way next lmdb requirement be satisfied:
*
* "Use an MDB_env* in the process which opened it, without fork()ing."
*/
class MDBEnvProvider {
protected:
static MDBEnvProvider* provider_;
MDBEnvProvider();
public:
MDBEnvProvider(MDBEnvProvider &other) = delete;
void operator=(const MDBEnvProvider &) = delete;

/**
* This is the static method that controls the access to the singleton
* instance. On the first run, it creates a singleton object and places it
* into the static field. On subsequent runs, it returns the client existing
* object stored in the static field.
*/
static MDBEnvProvider* GetInstance();
static void Finalize();

MDB_env* GetEnv();
MDB_dbi* GetDBI();

private:
MDB_env *m_env;
MDB_dbi m_dbi;
pthread_mutex_t m_lock;

bool initialized;
void init();
void close();
};

class LMDB :
public Collection {
public:
explicit LMDB(std::string name);
~LMDB();
void store(std::string key, std::string value) override;

bool storeOrUpdateFirst(const std::string &key,
Expand All @@ -75,11 +121,13 @@ class LMDB :
variables::KeyExclusions &ke) override;

private:
int txn_begin(unsigned int flags, MDB_txn **ret);
void string2val(const std::string& str, MDB_val *val);
void inline lmdb_debug(int rc, std::string op, std::string scope);

MDB_env *m_env;
MDB_dbi m_dbi;
bool isOpen;
};

} // namespace backend
Expand Down
3 changes: 3 additions & 0 deletions src/modsecurity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ ModSecurity::~ModSecurity() {
delete m_ip_collection;
delete m_session_collection;
delete m_user_collection;
#ifdef WITH_LMDB
collection::backend::MDBEnvProvider::Finalize();
#endif
}


Expand Down
4 changes: 2 additions & 2 deletions test/cppcheck_suppressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ functionStatic:src/engine/lua.h:71
functionConst:src/utils/geo_lookup.h:49
useInitializationList:src/operators/rbl.h:69
constStatement:test/common/modsecurity_test.cc:82
danglingTemporaryLifetime:src/modsecurity.cc:206
danglingTemporaryLifetime:src/modsecurity.cc:209
functionStatic:src/operators/geo_lookup.h:35
duplicateBreak:src/operators/validate_utf8_encoding.cc
duplicateBranch:src/request_body_processor/multipart.cc:91
syntaxError:src/transaction.cc:62
noConstructor:src/variables/variable.h:152
duplicateBranch:src/request_body_processor/multipart.cc:93
danglingTempReference:src/modsecurity.cc:206
danglingTempReference:src/modsecurity.cc:209
knownConditionTrueFalse:src/operators/validate_url_encoding.cc:77
knownConditionTrueFalse:src/operators/verify_svnr.cc:87
rethrowNoCurrentException:headers/modsecurity/transaction.h:307
Expand Down