-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LMDB - fix integration, restoring ability of use lmdb with nginx-modsecurity #2688
Changes from 4 commits
46f4089
1fa95ec
3b50b26
89186b7
00483e4
82326ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,8 @@ | |
#include <string> | ||
#include <memory> | ||
|
||
#include <pthread.h> | ||
|
||
#include "modsecurity/variable_value.h" | ||
#include "src/utils/regex.h" | ||
#include "src/variables/variable.h" | ||
|
@@ -36,22 +38,18 @@ namespace backend { | |
#ifdef WITH_LMDB | ||
|
||
LMDB::LMDB(std::string name) : | ||
Collection(name), m_env(NULL) { | ||
MDB_txn *txn; | ||
mdb_env_create(&m_env); | ||
mdb_env_open(m_env, "./modsec-shared-collections", | ||
MDB_WRITEMAP | MDB_NOSUBDIR, 0664); | ||
mdb_txn_begin(m_env, NULL, 0, &txn); | ||
mdb_dbi_open(txn, NULL, MDB_CREATE | MDB_DUPSORT, &m_dbi); | ||
mdb_txn_commit(txn); | ||
} | ||
|
||
Collection(name), m_env(NULL), isOpen(false) {} | ||
|
||
LMDB::~LMDB() { | ||
mdb_env_close(m_env); | ||
int LMDB::txn_begin(unsigned int flags, MDB_txn **ret) { | ||
if (!isOpen) { | ||
MDBEnvProvider* provider = MDBEnvProvider::GetInstance(); | ||
m_env = provider->GetEnv(); | ||
m_dbi = *(provider->GetDBI()); | ||
isOpen = true; | ||
} | ||
return mdb_txn_begin(m_env, NULL, flags, ret); | ||
} | ||
|
||
|
||
void LMDB::string2val(const std::string& str, MDB_val *val) { | ||
val->mv_size = sizeof(char)*(str.size()); | ||
val->mv_data = const_cast<char *>(str.c_str()); | ||
|
@@ -159,7 +157,7 @@ std::unique_ptr<std::string> LMDB::resolveFirst(const std::string& var) { | |
|
||
string2val(var, &mdb_key); | ||
|
||
rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn); | ||
rc = txn_begin(MDB_RDONLY, &txn); | ||
lmdb_debug(rc, "txn", "resolveFirst"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -192,7 +190,7 @@ bool LMDB::storeOrUpdateFirst(const std::string &key, | |
string2val(key, &mdb_key); | ||
string2val(value, &mdb_value); | ||
|
||
rc = mdb_txn_begin(m_env, NULL, 0, &txn); | ||
rc = txn_begin(0, &txn); | ||
lmdb_debug(rc, "txn", "storeOrUpdateFirst"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -240,7 +238,7 @@ void LMDB::resolveSingleMatch(const std::string& var, | |
MDB_val mdb_value_ret; | ||
MDB_cursor *cursor; | ||
|
||
rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn); | ||
rc = txn_begin(MDB_RDONLY, &txn); | ||
lmdb_debug(rc, "txn", "resolveSingleMatch"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -271,7 +269,7 @@ void LMDB::store(std::string key, std::string value) { | |
int rc; | ||
MDB_stat mst; | ||
|
||
rc = mdb_txn_begin(m_env, NULL, 0, &txn); | ||
rc = txn_begin(0, &txn); | ||
lmdb_debug(rc, "txn", "store"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -310,7 +308,7 @@ bool LMDB::updateFirst(const std::string &key, | |
MDB_val mdb_value; | ||
MDB_val mdb_value_ret; | ||
|
||
rc = mdb_txn_begin(m_env, NULL, 0, &txn); | ||
rc = txn_begin(0, &txn); | ||
lmdb_debug(rc, "txn", "updateFirst"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -364,7 +362,7 @@ void LMDB::del(const std::string& key) { | |
MDB_val mdb_value_ret; | ||
MDB_stat mst; | ||
|
||
rc = mdb_txn_begin(m_env, NULL, 0, &txn); | ||
rc = txn_begin(0, &txn); | ||
lmdb_debug(rc, "txn", "del"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -411,7 +409,7 @@ void LMDB::resolveMultiMatches(const std::string& var, | |
size_t keySize = var.size(); | ||
MDB_cursor *cursor; | ||
|
||
rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn); | ||
rc = txn_begin(MDB_RDONLY, &txn); | ||
lmdb_debug(rc, "txn", "resolveMultiMatches"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -465,7 +463,7 @@ void LMDB::resolveRegularExpression(const std::string& var, | |
|
||
Utils::Regex r(var, true); | ||
|
||
rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn); | ||
rc = txn_begin(MDB_RDONLY, &txn); | ||
lmdb_debug(rc, "txn", "resolveRegularExpression"); | ||
if (rc != 0) { | ||
goto end_txn; | ||
|
@@ -503,6 +501,62 @@ void LMDB::resolveRegularExpression(const std::string& var, | |
return; | ||
} | ||
|
||
|
||
MDBEnvProvider* MDBEnvProvider::provider_ = nullptr; | ||
|
||
MDBEnvProvider* MDBEnvProvider::GetInstance() { | ||
if (provider_==nullptr) { | ||
provider_ = new MDBEnvProvider(); | ||
} | ||
return provider_; | ||
} | ||
|
||
void MDBEnvProvider::Finalize() { | ||
if (provider_!=nullptr) { | ||
provider_->close(); | ||
provider_ = nullptr; | ||
} | ||
} | ||
|
||
MDBEnvProvider::MDBEnvProvider() : | ||
m_env(NULL), initialized(false) { | ||
pthread_mutex_init(&m_lock, NULL); | ||
} | ||
|
||
MDB_env* MDBEnvProvider::GetEnv() { | ||
init(); | ||
return m_env; | ||
} | ||
|
||
MDB_dbi* MDBEnvProvider::GetDBI() { | ||
init(); | ||
return &m_dbi; | ||
} | ||
|
||
void MDBEnvProvider::init() { | ||
pthread_mutex_lock(&m_lock); | ||
if (!initialized) { | ||
ziollek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
MDB_txn *txn; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that we have guaranteed that only one MDBEnvProvider can be constructed in a given process, is it perhaps worth considering moving the contents of this initialization block to the MDBEnvProvider constructor? There's nothing unsafe about the structure right now, but making that change (I think) would allow us to omit using m_lock every time GetEnv and GetDBI are called. (Indeed we could get rid of the init() function entirely then. And, I think, get rid of the m_lock member variable entirely.) (I'm mostly interested in the compute cycles saved by omitting this locking/unlocking on each of those calls if it's not truly necessary. It's not a huge savings, but still.) Thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I completely agree. It could be significantly simplified and any locking is not necessary. |
||
initialized = true; | ||
mdb_env_create(&m_env); | ||
mdb_env_open(m_env, "./modsec-shared-collections", | ||
MDB_WRITEMAP | MDB_NOSUBDIR, 0664); | ||
mdb_txn_begin(m_env, NULL, 0, &txn); | ||
mdb_dbi_open(txn, NULL, MDB_CREATE | MDB_DUPSORT, &m_dbi); | ||
mdb_txn_commit(txn); | ||
} | ||
pthread_mutex_unlock(&m_lock); | ||
} | ||
|
||
void MDBEnvProvider::close() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This close function isn't currently called anywhere. I'm thinking we'd want an MDBEnvProvider destructor to call this? (I suppose in practice it doesn't matter that much since the object won't be destructed until the process shuts down.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course, now it should be moved to destructor. I will fix that |
||
pthread_mutex_lock(&m_lock); | ||
if (initialized) { | ||
mdb_dbi_close(m_env, m_dbi); | ||
mdb_env_close(m_env); | ||
} | ||
pthread_mutex_unlock(&m_lock); | ||
} | ||
|
||
#endif | ||
|
||
} // namespace backend | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just wondering if the call to pthread_mutex_init might be better inside the if-block at line 538 in init().
As it is, it looks like it might at least be theoretically possible to have a thread-safety issue here where pthread_mutex_init could get called on an already-initialized m_lock.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, m_lock mutex is responsible for controlling access to flag: initialized and preventing calling mdb_env_create multiple times. If we moved it into if-block it wouldn't fulfill its aim. It's worth noticing - that the init() function is designed to be called from multiple threads simultaneously.
To my mind, it's not possible to call multiple pthread_mutex_init on the same structure via current implementation (but of course, I'm not an expert ;) ). Even if we create a few new instances directly through the constructor: new MDBEnvProvider(), there will be an independent m_lock structure in each instance, am I right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. My earlier comments weren't fully thought through.
However, I do still wonder about a sequence of events like this:
Thread 1:
Thread 2:
Thread 1:
And we have two objects instead of 1. (And now both of these separate MDBEnvProvider objects can have init() called. That would include having mdb_dbi_open called both times -- and this possibility is part of what prompted the original #2601 fix.)
It's been a while since I've done anything with Singletons, so I've had to do a bit of review.
Your mechanism (static member pointer variable, check for NULL, then create the object via new) is the Gang-of-Four mechanism, but it seems not fully thread-safe.
Since the code base is firmly in C++11, might a Meyer's Singleton be a better implementation?
(Apologies if I've mucked something up, as I said, it's been a while since I've used a this Design Pattern.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your detailed clarification, I agree entirely. I've made a silent assumption that GetInstance() is called only by the main thread. I've been biased by focusing on nginx-connector use case - where this assumption is fulfilled indeed.
I'll replace this implementation with one that is thread-safe, as You suggested.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@martinhsv I reimplemented the singleton pattern to a thread-safe version (Meyer's Singleton). Thanks a lot for all suggestions.