Permalink
Browse files

Reduce disk usage

Don't locally index attributes that are not subspace axes.
  • Loading branch information...
1 parent 24033eb commit ed7bcb3bc8e9cd849d278457d5c37aa75a3d8d7a @rescrv committed Feb 27, 2013
Showing with 62 additions and 16 deletions.
  1. +23 −0 common/configuration.cc
  2. +3 −0 common/configuration.h
  3. +9 −6 daemon/datalayer.cc
  4. +26 −10 daemon/datalayer_encodings.cc
  5. +1 −0 daemon/datalayer_encodings.h
View
@@ -42,6 +42,7 @@ using hyperdex::configuration;
using hyperdex::region_id;
using hyperdex::schema;
using hyperdex::server_id;
+using hyperdex::subspace;
using hyperdex::subspace_id;
using hyperdex::virtual_server_id;
@@ -52,6 +53,7 @@ configuration :: configuration()
, m_region_ids_by_virtual()
, m_server_ids_by_virtual()
, m_schemas_by_region()
+ , m_subspaces_by_region()
, m_subspace_ids_by_region()
, m_subspace_ids_for_prev()
, m_subspace_ids_for_next()
@@ -73,6 +75,7 @@ configuration :: configuration(const configuration& other)
, m_region_ids_by_virtual(other.m_region_ids_by_virtual)
, m_server_ids_by_virtual(other.m_server_ids_by_virtual)
, m_schemas_by_region(other.m_schemas_by_region)
+ , m_subspaces_by_region(other.m_subspaces_by_region)
, m_subspace_ids_by_region(other.m_subspace_ids_by_region)
, m_subspace_ids_for_prev(other.m_subspace_ids_for_prev)
, m_subspace_ids_for_next(other.m_subspace_ids_for_next)
@@ -193,6 +196,22 @@ configuration :: get_schema(const region_id& ri) const
return NULL;
}
+const subspace*
+configuration :: get_subspace(const region_id& ri) const
+{
+ std::vector<uint64_subspace_t>::const_iterator it;
+ it = std::lower_bound(m_subspaces_by_region.begin(),
+ m_subspaces_by_region.end(),
+ uint64_subspace_t(ri.get(), NULL));
+
+ if (it != m_subspaces_by_region.end() && it->first == ri.get())
+ {
+ return it->second;
+ }
+
+ return NULL;
+}
+
virtual_server_id
configuration :: get_virtual(const region_id& ri, const server_id& si) const
{
@@ -796,6 +815,7 @@ configuration :: operator = (const configuration& rhs)
m_region_ids_by_virtual = rhs.m_region_ids_by_virtual;
m_server_ids_by_virtual = rhs.m_server_ids_by_virtual;
m_schemas_by_region = rhs.m_schemas_by_region;
+ m_subspaces_by_region = rhs.m_subspaces_by_region;
m_subspace_ids_by_region = rhs.m_subspace_ids_by_region;
m_subspace_ids_for_prev = rhs.m_subspace_ids_for_prev;
m_subspace_ids_for_next = rhs.m_subspace_ids_for_next;
@@ -816,6 +836,7 @@ configuration :: refill_cache()
m_region_ids_by_virtual.clear();
m_server_ids_by_virtual.clear();
m_schemas_by_region.clear();
+ m_subspaces_by_region.clear();
m_subspace_ids_by_region.clear();
m_subspace_ids_for_prev.clear();
m_subspace_ids_for_next.clear();
@@ -848,6 +869,7 @@ configuration :: refill_cache()
{
region& r(ss.regions[y]);
m_schemas_by_region.push_back(std::make_pair(r.id.get(), &s.sc));
+ m_subspaces_by_region.push_back(std::make_pair(r.id.get(), &ss));
m_subspace_ids_by_region.push_back(std::make_pair(r.id.get(), ss.id.get()));
if (r.replicas.empty())
@@ -893,6 +915,7 @@ configuration :: refill_cache()
std::sort(m_region_ids_by_virtual.begin(), m_region_ids_by_virtual.end());
std::sort(m_server_ids_by_virtual.begin(), m_server_ids_by_virtual.end());
std::sort(m_schemas_by_region.begin(), m_schemas_by_region.end());
+ std::sort(m_subspaces_by_region.begin(), m_subspaces_by_region.end());
std::sort(m_subspace_ids_by_region.begin(), m_subspace_ids_by_region.end());
std::sort(m_subspace_ids_for_prev.begin(), m_subspace_ids_for_prev.end());
std::sort(m_subspace_ids_for_next.begin(), m_subspace_ids_for_next.end());
View
@@ -73,6 +73,7 @@ class configuration
public:
const schema* get_schema(const char* space) const;
const schema* get_schema(const region_id& ri) const;
+ const subspace* get_subspace(const region_id& ri) const;
virtual_server_id get_virtual(const region_id& ri, const server_id& si) const;
subspace_id subspace_of(const region_id& ri) const;
subspace_id subspace_prev(const subspace_id& ss) const;
@@ -126,6 +127,7 @@ class configuration
private:
typedef std::pair<uint64_t, uint64_t> pair_uint64_t;
typedef std::pair<uint64_t, schema*> uint64_schema_t;
+ typedef std::pair<uint64_t, subspace*> uint64_subspace_t;
typedef std::pair<uint64_t, po6::net::location> uint64_location_t;
private:
@@ -135,6 +137,7 @@ class configuration
std::vector<pair_uint64_t> m_region_ids_by_virtual;
std::vector<pair_uint64_t> m_server_ids_by_virtual;
std::vector<uint64_schema_t> m_schemas_by_region;
+ std::vector<uint64_subspace_t> m_subspaces_by_region;
std::vector<pair_uint64_t> m_subspace_ids_by_region;
std::vector<pair_uint64_t> m_subspace_ids_for_prev;
std::vector<pair_uint64_t> m_subspace_ids_for_next;
View
@@ -488,7 +488,6 @@ datalayer :: del(const region_id& ri,
const e::slice& key,
const std::vector<e::slice>& old_value)
{
- const schema* sc = m_daemon->m_config.get_schema(ri);
leveldb::WriteBatch updates;
std::vector<char> backing1;
std::vector<char> backing2;
@@ -499,7 +498,9 @@ datalayer :: del(const region_id& ri,
updates.Delete(lkey);
// apply the index operations
- returncode rc = create_index_changes(sc, ri, key, &old_value, NULL, &updates);
+ const schema* sc = m_daemon->m_config.get_schema(ri);
+ const subspace* su = m_daemon->m_config.get_subspace(ri);
+ returncode rc = create_index_changes(sc, su, ri, key, &old_value, NULL, &updates);
if (rc != SUCCESS)
{
@@ -572,7 +573,6 @@ datalayer :: put(const region_id& ri,
const std::vector<e::slice>& new_value,
uint64_t version)
{
- const schema* sc = m_daemon->m_config.get_schema(ri);
leveldb::WriteBatch updates;
std::vector<char> backing1;
std::vector<char> backing2;
@@ -585,7 +585,9 @@ datalayer :: put(const region_id& ri,
updates.Put(lkey, lval);
// apply the index operations
- returncode rc = create_index_changes(sc, ri, key, NULL, &new_value, &updates);
+ const schema* sc = m_daemon->m_config.get_schema(ri);
+ const subspace* su = m_daemon->m_config.get_subspace(ri);
+ returncode rc = create_index_changes(sc, su, ri, key, NULL, &new_value, &updates);
if (rc != SUCCESS)
{
@@ -661,7 +663,6 @@ datalayer :: overput(const region_id& ri,
const std::vector<e::slice>& new_value,
uint64_t version)
{
- const schema* sc = m_daemon->m_config.get_schema(ri);
leveldb::WriteBatch updates;
std::vector<char> backing1;
std::vector<char> backing2;
@@ -674,7 +675,9 @@ datalayer :: overput(const region_id& ri,
updates.Put(lkey, lval);
// apply the index operations
- returncode rc = create_index_changes(sc, ri, key, &old_value, &new_value, &updates);
+ const schema* sc = m_daemon->m_config.get_schema(ri);
+ const subspace* su = m_daemon->m_config.get_subspace(ri);
+ returncode rc = create_index_changes(sc, su, ri, key, &old_value, &new_value, &updates);
if (rc != SUCCESS)
{
@@ -583,6 +583,7 @@ generate_index(const hyperdex::region_id& ri,
datalayer::returncode
hyperdex :: create_index_changes(const schema* sc,
+ const subspace* su,
const region_id& ri,
const e::slice& key,
const std::vector<e::slice>* old_value,
@@ -598,13 +599,16 @@ hyperdex :: create_index_changes(const schema* sc,
assert(old_value->size() + 1 == sc->attrs_sz);
assert(old_value->size() == new_value->size());
- for (size_t i = 0; i + 1 < sc->attrs_sz; ++i)
+ for (size_t j = 0; j < su->attrs.size(); ++j)
{
- if ((*old_value)[i] != (*new_value)[i])
+ size_t attr = su->attrs[j];
+ assert(attr < sc->attrs_sz);
+
+ if (attr > 0 && (*old_value)[attr - 1] != (*new_value)[attr - 1])
{
- generate_index(ri, i + 1, sc->attrs[i + 1].type, (*old_value)[i], key, &backing, &slice);
+ generate_index(ri, attr, sc->attrs[attr].type, (*old_value)[attr - 1], key, &backing, &slice);
updates->Delete(slice);
- generate_index(ri, i + 1, sc->attrs[i + 1].type, (*new_value)[i], key, &backing, &slice);
+ generate_index(ri, attr, sc->attrs[attr].type, (*new_value)[attr - 1], key, &backing, &slice);
updates->Put(slice, empty);
}
}
@@ -613,20 +617,32 @@ hyperdex :: create_index_changes(const schema* sc,
{
assert(old_value->size() + 1 == sc->attrs_sz);
- for (size_t i = 0; i + 1 < sc->attrs_sz; ++i)
+ for (size_t j = 0; j < su->attrs.size(); ++j)
{
- generate_index(ri, i + 1, sc->attrs[i + 1].type, (*old_value)[i], key, &backing, &slice);
- updates->Delete(slice);
+ size_t attr = su->attrs[j];
+ assert(attr < sc->attrs_sz);
+
+ if (attr > 0)
+ {
+ generate_index(ri, attr, sc->attrs[attr].type, (*old_value)[attr - 1], key, &backing, &slice);
+ updates->Delete(slice);
+ }
}
}
else if (new_value)
{
assert(new_value->size() + 1 == sc->attrs_sz);
- for (size_t i = 0; i + 1 < sc->attrs_sz; ++i)
+ for (size_t j = 0; j < su->attrs.size(); ++j)
{
- generate_index(ri, i + 1, sc->attrs[i + 1].type, (*new_value)[i], key, &backing, &slice);
- updates->Put(slice, empty);
+ size_t attr = su->attrs[j];
+ assert(attr < sc->attrs_sz);
+
+ if (attr > 0)
+ {
+ generate_index(ri, attr, sc->attrs[attr].type, (*new_value)[attr - 1], key, &backing, &slice);
+ updates->Put(slice, empty);
+ }
}
}
@@ -123,6 +123,7 @@ parse_object_key(const leveldb::Slice& s, e::slice* k);
datalayer::returncode
create_index_changes(const schema* sc,
+ const subspace* su,
const region_id& ri,
const e::slice& key,
const std::vector<e::slice>* old_value,

0 comments on commit ed7bcb3

Please sign in to comment.