Skip to content

Commit

Permalink
Don't forget to test decommissioned nodes
Browse files: browse the repository at this point in the history
  • Loading branch information
msgmaxim committed Sep 6, 2019
1 parent d6633a2 commit bab0ad5
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 19 deletions.
2 changes: 1 addition & 1 deletion httpserver/https_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void make_https_request(boost::asio::io_context& ioc,
#else

if (sn_address == "0.0.0.0") {
LOKI_LOG(warn, "Could not initiate request to snode (we don't know "
LOKI_LOG(debug, "Could not initiate request to snode (we don't know "
"their IP yet).");

cb(sn_response_t{SNodeError::NO_REACH, nullptr});
Expand Down
26 changes: 13 additions & 13 deletions httpserver/reachability_testing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@ constexpr std::chrono::minutes UNREACH_GRACE_PERIOD = 120min;

bool reachability_records_t::record_unreachable(const sn_pub_key_t& sn) {

auto it = offline_nodes_.find(sn);
const auto it = offline_nodes_.find(sn);

if (it == offline_nodes_.end()) {
LOKI_LOG(info, "adding a new node to UNREACHABLE: {}", sn);
/// TODO: change this to debug
LOKI_LOG(debug, "Adding a new node to UNREACHABLE: {}", sn);
offline_nodes_.insert({sn, {}});
} else {
LOKI_LOG(info, "node is ALREAY known to be UNREACHABLE: {}", sn);
LOKI_LOG(debug, "Node is ALREAY known to be UNREACHABLE: {}", sn);

it->second.last_tested = steady_clock::now();

const auto elapsed = it->second.last_tested - it->second.first_failure;
const auto elapsed_sec =
std::chrono::duration_cast<std::chrono::seconds>(elapsed).count();
LOKI_LOG(info, " - first time failed {} seconds ago", elapsed_sec);
LOKI_LOG(debug, "First time failed {} seconds ago", elapsed_sec);

/// TODO: Might still want to report as unreachable since this status
/// gets reset to `true` on Lokid restart
if (elapsed > UNREACH_GRACE_PERIOD && !it->second.reported) {
LOKI_LOG(warn, " - will REPORT this node to Lokid!");
if (it->second.reported) {
LOKI_LOG(debug, "Already reported node: {}", sn);
} else if (elapsed > UNREACH_GRACE_PERIOD) {
LOKI_LOG(debug, "Will REPORT this node to Lokid!");
return true;
} else {
if (it->second.reported) {
LOKI_LOG(warn, " - Already reported node: {}", sn);
}
}

}

return false;
Expand All @@ -58,13 +58,13 @@ bool reachability_records_t::record_reachable(const sn_pub_key_t& sn) {
bool reachability_records_t::expire(const sn_pub_key_t& sn) {

if (offline_nodes_.erase(sn)) {
LOKI_LOG(warn, " - removed entry for {}", sn);
LOKI_LOG(debug, "Removed entry for {}", sn);
}
}

void reachability_records_t::set_reported(const sn_pub_key_t& sn) {

auto it = offline_nodes_.find(sn);
const auto it = offline_nodes_.find(sn);
if (it != offline_nodes_.end()) {
it->second.reported = true;
}
Expand All @@ -82,7 +82,7 @@ boost::optional<sn_pub_key_t> reachability_records_t::next_to_test() {
return boost::none;
} else {

LOKI_LOG(warn, "~~~ Selecting to be re-tested: {}", it->first);
LOKI_LOG(debug, "Selecting to be re-tested: {}", it->first);

return it->first;
}
Expand Down
4 changes: 2 additions & 2 deletions httpserver/reachability_testing.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ namespace detail {
/// TODO: make this class "private"?
class reach_record_t {

// The time the node failed for the first time
// (and hasn't come back online)

using time_point_t = std::chrono::time_point<std::chrono::steady_clock>;

public:
// The time the node failed for the first time
// (and hasn't come back online)
time_point_t first_failure;
time_point_t last_tested;
// whether it's been reported to Lokid
Expand Down
6 changes: 3 additions & 3 deletions httpserver/service_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1006,12 +1006,12 @@ void ServiceNode::report_node_reachability(const sn_pub_key_t& sn_pk,

auto cb = [this, sn_pk, reachable](const sn_response_t&& res) {
if (res.error_code != SNodeError::NO_ERROR) {
LOKI_LOG(error, "Could not report node status");
LOKI_LOG(warn, "Could not report node status");
return;
}

if (!res.body) {
LOKI_LOG(error, "Empty body on Lokid report node status");
LOKI_LOG(warn, "Empty body on Lokid report node status");
return;
}

Expand All @@ -1026,7 +1026,7 @@ void ServiceNode::report_node_reachability(const sn_pub_key_t& sn_pk,
if (status == "OK") {
success = true;
} else {
LOKI_LOG(error, "Could not report node. Status: {}", status);
LOKI_LOG(warn, "Could not report node. Status: {}", status);
}
} catch (...) {
LOKI_LOG(error,
Expand Down
4 changes: 4 additions & 0 deletions httpserver/swarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ void Swarm::update_state(const all_swarms_t& swarms,
all_funded_nodes_.push_back(sn);
}
}

for (const auto& sn : decommissioned) {
all_funded_nodes_.push_back(sn);
}
}

boost::optional<sn_record_t> Swarm::choose_funded_node() const {
Expand Down

0 comments on commit bab0ad5

Please sign in to comment.