Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Election under high latency #92

Merged
merged 15 commits into from
Dec 10, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ dbs/
*.op
*.madadata
*.pid
.DS_Store
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ tokio = "1.19.2"
futures = "0.3.1"
ws = "*"
env_logger = "*"
log = { version = "=0.4.8", features = ["std"] }
log = { version = "=0.4.20", features = ["std"] }
rustc-serialize = "*"
bincode = "*"
serde = "*"
Expand All @@ -31,6 +31,7 @@ reqwest = { version="*" , features = ["blocking"]}
signal-hook = "0.3.9"
atomic_float = "0.1.0"
lazy_static = "1.4.0"
async-std = "1.12.0"

[dev-dependencies]
tokio-test = "*"
Expand Down
33 changes: 33 additions & 0 deletions Dockerfile-dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Development image for nun-db, built in two stages: a Rust builder stage and
# a slim runtime stage that only receives the compiled binary.

# ---- Build stage ----
FROM rust:1.70.0-slim AS builder
# Refresh the package index and install in the same layer, so a cached (stale)
# index layer is never reused by a later install.
RUN apt-get update && apt-get -y install libssl-dev pkg-config
# Create a placeholder crate and build it with only Cargo.toml plus empty stub
# sources, so the dependency build happens in layers that precede the real
# sources.
RUN cargo new --bin nun-db
WORKDIR ./nun-db
COPY ./Cargo.toml ./Cargo.toml
RUN mkdir benches/
RUN mkdir src/lib/
RUN mkdir src/bin/
RUN touch benches/nundb_benchmark.rs
RUN touch src/lib/lib.rs
RUN mv src/main.rs src/bin/main.rs
RUN cargo build --release
# Remove the stub sources before the real tree is added.
RUN rm src/**/*.rs
RUN rm benches/*.rs

# Add the real project sources.
ADD . ./
# Refresh modification times on the entry points; presumably so cargo treats
# them as changed relative to the stub build above -- TODO confirm.
RUN touch src/bin/main.rs
RUN touch src/lib/lib.rs

RUN cargo build --release

# ---- Runtime stage ----
FROM bitnami/minideb:bullseye

RUN apt-get update && apt-get -y install libssl-dev pkg-config
COPY --from=builder ./nun-db/target/release/nun-db /usr/bin/nun-db
# Default listen addresses, consumed by CMD below.
ENV NUN_WS_ADDR="0.0.0.0:3012"
ENV NUN_HTTP_ADDR="0.0.0.0:3013"
ENV NUN_TCP_ADDR="0.0.0.0:3014"

# NUN_USER and NUN_PWD are not set in this file; they are expanded by the shell
# when the container starts.
CMD ["sh" , "-c", "nun-db -u ${NUN_USER} -p ${NUN_PWD} start --http-address ${NUN_HTTP_ADDR} --tcp-address ${NUN_TCP_ADDR} --ws-address ${NUN_WS_ADDR} "]

18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,21 @@ docker-exec-toxiproxy:

run-tests:
@cargo test -- --test-threads=1 --show-output


add-primary-latency:
@docker-compose exec toxiproxy sh -c "/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=500 primary"

remove-primary-latency:
@docker-compose exec toxiproxy sh -c "/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic remove -n latency_downstream primary"

remove-all-latency:
@docker-compose exec toxiproxy sh -c '/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic remove -n latency_downstream primary & /go/bin/toxiproxy-cli -h toxiproxy:8474 toxic remove -n latency_downstream secondary-1 & /go/bin/toxiproxy-cli -h toxiproxy:8474 toxic remove -n latency_downstream secondary-2'


add-full-latency:
@docker-compose exec toxiproxy sh -c "/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=3000 primary&/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=3000 secondary-1 & /go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=3000 secondary-2"


restart-all-replicas:
@docker-compose restart nun-db-primary nun-db-secondary-1 nun-db-secondary-2
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,9 @@ vue : none
test : arbiter
analitcs-blog : none
$admin : newer

debug force-election
# Forces an election to run immediately, which is useful if some node in the cluster is not responsive or to debug latency problems between nodes. Elections in Nun-db are predictable and, unless the primary node is slow, the primary should not change even if you force an election.
```

### Arbiter
Expand Down Expand Up @@ -489,3 +492,9 @@ $connections
## Secure keys
* All keys prefixed with `$$` will be considered secure by Nun-db and will only allow database admin authentication to `set`, `get`, or `remove` them. These are useful if admins want to store information that should not be leaked to any client.
* The key `$$token` cannot be removed even with admin credentials.



## Configurations
### NUN_ELECTION_TIMEOUT
* Defines how long an election waits for acknowledgment from all nodes. You should rarely need to change this variable, since doing so could make elections slower. Its value should be at least twice the latency between nodes to ensure that the election process runs smoothly.
12 changes: 7 additions & 5 deletions benches/nundb_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use nundb::process_request::process_request;
use criterion::{criterion_group, criterion_main, Criterion};
use futures::channel::mpsc::{channel, Receiver, Sender};
use nundb::bo::*;
use nundb::process_request::process_request;
use std::collections::HashMap;
use std::sync::atomic::Ordering;
use std::sync::Arc;

fn criterion_benchmark(c: &mut Criterion) {

c.bench_function("Parse use-db", |b| {
b.iter(|| Request::parse("use-db jose jose"))
});
Expand All @@ -31,11 +30,15 @@ fn criterion_benchmark(c: &mut Criterion) {
receiver.try_next().unwrap();
process_request("use-db test test-1", &dbs, &mut client);
process_request("create-user foo bar", &dbs, &mut client);
process_request("set-permissions foo rw key-*|rwix nure-*", &dbs, &mut client);
process_request(
"set-permissions foo rw key-*|rwix nure-*",
&dbs,
&mut client,
);
client.auth.store(false, Ordering::Relaxed);
process_request("use-db test foo bar", &dbs, &mut client);

b.iter(||{
b.iter(|| {
let thread_id = std::thread::current().id();
let key = format!("key-{:?}", thread_id).to_string();
let command = format!("set {} jose", key).to_string();
Expand All @@ -44,7 +47,6 @@ fn criterion_benchmark(c: &mut Criterion) {
let response = receiver.try_next().unwrap();
println!("response: {:?}", response);
});
//todo!()
});
}
pub fn create_default_args() -> (Receiver<String>, Arc<Databases>, Client) {
Expand Down
46 changes: 28 additions & 18 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,20 @@ services:
image: "shopify/toxiproxy"
entrypoint: >
sh -c "
/go/bin/toxiproxy-cli -h toxiproxy:8474 create primary --listen 0.0.0.0:3017 --upstream nun-db-1:3017
sleep 1
/go/bin/toxiproxy-cli -h toxiproxy:8474 create primary --listen 0.0.0.0:3017 --upstream nun-db-primary:3017
/go/bin/toxiproxy-cli -h toxiproxy:8474 create secondary-1 --listen 0.0.0.0:3018 --upstream nun-db-secondary-1:3018
/go/bin/toxiproxy-cli -h toxiproxy:8474 create secondary-2 --listen 0.0.0.0:3019 --upstream nun-db-secondary-2:3019

/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=1000 primary
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=1000 --toxicity=1 primary
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=2000 primary
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=2000 --toxicity=1 primary

/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=10000 secondary-1
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=1000 --toxicity=1 secondary-1
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=500 secondary-1
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=2000 --toxicity=1 secondary-1

/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=1000 secondary-2
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=1000 --toxicity=1 secondary-2
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add -t latency -a latency=500 secondary-2
/go/bin/toxiproxy-cli -h toxiproxy:8474 toxic add --upstream --type=latency --toxicName=latency_upstream --a latency=2000 --toxicity=1 secondary-2
sleep 100000
"
jaeger:
image: jaegertracing/all-in-one:1.17
Expand All @@ -46,7 +48,7 @@ services:
image: "traefik:v2.7"
container_name: "traefik"
depends_on:
- nun-db-1
- nun-db-primary
- nun-db-secondary-1
- nun-db-secondary-2
- jaeger
Expand All @@ -66,24 +68,26 @@ services:
- "8080:8080"
volumes:
- "/var/run/docker.sock:/var/run/docker.sock:ro"
nun-db-1:
nun-db-primary:
#image: mateusfreira/nun-db
build:
context: .
dockerfile: ./Dockerfile
environment:
- NUN_DBS_DIR=/nun_data
- NUN_WS_ADDR=0.0.0.0:3058
- NUN_HTTP_ADDR=0.0.0.0:9092
- NUN_TCP_ADDR=nun-db-1:3017
- NUN_TCP_ADDR=nun-db-primary:3017
- NUN_TCP_JOIN_ADDR=toxiproxy:3017
- NUN_LOG_LEVEL=debug
- NUN_USER=mateus
- NUN_REPLICASET=toxiproxy:3017,toxiproxy:3018,toxiproxy:3019
- NUN_PWD=mateus
- NUN_ELECTION_TIMEOUT=3000
command:
- "sh"
- "-c"
- "nun-db -u $$NUN_USER -p $$NUN_PWD start --http-address $$NUN_HTTP_ADDR --tcp-address $$NUN_TCP_ADDR --ws-address $$NUN_WS_ADDR --replicate-address $$NUN_REPLICASET --external-address $$NUN_TCP_JOIN_ADDR"
- "sleep 3&&nun-db -u $$NUN_USER -p $$NUN_PWD start --http-address $$NUN_HTTP_ADDR --tcp-address $$NUN_TCP_ADDR --ws-address $$NUN_WS_ADDR --replicate-address $$NUN_REPLICASET --external-address $$NUN_TCP_JOIN_ADDR"
ports:
- "3058:3058" # ws
- "9092:9092" # http
Expand All @@ -92,19 +96,22 @@ services:
- /tmp/data/nun_db/:/nun_data
labels:
- "traefik.enable=true"
- "traefik.http.routers.nun-db-1.rule=Host(`nun-db-1.localhost`)"
- "traefik.http.routers.nun-db-1.entrypoints=web"
- "traefik.http.services.nun-db-1.loadbalancer.server.port=9092"
- "traefik.http.routers.nun-db-primary.rule=Host(`nun-db-primary.localhost`)"
- "traefik.http.routers.nun-db-primary.entrypoints=web"
- "traefik.http.services.nun-db-primary.loadbalancer.server.port=9092"

- "traefik.tcp.routers.nun-db-1.rule=HostSNI(`nun-db-1.localhost`)"
- "traefik.tcp.services.nun-db-1.loadbalancer.server.port=3017"
- "traefik.tcp.routers.nun-db-primary.rule=HostSNI(`nun-db-primary.localhost`)"
- "traefik.tcp.services.nun-db-primary.loadbalancer.server.port=3017"
depends_on:
- toxiproxy
- toxiproxy-config
nun-db-secondary-1:
#image: mateusfreira/nun-db
build:
context: .
dockerfile: ./Dockerfile
depends_on:
- nun-db-1
- nun-db-primary
environment:
- NUN_LOG_LEVEL=debug
- NUN_DBS_DIR=/nun_data
Expand All @@ -115,6 +122,7 @@ services:
- NUN_USER=mateus
- NUN_PWD=mateus
- NUN_REPLICASET=toxiproxy:3017,toxiproxy:3018,toxiproxy:3019
- NUN_ELECTION_TIMEOUT=3000
command:
- "sh"
- "-c"
Expand All @@ -134,11 +142,12 @@ services:
- "traefik.tcp.services.nun-db-2.loadbalancer.server.port=3018"
nun-db-secondary-2:
#image: mateusfreira/nun-db
#image: docker.io/library/nun-db-nun-db-secondary-1
build:
context: .
dockerfile: ./Dockerfile
depends_on:
- nun-db-1
- nun-db-primary
- nun-db-secondary-1
environment:
- NUN_DBS_DIR=/nun_data
Expand All @@ -149,6 +158,7 @@ services:
- NUN_USER=mateus
- NUN_PWD=mateus
- NUN_REPLICASET=toxiproxy:3017,toxiproxy:3018,toxiproxy:3019
- NUN_ELECTION_TIMEOUT=3000
command:
- "sh"
- "-c"
Expand Down
8 changes: 5 additions & 3 deletions src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ fn main() -> Result<(), String> {
log::info!("nundb starting!");
let matches: ArgMatches<'_> = nundb::commad_line::commands::prepare_args();
if let Some(start_match) = matches.subcommand_matches("start") {
let tcp_address = start_match.value_of("tcp-address").unwrap_or(NUN_TCP_ADDR.as_str());
let tcp_address = start_match
.value_of("tcp-address")
.unwrap_or(NUN_TCP_ADDR.as_str());
return start_db(
matches.value_of("user").unwrap_or(NUN_USER.as_str()),
matches.value_of("pwd").unwrap_or(NUN_PWD.as_str()),
Expand All @@ -39,7 +41,7 @@ fn main() -> Result<(), String> {
start_match
.value_of("http-address")
.unwrap_or(NUN_HTTP_ADDR.as_str()),
tcp_address,
tcp_address,
start_match
.value_of("replicate-address")
.unwrap_or(NUN_REPLICATE_ADDR.as_str()),
Expand Down Expand Up @@ -104,7 +106,7 @@ fn start_db(
}
});

let tcp_address_to_thread = String::from(tcp_address.clone());
let tcp_address_to_thread = String::from(tcp_address);
let dbs_tcp = dbs.clone();
/*
* This thread has to be the first one to start to avoid race conditions if the primary tries
Expand Down