Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poison the database by default if snapshotting fails #160

Merged
merged 2 commits into from
May 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

All changes in this project will be noted in this file.

<details>
<summary><b>Unreleased</b></summary>

* Snapshotting failure will now poison the database (customizable through CLI options or the configuration file)
[see [#160](https://github.com/skytable/skytable/pull/160)]

</details>

## Version 0.6.0 [2021-05-27]

> Breaking changes!
Expand Down
1 change: 1 addition & 0 deletions examples/config-files/template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ every = 120
[snapshot]
every = 3600 # Make a snapshot after every 1 hour (60min * 60sec= 3600secs)
atmost = 4 # Keep the 4 most recent snapshots
failsafe = true # stops accepting writes if snapshotting fails

# This key is *OPTIONAL*, used for TLS/SSL config
[ssl]
Expand Down
177 changes: 91 additions & 86 deletions server/src/cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,90 +3,95 @@ version: 0.6.0
author: Sayan N. <ohsayan@outlook.com>
about: The Skytable Database server
args:
- config:
short: c
required: false
long: withconfig
value_name: cfgfile
help: Sets a configuration file to start skyd
takes_value: true
- restore:
short: r
required: false
long: restore
value_name: snapshotfile
help: Restores data from a previous snapshot
takes_value: true
- host:
short: h
required: false
long: host
value_name: host
help: Sets the host to which the server will bind
takes_value: true
- port:
short: p
required: false
long: port
value_name: port
help: Sets the port to which the server will bind
takes_value: true
- noart:
required: false
long: noart
help: Disables terminal artwork
takes_value: false
- nosave:
required: false
long: nosave
help: Disables automated background saving
takes_value: false
- saveduration:
required: false
long: saveduration
value_name: duration
short: S
takes_value: true
help: Set the BGSAVE duration
- snapevery:
required: false
long: snapevery
value_name: duration
help: Set the periodic snapshot duration
takes_value: true
- snapkeep:
required: false
long: snapkeep
value_name: count
help: Sets the number of most recent snapshots to keep
takes_value: true
- sslkey:
required: false
long: sslkey
short: k
value_name: key
help: Sets the PEM key file to use for SSL/TLS
takes_value: true
- sslchain:
required: false
long: sslchain
short: z
value_name: chain
help: Sets the PEM chain file to use for SSL/TLS
takes_value: true
- sslonly:
required: false
long: sslonly
takes_value: false
help: Tells the server to only accept SSL connections and disables the non-SSL port
- config:
short: c
required: false
long: withconfig
value_name: cfgfile
help: Sets a configuration file to start skyd
takes_value: true
- restore:
short: r
required: false
long: restore
value_name: snapshotfile
help: Restores data from a previous snapshot
takes_value: true
- host:
short: h
required: false
long: host
value_name: host
help: Sets the host to which the server will bind
takes_value: true
- port:
short: p
required: false
long: port
value_name: port
help: Sets the port to which the server will bind
takes_value: true
- noart:
required: false
long: noart
help: Disables terminal artwork
takes_value: false
- nosave:
required: false
long: nosave
help: Disables automated background saving
takes_value: false
- saveduration:
required: false
long: saveduration
value_name: duration
short: S
takes_value: true
help: Set the BGSAVE duration
- snapevery:
required: false
long: snapevery
value_name: duration
help: Set the periodic snapshot duration
takes_value: true
- snapkeep:
required: false
long: snapkeep
value_name: count
help: Sets the number of most recent snapshots to keep
takes_value: true
- sslkey:
required: false
long: sslkey
short: k
value_name: key
help: Sets the PEM key file to use for SSL/TLS
takes_value: true
- sslchain:
required: false
long: sslchain
short: z
value_name: chain
help: Sets the PEM chain file to use for SSL/TLS
takes_value: true
- sslonly:
required: false
long: sslonly
takes_value: false
help: Tells the server to only accept SSL connections and disables the non-SSL port
- stopwriteonfail:
required: false
long: stop-write-on-fail
takes_value: true
help: Stop accepting writes if any persistence method except BGSAVE fails (defaults to true)
subcommands:
- upgrade:
about: Upgrades old datsets to the latest format supported by this server edition
args:
- format:
short: f
long: from
takes_value: true
value_name: format
required: true
help: The format of the old files which need to be upgraded
- upgrade:
about: Upgrades old datsets to the latest format supported by this server edition
args:
- format:
short: f
long: from
takes_value: true
value_name: format
required: true
help: The format of the old files which need to be upgraded
42 changes: 33 additions & 9 deletions server/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ pub struct ConfigKeySnapshot {
///
/// If atmost is set to `0`, then all the snapshots will be kept
atmost: usize,
/// Prevent writes to the database if snapshotting fails
failsafe: Option<bool>,
}

/// Port configuration
Expand Down Expand Up @@ -214,16 +216,22 @@ pub struct SnapshotPref {
pub every: u64,
/// The maximum numeber of snapshots to be kept
pub atmost: usize,
/// Lock writes if snapshotting fails
pub poison: bool,
}

impl SnapshotPref {
/// Create a new a new `SnapshotPref` instance
pub const fn new(every: u64, atmost: usize) -> Self {
SnapshotPref { every, atmost }
pub const fn new(every: u64, atmost: usize, poison: bool) -> Self {
SnapshotPref {
every,
atmost,
poison,
}
}
/// Returns `every,almost` as a tuple for pattern matching
pub const fn decompose(self) -> (u64, usize) {
(self.every, self.atmost)
pub const fn decompose(self) -> (u64, usize, bool) {
(self.every, self.atmost, self.poison)
}
}

Expand Down Expand Up @@ -295,7 +303,11 @@ impl ParsedConfig {
snapshot: cfg_info
.snapshot
.map(|snapshot| {
SnapshotConfig::Enabled(SnapshotPref::new(snapshot.every, snapshot.atmost))
SnapshotConfig::Enabled(SnapshotPref::new(
snapshot.every,
snapshot.atmost,
snapshot.failsafe.unwrap_or(true),
))
})
.unwrap_or(SnapshotConfig::default()),
ports: if let Some(sslopts) = cfg_info.ssl {
Expand All @@ -306,7 +318,6 @@ impl ParsedConfig {
chain: sslopts.chain,
port: sslopts.port,
},

host: cfg_info.server.host,
}
} else {
Expand Down Expand Up @@ -526,8 +537,21 @@ pub fn get_config_file_or_return_cfg() -> Result<ConfigType<ParsedConfig, String
},
None => None,
};
let failsafe = if let Ok(failsafe) = matches
.value_of("stop-write-on-fail")
.map(|val| val.parse::<bool>())
.unwrap_or(Ok(true))
{
failsafe
} else {
return Err(ConfigError::CliArgErr(
"Please provide a boolean `true` or `false` value to --stop-write-on-fail",
));
};
let snapcfg = match (snapevery, snapkeep) {
(Some(every), Some(keep)) => SnapshotConfig::Enabled(SnapshotPref::new(every, keep)),
(Some(every), Some(keep)) => {
SnapshotConfig::Enabled(SnapshotPref::new(every, keep, failsafe))
}
(Some(_), None) => {
return Err(ConfigError::CliArgErr(
"No value supplied for `--snapkeep`. When you supply `--snapevery`, you also need to specify `--snapkeep`"
Expand Down Expand Up @@ -702,7 +726,7 @@ mod tests {
ParsedConfig::new(
false,
BGSave::default(),
SnapshotConfig::Enabled(SnapshotPref::new(3600, 4)),
SnapshotConfig::Enabled(SnapshotPref::new(3600, 4, true)),
PortConfig::new_secure_only(
DEFAULT_IPV4,
SslOpts::new(
Expand Down Expand Up @@ -782,7 +806,7 @@ mod tests {
assert_eq!(
cfg,
ParsedConfig {
snapshot: SnapshotConfig::Enabled(SnapshotPref::new(3600, 4)),
snapshot: SnapshotConfig::Enabled(SnapshotPref::new(3600, 4, true)),
bgsave: BGSave::default(),
noart: false,
ports: PortConfig::default()
Expand Down
5 changes: 4 additions & 1 deletion server/src/coredb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@ impl CoreDB {
pub fn new(snapshot_cfg: &SnapshotConfig, restore_file: Option<String>) -> TResult<Self> {
let coremap = diskstore::get_saved(restore_file)?;
let mut snap_count = None;
if let SnapshotConfig::Enabled(SnapshotPref { every: _, atmost }) = snapshot_cfg {
if let SnapshotConfig::Enabled(SnapshotPref {
every: _, atmost, ..
}) = snapshot_cfg
{
snap_count = Some(atmost);
}
let snapcfg = snap_count
Expand Down
14 changes: 11 additions & 3 deletions server/src/services/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ use tokio::time::{self, Duration};
/// This service calls `SnapEngine::mksnap()` periodically to create snapshots. Whenever
/// the interval for snapshotting expires or elapses, we create a snapshot. The snapshot service
/// keeps creating snapshots, as long as the database keeps running. Once [`dbnet::run`] broadcasts
/// a termination signal, we're ready to quit
/// a termination signal, we're ready to quit. This function will, by default, poison the database
/// if snapshotting fails, unless customized by the user.
pub async fn snapshot_service(
handle: CoreDB,
ss_config: SnapshotConfig,
Expand All @@ -47,7 +48,7 @@ pub async fn snapshot_service(
return;
}
SnapshotConfig::Enabled(configuration) => {
let (duration, atmost) = configuration.decompose();
let (duration, atmost, failsafe) = configuration.decompose();
let duration = Duration::from_secs(duration);
let mut sengine = match SnapshotEngine::new(atmost, &handle, None) {
Ok(ss) => ss,
Expand All @@ -59,7 +60,14 @@ pub async fn snapshot_service(
loop {
tokio::select! {
_ = time::sleep_until(time::Instant::now() + duration) => {
let _ = sengine.mksnap().await;
if sengine.mksnap().await {
// it passed, so unpoison the handle
drop(handle.unpoison());
} else if failsafe {
// mksnap returned false and we are set to stop writes if snapshotting failed
// so let's poison the handle
drop(handle.poison());
}
},
_ = termination_signal.receive_signal() => {
// time to terminate; goodbye!
Expand Down