Skip to content

Commit

Permalink
Merge pull request #31 from bhagenbourger/feature/length_for_string
Browse files Browse the repository at this point in the history
Add length option for random string provider

Co-authored-by: bhagenbourger <benoit@hagenbourger.fr>
  • Loading branch information
vianneybacoup and bhagenbourger committed Mar 10, 2024
2 parents 827e4e1 + c2eee67 commit 0513971
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 27 deletions.
5 changes: 4 additions & 1 deletion docs/columns/providers/random.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ Create a random 32 bits integer with:
```yaml
- name: string_code
provider: Random.String.alphanumeric
length: 5..15
```
Create a random string of length 10, containing only alphanumeric characters.
Create a random string containing only alphanumeric characters.

- An optional parameter **length** specifies the length of the string. It can be a constant (e.g. `8`) or a range (e.g. `5..15`, where the upper bound is exclusive, so lengths 5 through 14 are generated). The default is 10.

[Options](../options.md) are also possible.
5 changes: 4 additions & 1 deletion src/generate/csv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,10 @@ mod tests {
},
Column {
name: "id".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: presence::new_from_yaml(
&YamlLoader::load_from_str("presence: 1").unwrap()[0],
),
Expand Down
25 changes: 20 additions & 5 deletions src/generate/parquet/batch_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,10 @@ mod tests {
fn given_bool_batch_generator_with_wrong_provider_should_panic() {
let column = Column {
name: "bool_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("name: temp").unwrap()[0]),
};
let batch_generator = BoolBatchGenerator { column };
Expand Down Expand Up @@ -300,7 +303,10 @@ mod tests {
fn given_int_batch_generator_with_wrong_provider_should_panic() {
let column = Column {
name: "int_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("name: temp").unwrap()[0]),
};
let batch_generator = IntBatchGenerator { column };
Expand All @@ -312,7 +318,10 @@ mod tests {
fn given_str_provider_should_return_batch_generator() {
let column = Column {
name: "str_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("name: test").unwrap()[0]),
};

Expand All @@ -324,7 +333,10 @@ mod tests {
fn given_str_batch_generator_should_batch_correctly() {
let column = Column {
name: "str_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("name: test").unwrap()[0]),
};
let batch_generator = StrBatchGenerator { column };
Expand All @@ -337,7 +349,10 @@ mod tests {
fn given_str_batch_generator_with_presence_should_batch_correctly() {
let column = Column {
name: "str_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("presence: 0.5").unwrap()[0]),
};
let batch_generator = StrBatchGenerator { column };
Expand Down
5 changes: 4 additions & 1 deletion src/generate/parquet/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ mod tests {
fn given_str_provider_should_return_utf8_datatype() {
let column = Column {
name: "str_column".to_string(),
provider: Box::new(AlphanumericProvider {}),
provider: Box::new(AlphanumericProvider {
min_length: 10,
max_length: 11,
}),
presence: new_from_yaml(&YamlLoader::load_from_str("name: test").unwrap()[0]),
};
assert_eq!(get_parquet_type_from_column(column), DataType::Utf8);
Expand Down
109 changes: 100 additions & 9 deletions src/providers/random/string/alphanumeric.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,68 @@
use crate::providers::provider::{Provider, Value};
use crate::providers::utils::string::random_characters;

use log::warn;
use yaml_rust::Yaml;

use yaml_rust::Yaml::Integer;
use yaml_rust::Yaml::String;

const DEFAULT_LENGTH: u32 = 10;

#[derive(Clone)]
pub struct AlphanumericProvider;
pub struct AlphanumericProvider {
pub min_length: u32,
pub max_length: u32,
}

impl Provider for AlphanumericProvider {
fn value(&self, _: u32) -> Value {
Value::String(random_characters(10))
Value::String(random_characters(fastrand::u32(
self.min_length..self.max_length,
)))
}
fn new_from_yaml(_: &Yaml) -> AlphanumericProvider {
AlphanumericProvider
fn new_from_yaml(column: &Yaml) -> AlphanumericProvider {
let yaml_length: Vec<u32> = match &column["length"] {
Integer(i) => Some(i.to_string()),
String(s) => Some(s.to_string()),
_ => None,
}
.map(|s| {
s.split("..")
.map(|s: &str| s.trim().parse::<u32>().unwrap_or(DEFAULT_LENGTH))
.collect()
})
.unwrap_or(vec![DEFAULT_LENGTH]);

let mut param_min_length: u32 = DEFAULT_LENGTH;
let mut param_max_length: u32 = DEFAULT_LENGTH + 1;

match yaml_length.len() {
1 => {
param_min_length = yaml_length[0];
param_max_length = param_min_length + 1;
}
2 => {
if yaml_length[0] < yaml_length[1] {
param_min_length = yaml_length[0];
param_max_length = yaml_length[1];
} else {
warn!(
"min range is not less to max ranger. Default are used ([{} and {}[)",
DEFAULT_LENGTH, DEFAULT_LENGTH + 1
)
}
}
_ => warn!(
"length option is invalid, must be either an u32 or a ranger u32..u32. Default ({}) is taken.",
DEFAULT_LENGTH
)
}

AlphanumericProvider {
min_length: param_min_length,
max_length: param_max_length,
}
}
}

Expand All @@ -22,8 +73,12 @@ mod tests {

use yaml_rust::YamlLoader;

fn generate_provider() -> AlphanumericProvider {
let yaml_str = "name: id".to_string();
fn generate_provider(length: Option<&str>) -> AlphanumericProvider {
let yaml_length = match length {
Some(value) => format!("{}length: {}", "\n", value),
None => String::new(),
};
let yaml_str = format!("name: id{}", yaml_length);

let yaml = YamlLoader::load_from_str(yaml_str.as_str()).unwrap();
AlphanumericProvider::new_from_yaml(&yaml[0])
Expand All @@ -32,7 +87,10 @@ mod tests {
// Parquet type
#[test]
fn given_nothing_should_return_string_type() {
let provider: AlphanumericProvider = AlphanumericProvider;
let provider: AlphanumericProvider = AlphanumericProvider {
min_length: 10,
max_length: 11,
};
match provider.value(0) {
Value::String(_) => (),
_ => panic!(),
Expand All @@ -42,13 +100,46 @@ mod tests {
// Validate YAML file
#[test]
fn given_no_config_should_return_default() {
let _: AlphanumericProvider = generate_provider();
let provider: AlphanumericProvider = generate_provider(None);
assert_eq!(provider.min_length, 10);
assert_eq!(provider.max_length, 11);
}

#[test]
fn given_constant_config_should_return_good_length_range() {
let provider: AlphanumericProvider = generate_provider(Some("8"));
assert_eq!(provider.min_length, 8);
assert_eq!(provider.max_length, 9);
}

#[test]
fn given_bad_constant_config_should_return_default() {
let provider: AlphanumericProvider = generate_provider(Some("test"));
assert_eq!(provider.min_length, 10);
assert_eq!(provider.max_length, 11);
}

#[test]
fn given_range_config_should_return_good_length_range() {
let provider: AlphanumericProvider = generate_provider(Some("8 .. 20"));
assert_eq!(provider.min_length, 8);
assert_eq!(provider.max_length, 20);
}

#[test]
fn given_bad_range_config_should_return_default() {
let provider: AlphanumericProvider = generate_provider(Some("20..8"));
assert_eq!(provider.min_length, 10);
assert_eq!(provider.max_length, 11);
}

// Validate value calculation
#[test]
fn given_index_x_should_return_random_string_of_length_10() {
let provider = AlphanumericProvider;
let provider = AlphanumericProvider {
min_length: 10,
max_length: 11,
};

let values_to_check = [0, 4, 50];
for value in values_to_check {
Expand Down
10 changes: 9 additions & 1 deletion tests/all_options.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ columns:
after: 2000-02-15 12:15:00
before: 2020-07-17 23:11:57

- name: code
- name: code_10
provider: Random.String.alphanumeric

- name: code_20
provider: Random.String.alphanumeric
length: 20

- name: code_between_5_and_15
provider: Random.String.alphanumeric
length: 5..15

- name: is_subscribed
provider: Random.bool

Expand Down
30 changes: 21 additions & 9 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#[cfg(windows)]
const FAKELAKE_COMMAND_NAME: &str = "fakelake.exe";
#[cfg(not(windows))]
const FAKELAKE_COMMAND_NAME: &str = "fakelake";

#[cfg(test)]
mod tests {
use assert_cmd::prelude::*;
Expand All @@ -6,13 +11,18 @@ mod tests {

use std::path::Path;

use crate::FAKELAKE_COMMAND_NAME;

#[test]
fn given_no_args_should_fail() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin("fakelake")?;

cmd.assert().failure().stderr(predicate::str::contains(
"Usage: fakelake.exe [OPTIONS] <COMMAND>",
));
cmd.assert()
.failure()
.stderr(predicate::str::contains(format!(
"Usage: {} [OPTIONS] <COMMAND>",
FAKELAKE_COMMAND_NAME
)));

Ok(())
}
Expand All @@ -24,9 +34,10 @@ mod tests {
cmd.arg("--help")
.assert()
.success()
.stdout(predicate::str::contains(
"Usage: fakelake.exe [OPTIONS] <COMMAND>",
));
.stdout(predicate::str::contains(format!(
"Usage: {} [OPTIONS] <COMMAND>",
FAKELAKE_COMMAND_NAME
)));

Ok(())
}
Expand All @@ -38,9 +49,10 @@ mod tests {
cmd.arg("generate")
.assert()
.failure()
.stderr(predicate::str::contains(
"Usage: fakelake.exe generate <PATH_TO_CONFIG>",
));
.stderr(predicate::str::contains(format!(
"Usage: {} generate <PATH_TO_CONFIG>",
FAKELAKE_COMMAND_NAME
)));

Ok(())
}
Expand Down

0 comments on commit 0513971

Please sign in to comment.