From e869d3b294d191ce0c15e15935692cde32f47ddb Mon Sep 17 00:00:00 2001
From: GeorgeKelly
Date: Thu, 9 Jul 2020 16:51:43 +0100
Subject: [PATCH 1/2] added set seed capability

---
 data_generator/data_generator.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/data_generator/data_generator.py b/data_generator/data_generator.py
index 01371e6..f3d327f 100644
--- a/data_generator/data_generator.py
+++ b/data_generator/data_generator.py
@@ -1,6 +1,7 @@
 import csv
 import json
 from typing import Any, Optional, AnyStr, IO, Union, Tuple
+import random
 from random import choice, choices, uniform, randint
 
 from faker import Faker
@@ -18,6 +19,28 @@ def __init__(self, meta: dict, **kwargs):
         self.default_min = kwargs.get("default_min", -1000)
         self.default_max = kwargs.get("default_max", 1000)
         self.null_probability = kwargs.get("null_probability", 0.1)
+        # Most recent seed set via the ``seed`` property (None = never seeded).
+        self._seed = None
+
+    @property
+    def seed(self):
+        """
+        Return the seed last assigned through this property, or None.
+        """
+        return self._seed
+
+    @seed.setter
+    def seed(self, value):
+        """
+        Record ``value`` and reseed the random sources with it.
+
+        Seeds this instance's Faker through ``seed_instance`` and also the
+        module-level ``random`` generator. The latter is shared by the whole
+        process, so other users of ``random`` are reseeded as well.
+        """
+        self._seed = value
+        self.fake.seed_instance(value)
+        random.seed(value)
 
     def null_column_value(self, col: dict) -> bool:
         """
From 0ab5947999355019ca6cb19ef93be1da02837a75 Mon Sep 17 00:00:00 2001
From: GeorgeKelly
Date: Mon, 13 Jul 2020 10:34:34 +0100
Subject: [PATCH 2/2] added test for seed

---
 tests/data/output/seed_test.csv | 11 ++++++++++
 tests/test_meta_faker.py        | 36 +++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 tests/data/output/seed_test.csv

diff --git a/tests/data/output/seed_test.csv b/tests/data/output/seed_test.csv
new file mode 100644
index 0000000..87c2454
--- /dev/null
+++ b/tests/data/output/seed_test.csv
@@ -0,0 +1,11 @@
+my_int,my_character_enum,my_email,my_datetime
+,b,ruth65@jackson-long.info,2013-06-13 05:11:07
+16,b,alexcarpenter@yahoo.com,1995-04-30 10:23:29
+16,c,jeremyfranklin@sosa.info,2017-10-15 20:25:05
+17,a,heather83@smith-moss.net,1991-12-27 06:57:23
+18,c,mvalentine@hotmail.com,1980-03-28 07:31:18
+,a,uwalker@hotmail.com,1984-04-21 18:36:57
+13,c,wstevenson@hotmail.com,1992-11-08 17:18:12
+11,c,zwise@yahoo.com,1972-10-21 20:26:53
+10,a,harringtonapril@yahoo.com,1973-05-18 07:35:46
+11,b,jacksonkyle@nguyen.com,1991-03-13 15:48:11
diff --git a/tests/test_meta_faker.py b/tests/test_meta_faker.py
index 1857f70..977142c 100644
--- a/tests/test_meta_faker.py
+++ b/tests/test_meta_faker.py
@@ -4,6 +4,8 @@ from io import StringIO
 
 
 from datetime import datetime
+from pathlib import Path
+import tempfile
 
 
 def test_readme():
@@ -160,3 +162,37 @@ def test_dates(col, exp_fmt):
     row = mf.generate_row()
     assert isinstance(row["test"], str)
     datetime.strptime(row["test"], exp_fmt)
+
+
+def test_seed():
+    """
+    A fixed seed must reproduce the stored reference CSV exactly.
+    """
+    meta = {
+        "columns": [
+            {
+                "name": "my_int",
+                "type": "int",
+                "minimum": 10,
+                "maximum": 20,
+                "nullable": True,
+            },
+            {"name": "my_character_enum", "type": "character", "enum": ["a", "b", "c"]},
+            {"name": "my_email", "type": "character"},
+            {"name": "my_datetime", "type": "datetime"},
+        ]
+    }
+    # Locate the reference file relative to this module, not the working directory.
+    expected_csv = Path(__file__).parent / "data" / "output" / "seed_test.csv"
+
+    mf = MetaFaker(meta=meta, special_cols={"my_email": "email"})
+    mf.seed = 888
+
+    with tempfile.TemporaryDirectory() as d:
+        generated_csv = Path(d) / "test.csv"
+        mf.write_data_to_csv(str(generated_csv), total_rows=10)
+        with open(generated_csv, "r") as t1, open(expected_csv, "r") as t2:
+            generated_lines = t1.readlines()
+            expected_lines = t2.readlines()
+
+    assert generated_lines == expected_lines, "Problem with seed"