Skip to content

Commit

Permalink
Use Gaussian to throw number of files/folders created per step
Browse files Browse the repository at this point in the history
  • Loading branch information
klieret committed Apr 19, 2019
1 parent ac202a4 commit bc011a9
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 25 deletions.
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ If the executable is not in your path after installation, you can also use
randomfiletree.create_random_tree("/path/to/basedir", nfiles=2.0, nfolders=0.5, maxdepth=5, repeat=10)
Randomfiletree will now crawl through all directories in ``/path/to/basedir`` and
create new files and folders in each of them; the number created per directory in
each pass is drawn from a Gaussian distribution.

Take a look at the documentation_ to find out more about the additional functionality provided.

.. _documentation: https://randomfiletree.readthedocs.io/
Expand Down
34 changes: 25 additions & 9 deletions randomfiletree/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,32 @@ def parser():
)
parser.add_argument(
"-d",
"--directory-probability",
"--directories",
default=1,
dest="prob_folder",
help="Probability to create a folder",
dest="nfolders",
help="Average number of folders to create",
type=float
)
parser.add_argument(
"-f",
"--file-probability",
"--files",
default=1,
dest="prob_file",
help="Probability to create a file",
dest="nfiles",
help="Average number of files to create",
type=float
)
parser.add_argument(
"--files-sigma",
default=1,
dest="files_sigma",
help="Spread of number of files created in each step",
type=float
)
parser.add_argument(
"--directories-sigma",
default=1,
dest="folders_sigma",
help="Spread of number of folders created in each step",
type=float
)
parser.add_argument(
Expand All @@ -49,10 +63,12 @@ def cli(args=None):
args = parser().parse_args()
create_random_tree(
basedir=args.basedir,
prob_file=args.prob_file,
prob_folder=args.prob_folder,
nfiles=args.nfiles,
nfolders=args.nfolders,
repeat=args.repeat,
maxdepth=args.maxdepth
maxdepth=args.maxdepth,
sigma_files=args.files_sigma,
sigma_folders=args.folders_sigma
)


Expand Down
23 changes: 13 additions & 10 deletions randomfiletree/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import sys
import os
import random
import string
Expand All @@ -23,38 +24,40 @@ def random_string(min_length=5, max_length=10):
)


def create_random_tree(basedir, prob_file=2, prob_folder=1, repeat=1,
maxdepth=None):
def create_random_tree(basedir, nfiles=2, nfolders=1, repeat=1,
maxdepth=None, sigma_folders=1, sigma_files=1):
"""
Create a random set of files and folders by repeatedly walking through the
current tree and creating random files or subfolders with a certain kind
of probability
current tree and creating random files or subfolders (the number of files
and folders created is chosen from a Gaussian distribution).
Args:
basedir: Directory to create files and folders in
prob_file: Probability to create a file in a directory
prob_folder: Probability to create a
nfiles: Mean number of files to create in each directory per pass
nfolders: Mean number of folders to create in each directory per pass
repeat: Walk this often through the directory tree to create new
subdirectories and files
maxdepth: Maximum depth to descend into current file tree. If None,
infinity.
sigma_folders: Standard deviation of the number of folders created per
directory
sigma_files: Standard deviation of the number of files created per
directory
Returns:
(List of dirs, List of files), all as pathlib.Path objects.
"""
alldirs = []
allfiles = []
for i in range(repeat):
for root, dirs, files in os.walk(str(basedir)):
print(root)
if random.random() < prob_folder:
for _ in range(int(random.gauss(nfolders, sigma_folders))):
p = Path(root) / random_string()
p.mkdir(exist_ok=True)
alldirs.append(p)
if random.random() < prob_file:
for _ in range(int(random.gauss(nfiles, sigma_files))):
p = Path(root) / random_string()
p.touch(exist_ok=True)
allfiles.append(p)
if maxdepth and os.path.relpath(root, str(basedir)).count(os.sep) >= maxdepth:
depth = os.path.relpath(root, str(basedir)).count(os.sep)
if maxdepth and depth >= maxdepth - 1:
del dirs[:]
alldirs = list(set(alldirs))
allfiles = list(set(allfiles))
Expand Down
16 changes: 10 additions & 6 deletions randomfiletree/test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,38 +44,38 @@ def get_content(self):

def test_create_random_tree_empty(self):
self.reset()
create_random_tree(self.basedir.name, 0, 0, 10, None)
create_random_tree(self.basedir.name, -10, -10, 3, None)
dirs, files = self.get_content()
self.assertEqual(len(dirs) + len(files), 0)

def test_create_random_files(self):
self.reset()
create_random_tree(self.basedir.name, 1, 0, 10, None)
create_random_tree(self.basedir.name, 5, -10, 3, None)
dirs, files = self.get_content()
self.assertEqual(len(dirs), 0)
self.assertGreater(len(files), 1)

def test_create_random_dirs(self):
self.reset()
create_random_tree(self.basedir.name, 0, 0.5, 10, None)
create_random_tree(self.basedir.name, -10, 2, 3, None)
dirs, files = self.get_content()
self.assertEqual(len(files), 0)
self.assertGreater(len(dirs), 1)

def test_create_both(self):
self.reset()
create_random_tree(self.basedir.name, 10, 0.5, 10, None)
create_random_tree(self.basedir.name, 3, 0.5, 3, None)
dirs, files = self.get_content()
self.assertGreater(len(files), 1)
self.assertGreater(len(dirs), 1)

def test_limit_depth(self):
self.reset()
create_random_tree(self.basedir.name, 10, 10, 5, 5)
create_random_tree(self.basedir.name, 3, 2, 5, maxdepth=3)
dirs, files = self.get_content()
max_depth = max(map(lambda x: x.count(os.sep), dirs)) - \
self.basedir.name.count(os.sep)
self.assertLessEqual(max_depth, 5)
self.assertLessEqual(max_depth, 4)


class TestChooseSample(unittest.TestCase):
Expand Down Expand Up @@ -129,3 +129,7 @@ def test_choose_ignore(self):
dirs, files = choose_random_elements(self.basedir.name, 2, 2, "ignore")
self.assertEqual(len(dirs), 0)
self.assertEqual(len(files), 0)


if __name__ == "__main__":
unittest.main()

0 comments on commit bc011a9

Please sign in to comment.