Skip to content

Commit

Permalink
get rid of big data files (#15)
Browse files Browse the repository at this point in the history
replace large test files with random data generator
  • Loading branch information
kolomenkin authored and siddhantgoel committed May 20, 2018
1 parent 1d5637e commit 0fed805
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 45 deletions.
10 changes: 0 additions & 10 deletions tests/data/file.txt

This file was deleted.

Binary file removed tests/data/image-2560x1600.png
Binary file not shown.
Binary file removed tests/data/image-500k.png
Binary file not shown.
Binary file removed tests/data/image-600x400.png
Binary file not shown.
Binary file removed tests/data/image-high-res.jpg
Binary file not shown.
75 changes: 44 additions & 31 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os.path
from io import BytesIO
from numpy import random
from unittest import TestCase

from requests_toolbelt import MultipartEncoder
Expand All @@ -7,19 +8,31 @@
from streaming_form_data.targets import ValueTarget


DATA_DIR = 'tests/data'
def get_random_bytes(size, seed):
    """Return *size* deterministic pseudo-random bytes for *seed*.

    Uses a dedicated ``RandomState`` instance instead of seeding the
    global numpy generator, so calling this helper no longer mutates
    shared RNG state as a side effect.  The byte stream produced is
    identical to ``random.seed(seed); random.bytes(size)``.
    """
    return random.RandomState(seed).bytes(size)


def data_file_path(filename):
return os.path.join(DATA_DIR, filename)
def open_dataset(filename):
    """Return an in-memory binary stream holding deterministic test data.

    Known dataset names map to either a fixed text payload or
    seeded pseudo-random bytes; an unknown name raises an Exception.
    """
    # Lazy factories: only the requested payload is generated.
    factories = {
        'file.txt': lambda: b'this is a txt file\r\n' * 10,
        'image-600x400.png': lambda: get_random_bytes(1780, 600),
        'image-2560x1600.png': lambda: get_random_bytes(11742, 2560),
        'image-500k.png': lambda: get_random_bytes(437814, 500),
        'image-high-res.jpg': lambda: get_random_bytes(9450866, 945),
    }
    if filename not in factories:
        raise Exception('Unknown file name: ' + filename)
    return BytesIO(factories[filename]())


def load_file(path):
_, filename = os.path.split(path)

with open(path, 'rb') as file_:
def encoded_dataset(filename):
with open_dataset(filename) as dataset_:
fields = {
filename: (filename, file_, 'text/plain')
filename: (filename, dataset_, 'text/plain')
}

encoder = MultipartEncoder(fields=fields)
Expand Down Expand Up @@ -204,10 +217,10 @@ def test_file_content_single(self):
filenames = ('file.txt', 'image-600x400.png', 'image-2560x1600.png')

for filename in filenames:
with open(data_file_path(filename), 'rb') as file_:
expected_value = file_.read()
with open_dataset(filename) as dataset_:
expected_value = dataset_.read()

content_type, body = load_file(data_file_path(filename))
content_type, body = encoded_dataset(filename)

value = ValueTarget()

Expand All @@ -220,10 +233,10 @@ def test_file_content_single(self):
self.assertEqual(value.value, expected_value)

def test_file_content_multiple(self):
with open(data_file_path('file.txt'), 'rb') as file_:
expected_value = file_.read()
with open_dataset('file.txt') as dataset_:
expected_value = dataset_.read()

content_type, body = load_file(data_file_path('file.txt'))
content_type, body = encoded_dataset('file.txt')

txt = ValueTarget()

Expand All @@ -244,10 +257,10 @@ def test_file_content_multiple(self):
self.assertEqual(txt.value, expected_value)

def test_file_content_varying_chunk_size(self):
with open(data_file_path('file.txt'), 'rb') as file_:
expected_value = file_.read()
with open_dataset('file.txt') as dataset_:
expected_value = dataset_.read()

content_type, body = load_file(data_file_path('file.txt'))
content_type, body = encoded_dataset('file.txt')

for index in range(len(body)):
txt = ValueTarget()
Expand All @@ -262,14 +275,14 @@ def test_file_content_varying_chunk_size(self):
self.assertEqual(txt.value, expected_value)

def test_mixed_content_varying_chunk_size(self):
with open(data_file_path('file.txt'), 'rb') as file_:
expected_value = file_.read()
with open_dataset('file.txt') as dataset_:
expected_value = dataset_.read()

with open(data_file_path('file.txt'), 'rb') as file_:
with open_dataset('file.txt') as dataset_:
fields = {
'name': 'hello world',
'age': '10',
'cv.txt': ('file.txt', file_, 'text/plain')
'cv.txt': ('file.txt', dataset_, 'text/plain')
}

encoder = MultipartEncoder(fields=fields)
Expand Down Expand Up @@ -338,17 +351,17 @@ def test_multiple_files(self):
txt_filename = 'file.txt'
png_filename = 'image-600x400.png'

with open(data_file_path(txt_filename), 'rb') as file_:
expected_txt = file_.read()
with open_dataset(txt_filename) as dataset_:
expected_txt = dataset_.read()

with open(data_file_path(png_filename), 'rb') as file_:
expected_png = file_.read()
with open_dataset(png_filename) as dataset_:
expected_png = dataset_.read()

txt_target = ValueTarget()
png_target = ValueTarget()

with open(data_file_path(txt_filename), 'rb') as txt_file, \
open(data_file_path(png_filename), 'rb') as png_file:
with open_dataset(txt_filename) as txt_file, \
open_dataset(png_filename) as png_file:
encoder = MultipartEncoder(fields={
txt_filename: (txt_filename, txt_file,
'application/plain'),
Expand All @@ -369,10 +382,10 @@ def test_multiple_files(self):
def test_large_file(self):
for filename in ['image-500k.png', 'image-2560x1600.png',
'image-600x400.png', 'image-high-res.jpg']:
with open(data_file_path(filename), 'rb') as file_:
expected_value = file_.read()
with open_dataset(filename) as dataset_:
expected_value = dataset_.read()

content_type, body = load_file(data_file_path(filename))
content_type, body = encoded_dataset(filename)

value = ValueTarget()

Expand Down
27 changes: 23 additions & 4 deletions utils/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from argparse import ArgumentParser
from functools import wraps
from io import StringIO
import cProfile
from functools import wraps
from io import StringIO, BytesIO
from numpy import random
import pstats

from requests_toolbelt import MultipartEncoder
Expand Down Expand Up @@ -64,16 +65,34 @@ def parse_args():
parser = ArgumentParser()
parser.add_argument('-c', '--content-type', type=str, required=True,
help='Content Type of the input file')
parser.add_argument('-f', '--filename', type=str, required=True,
parser.add_argument('-f', '--filename', type=str, required=False,
help='File to be uploaded')
parser.add_argument('--data-size', metavar='SIZE',
type=int, required=False,
help='Size of generated data' +
' to be used instead of real file')
return parser.parse_args()


def get_random_bytes(size, seed):
    """Return *size* deterministic pseudo-random bytes for *seed*.

    Uses a dedicated ``RandomState`` instance instead of seeding the
    global numpy generator, so calling this helper no longer mutates
    shared RNG state as a side effect.  The byte stream produced is
    identical to ``random.seed(seed); random.bytes(size)``.
    """
    return random.RandomState(seed).bytes(size)


def open_data(args):
    """Return a readable binary stream selected by the parsed CLI args.

    Prefers ``--filename`` (a real file opened in binary mode); falls
    back to ``--data-size`` (seeded random bytes in memory).  Raises an
    Exception when neither option was supplied.
    """
    if args.filename is not None:
        return open(args.filename, 'rb')
    if args.data_size is not None:
        return BytesIO(get_random_bytes(args.data_size, 42))
    # Bug fix: message previously said '--data_size', but the option
    # registered with argparse is spelled '--data-size'.
    raise Exception('Not enough arguments passed: ' +
                    'please specify --filename or --data-size argument')


@c_profile()
def main():
args = parse_args()

with open(args.filename, 'rb') as fd:
with open_data(args) as fd:
encoder = MultipartEncoder(fields={
'file': ('file', fd, args.content_type)
})
Expand Down

0 comments on commit 0fed805

Please sign in to comment.