Skip to content

Commit

Permalink
Online data generator fix. 13970320
Browse files Browse the repository at this point in the history
  • Loading branch information
m-zakeri committed Jun 10, 2018
1 parent fe47a22 commit 7d2a48d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 26 deletions.
6 changes: 3 additions & 3 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
iu_config = {
'single_object_update': True, # [False, True]
'single_object_update': False, # [False, True]
# The option below is used only if 'single_object_update' is set to False
'portion_of_rewrite_objects': 1/4., # [1/4., 1/3., 1/2.] /**/ {host1_max: 1/5., host2_min: 1/3., host3_avg: 1/4.}
'portion_of_rewrite_objects': 1/5., # [1/4., 1/3., 1/2.] /**/ {host1_max: 1/5., host2_min: 1/3., host3_avg: 1/4.}
#
'update_policy': 'bottom_up', # ['random', 'bottom_up', 'top-down']
'update_policy': 'random', # ['random', 'bottom_up', 'top-down'] /**/ {'bottom_up' for sou and 'random' for mou}
'getting_object_policy': 'random', # ['sequential', 'random']

# Old pdf file path (same hosts)
Expand Down
42 changes: 21 additions & 21 deletions incremental_update/iu_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@

import PyPDF2

# sys.path.insert(0, '../config.py')
from .. import config
# sys.path.insert(0, '..')
from config import iu_config
import pdf_object_preprocess as preprocess
from .. import lstm_text_generation_pdf_objs_8
from lstm_text_generation_pdf_objs_8 import FileFormatFuzzer


class IncrementalUpdate(object):
Expand All @@ -33,8 +33,8 @@ class IncrementalUpdate(object):
"""
def __init__(self,
host_id=None,
object_file_path=config.iu_config['baseline_object_path'],
stream_directory_path=config.iu_config['stream_directory_path']):
object_file_path=iu_config['baseline_object_path'],
stream_directory_path=iu_config['stream_directory_path']):
"""
:param host_id: Name of host file without postfix, e.g. host1_max, host2_min or host3_avg
Expand All @@ -52,7 +52,7 @@ def __init__(self,

# Create a new directory each time the program runs to generate new test data
dt = datetime.datetime.now().strftime(self.host_id + '_date_%Y-%m-%d_%H-%M-%S')
self.storage_dir_name = config.iu_config['new_pdfs_directory'] + self.host_id + '/' + dt + '/'
self.storage_dir_name = iu_config['new_pdfs_directory'] + self.host_id + '/' + dt + '/'
if not os.path.exists(self.storage_dir_name):
os.makedirs(self.storage_dir_name)
print('New storage directory build.')
Expand All @@ -62,14 +62,14 @@ def __init__(self,
retval = os.getcwd()
os.chdir('../')
print(os.getcwd())
self.fff = lstm_text_generation_pdf_objs_8.FileFormatFuzzer(maxlen=50, step=1, batch_size=256)
self.fff = FileFormatFuzzer(maxlen=50, step=1, batch_size=256)

self.object_buffer_list = self.fff.load_model_and_generate()
self.object_buffer_index = 0
os.chdir(retval)

def read_pdf_file(self):
with open(config.iu_config['raw_host_directory'] + self.host_id + '.pdf', 'rb') as f:
with open(iu_config['raw_host_directory'] + self.host_id + '.pdf', 'rb') as f:
data = f.read()
return data

Expand All @@ -85,7 +85,7 @@ def obj_generator(self, obj_list):
if i >= len(obj_list):
i = 0

def get_one_object(self, getting_object_policy=config.iu_config['getting_object_policy'], from_model=True):
def get_one_object(self, getting_object_policy=iu_config['getting_object_policy'], from_model=True):
"""
Provide one PDF data object: either an existing object from the corpus or
a new object generated online by the learnt model
Expand Down Expand Up @@ -144,7 +144,7 @@ def __get_one_object_from_model(self):
return obj

def get_last_object_id(self):
with open(config.iu_config['raw_host_directory'] + self.host_id + '.pdf', 'br') as f:
with open(iu_config['raw_host_directory'] + self.host_id + '.pdf', 'br') as f:
read_pdf = PyPDF2.PdfFileReader(f)
last_object_id = read_pdf.trailer['/Size'] - 1 # size xref - 1
return last_object_id
Expand All @@ -159,11 +159,11 @@ def incremental_update(self, sequential_number=0):
last_object_id = str(self.get_last_object_id())
rewrite_object_content = self.get_one_object() # Updated. Now include stream objects.

if config.iu_config['single_object_update']: # Just one object rewrite with new content
if config.iu_config['update_policy'] == 'random':
if iu_config['single_object_update']: # Just one object rewrite with new content
if iu_config['update_policy'] == 'random':
# Choose randomly from [2, last_object_id] because we never want to modify the first object.
rewrite_object_id = str(random.randint(2, int(last_object_id)))
elif config.iu_config['update_policy'] == 'bottom_up':
elif iu_config['update_policy'] == 'bottom_up':
rewrite_object_id = last_object_id
else:
rewrite_object_id = last_object_id
Expand All @@ -177,18 +177,18 @@ def incremental_update(self, sequential_number=0):
self.write_pdf_file(name_description, data)
print('save new pdf file successfully')
else: # Multiple object rewrite with new content (base on 'portion_of_rewrite_objects') in config file
number_of_rewrite_objects = math.ceil(config.iu_config['portion_of_rewrite_objects'] * int(last_object_id))
number_of_rewrite_objects = math.ceil(iu_config['portion_of_rewrite_objects'] * int(last_object_id))
# print(host_id, number_of_of_rewrite_objects)
rewrite_object_id = last_object_id
rewrite_object_ids = ''
for i in range(int(number_of_rewrite_objects)):
rewrite_object_content = self.get_one_object()
if config.iu_config['update_policy'] == 'random':
if iu_config['update_policy'] == 'random':
# Choose randomly from [2, last_object_id] because we never want to modify the first object.
rewrite_object_id = str(random.randint(2, int(last_object_id)))
elif config.iu_config['update_policy'] == 'bottom_up':
elif iu_config['update_policy'] == 'bottom_up':
rewrite_object_id = int(last_object_id) - i
elif config.iu_config['update_policy'] == 'top-down':
elif iu_config['update_policy'] == 'top-down':
# Not implemented yet.
pass
rewrite_object_ids += '-' + str(rewrite_object_id).zfill(3)
Expand All @@ -198,7 +198,7 @@ def incremental_update(self, sequential_number=0):
# Set name for new pdf files like:
# host1_sou_85_6_20180307_114117
# dt = datetime.datetime.now().strftime('_%Y%m%d_%H%M%S')
name_description = '_mou_' + str(sequential_number).zfill(4) + '_objs' + str(rewrite_object_ids)
name_description = '_mou_' + str(sequential_number).zfill(4) + '_objs_25p' # + str(rewrite_object_ids)
self.write_pdf_file(name_description, data)
print('save new pdf file successfully')

Expand Down Expand Up @@ -292,7 +292,7 @@ def fuzz_binary_stream(self, binary_stream):
:param binary_stream:
:return: fuzzed_binary_stream
"""
if config.iu_config['stream_fuzzing_policy'] == 'basic_random':
if iu_config['stream_fuzzing_policy'] == 'basic_random':
for i in range(math.ceil(len(binary_stream)/100)):
# Choose one byte randomly
byte_to_reverse_index = random.randint(0, len(binary_stream)-1)
Expand Down Expand Up @@ -322,14 +322,14 @@ def fuzz_binary_stream(self, binary_stream):
binary_stream = binary_stream[0:byte_to_reverse_index]\
+ one_byte_reverse \
+ binary_stream[byte_to_reverse_index+1:]
elif config.iu_config['stream_fuzzing_policy'] == 'other':
elif iu_config['stream_fuzzing_policy'] == 'other':
# No other policy is implemented yet :)
pass
return binary_stream


def main(argv):
host_id = 'host2_min'
host_id = 'host1_max'
amount_of_testdata = 1000
iu = IncrementalUpdate(host_id=host_id)
for i in range(amount_of_testdata):
Expand Down
4 changes: 2 additions & 2 deletions lstm_text_generation_pdf_objs_8.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,10 +409,10 @@ def generate_and_fuzz_new_samples(self,

# diversities = [i*0.10 for i in range(1,20,2)]
diversities = [0.2, 0.5, 1.0, 1.2, 1.5, 1.8]
diversities = [1.5]
diversities = [0.5]

generated_obj_total = 1000 # [5, 10, 100, 1000]
generated_obj_with_same_prefix = 10 # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
generated_obj_with_same_prefix = 20 # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
generated_obj_max_allowed_len = random.randint(450, 550) # Choose max allowed len for object randomly
exclude_from_fuzzing_set = {'s', 't', 'r', 'e', 'a', 'm'} # set(['s', 't', 'r', 'e', 'a', 'm'])

Expand Down

0 comments on commit 7d2a48d

Please sign in to comment.