# Synthetic Dataset Generator

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../../")
from src.models.synthetic_logs import *

import pandas as pd

## Base Class

In [2]:
# Instantiate the base generator class with its default arguments.
logs = synthetic_logs()

In [3]:
logs.generate_trace()

[(-1, '... boring ...')]

In [4]:
logs.generate_instances()

[[(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')]]

In [5]:
logs.show_instances()

  1 : [... boring ...]
  2 : [... boring ...]
  3 : [... boring ...]
  4 : [... boring ...]
  5 : [... boring ...]


In [6]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,synthetic_logs,0,(boring)


## Noise

In [7]:
# Noise generator; with count=6 the trace printed below contains six events.
logs = noisy_path(count=6)

In [8]:
logs.generate_trace()

[(0, 'j'), (10, 's'), (20, 'c'), (30, 'b'), (40, 'm'), (50, 'r')]

In [9]:
# Generate a batch of instances with the default batch size (five rows are
# listed in the output) and print them in compact form.
logs.generate_instances()
logs.show_instances()

  1 : [o v h s a i]
  2 : [v m o f p c]
  3 : [k o f n h w]
  4 : [a t n d x q]
  5 : [q d s n a j]


In [10]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,noisy_path,25,q d s n a j


## Single Paths

In [11]:
# Path generator over seven symbols (A..G in the trace printed below).
logs = single_path(7)

In [12]:
logs.generate_trace()

[(5, 'A'), (10, 'B'), (17, 'C'), (29, 'D'), (32, 'E'), (38, 'F'), (45, 'G')]

In [13]:
# Generate the default-sized batch and print it; every instance in the output
# shows the same A..G sequence.
logs.generate_instances()
logs.show_instances()

  1 : [A B C D E F G]
  2 : [A B C D E F G]
  3 : [A B C D E F G]
  4 : [A B C D E F G]
  5 : [A B C D E F G]


In [14]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,7,A B C D E F G,10,1


## Noise * Single Path (combine)

In [15]:
# Combine a noise generator with two single_path generators using `*`.
# The operands are built and multiplied left to right:
#   1. noise over 50 symbols,
#   2. a 9-symbol path with error=0 that is always present (probability=1),
#   3. a 5-symbol path that keeps the default error and has probability=0.5.
logs = (
    noisy_path(every=10, num_symbols=50, count=30)
    * single_path(9, every=25, error=0, probability=1)
    * single_path(5, probability=0.5)
)

In [16]:
logs.generate_trace()[:7]

[(0, 'l'), (0, 'A'), (10, 'ap'), (20, 'ar'), (25, 'B'), (30, 'ad'), (40, 'ak')]

In [17]:
# Generate ten instances from the combined generator and print them.
logs.generate_instances(10)
logs.show_instances()

  1 : [u A f g B k ao j C aw ah D ar aq an E e al F a k au G ai z H w ad aj I q ar ao aw w ac f ai c]
  2 : [ax A c J ak K B L aa ai M r C N ac ab D w p am E e am F b m ae G ac x H au z aj I v an g k n f ax aj ai]
  3 : [aa A ah au B aq s ae C t q D ag ak aa E ad a F ai am ax G s y H ae i f I av au af t ad z u ai b]
  4 : [ar A J aj ah B K c L u M q C N r au D ad ap ab E d at F aj s z G aw aw H c c ar I ab am at t ag ax x z q]
  5 : [ab A J q ak K B L y M z N g C k af D t al at E m v F an m an G ar a H s j ah I n ad ar au ap f o av e]
  6 : [w A af ax B r r aw C ag ax D aq ab av E ag b F x an t G p m H aq av ad I n g l am ax al at as am]
  7 : [r A J ao K r B ag L M aa N at C b g D h aj y E av ai F aq b v G ae ao H aa h as I aj ab s af j ae ar as at]
  8 : [b A e aw B k au h C aj al D aj v n E ao ai F n r an G ag ai H y au an I w ad ac e e an a an s]
  9 : [s A J K u t B L f M N af l C n i D f as v E ao b F f b ad G aj f H ao g ao I al z ak q aa as v af g]
 10 : [b A J l K n B q L at a

In [18]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,noisy_path,50,b l n q at ae e au au aq,,
1,single_path,9,A B C D E F G H I,0.0,1.0
2,single_path,5,J K L M N,10.0,0.5


## Combining several single_path

In [19]:
# Three single_path generators: the first two use the default settings, the
# third overrides `every`, `error` and `probability`.
sp1, sp2 = single_path(4), single_path(5)
sp3 = single_path(10, every=10, error=5, probability=0.5)

# `*` associates left to right, so the grouping below is the one Python
# already applies to `sp1 * sp2 * sp3`.
logs = (sp1 * sp2) * sp3

In [20]:
#logs.generate_trace()

In [21]:
# Request 100 instances from the combined generator and print them.
# NOTE(review): the rendered output lists only 10 instances — presumably
# show_instances() caps how many it prints; confirm against its definition.
logs.generate_instances(100)
logs.show_instances()

  1 : [A J E F K B L C M G D N H O I P Q R S]
  2 : [E A F B G C H D I]
  3 : [E A B F G C D H I]
  4 : [E F J G A B K H L C M N I D O P Q R S]
  5 : [A B E F G C H I D]
  6 : [J A E K B F L M C G D N H O I P Q R S]
  7 : [A B E F C G D H I]
  8 : [J A E K F L B C G D M H I N O P Q R S]
  9 : [E J A K F B L M C G H I D N O P Q R S]
 10 : [J A E B C D F K L G M N O P H I Q R S]


In [22]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,4,A B C D,10,1.0
1,single_path,5,E F G H I,10,1.0
2,single_path,10,J K L M N O P Q R S,5,0.5


## Adding traces (concatenation)

In [23]:
# `-` is not defined between two single_path objects, so this expression raises
# TypeError. Catch and print it so the notebook still completes under
# "Restart & Run All" while the failure stays visible.
# NOTE(review): the section title talks about adding/concatenating traces —
# confirm which operator the library is meant to expose for that.
try:
    logs = single_path(4) - single_path(5)
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: unsupported operand type(s) for -: 'single_path' and 'single_path'