# Synthetic Dataset Generator

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../../")
from src.models.synthetic_logs import *

import pandas as pd

## Base Class

In [24]:
# Instantiate the base generator with its defaults; per the outputs below it
# only emits the placeholder event (-1, '... boring ...').
logs = synthetic_logs()

In [25]:
logs.generate_trace()

[(-1, '... boring ...')]

In [26]:
logs.generate_instances()

[[(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')]]

In [27]:
logs.show_instances()

  1 : [... boring ...]
  2 : [... boring ...]
  3 : [... boring ...]
  4 : [... boring ...]
  5 : [... boring ...]


In [31]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,synthetic_logs,0,(boring)


## Noise

In [33]:
# Noise-only generator; count=6 yields six random symbols per trace (see the
# generated trace below).
logs = noisy_path(count=6)

In [34]:
logs.generate_trace()

[(0, 'i'), (10, 'f'), (20, 'p'), (30, 't'), (40, 'i'), (50, 'v')]

In [35]:
logs.generate_instances()
logs.show_instances()

  1 : [m b b t d n]
  2 : [t d k j v u]
  3 : [o x d v e p]
  4 : [n g n f m m]
  5 : [w g y s w q]


In [36]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,noisy_path,25,w g y s w q


## Single Paths

In [84]:
# A single fixed path of 7 symbols (A..G); every instance replays the same
# sequence, as the outputs below show.
logs = single_path(7)

In [85]:
logs.generate_trace()

[(3, 'A'), (9, 'B'), (12, 'C'), (27, 'D'), (43, 'E'), (49, 'F'), (65, 'G')]

In [86]:
logs.generate_instances()
logs.show_instances()

  1 : [A B C D E F G]
  2 : [A B C D E F G]
  3 : [A B C D E F G]
  4 : [A B C D E F G]
  5 : [A B C D E F G]


In [87]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,7,A B C D E F G,10,1


## Noise * Single Path (combine)

In [92]:
# Combine three generators with `*`: a 50-symbol noise stream of 30 events,
# a 9-symbol path (error=0, probability=1), and a 5-symbol path created with
# probability=0.5 (it shows up in only some of the instances below).
logs = (
    noisy_path(every=10, num_symbols=50, count=30)
    * single_path(9, every=25, error=0, probability=1)
    * single_path(5, probability=0.5)
)

In [93]:
logs.generate_trace()[:7]

[(0, 'ae'), (0, 'A'), (5, 'J'), (8, 'K'), (10, 'm'), (17, 'L'), (20, 'u')]

In [94]:
logs.generate_instances(10)
logs.show_instances()

  1 : [a A an e B ar ap s C ax r D av aq r E r y F ar an ab G j aw H c y ao I k aj ah ac af e n b ao]
  2 : [m A ar au B k g w C am a D d ae w E ao ao F at aw a G ae ak H aw x av I ap aq ak g al t r w o]
  3 : [ae A J j K ad B L aq M w N z C p d D x i ac E f aw F k ao ai G ai ac H d x ad I as ad ae x c am l j n]
  4 : [q A J ac K L e M B ad N aq an C an au D ak ao ad E y y F v y s G v aq H d f u I k am ap n am g d au l]
  5 : [ap A J t aj K B v am L M N b C ap f D b d u E am at F aj r h G g af H d af ac I d d t z a f ag v au]
  6 : [s A c ai B n av y C y av D y ai q E ar c F an ar au G as a H av m b I q ax ae ag ao d r ad l]
  7 : [ae A au ap B i r g C d w D d p ad E aw g F j p as G t ad H i j g I ai p w l y aq b ai aw]
  8 : [w A m q B s as an C n aw D p ar ae E ax ah F aq ar h G c v H al b an I y g ad au ap x aj l ap]
  9 : [c A z t B ak aj as C ag q D i ar aw E av b F al h am G o z H ao l w I ag ac k ag aw ad t ah g]
 10 : [i A ax y B ai an f C ap o D r af ae E e m F p as n G au e H

In [95]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,noisy_path,50,i ax y ai an f ap o r af,,
1,single_path,9,A B C D E F G H I,0.0,1.0
2,single_path,5,J K L M N,10.0,0.5


## Combining several single_path

In [72]:
# Three independent paths; the first two use the defaults, the third is
# created with probability=0.5 and only shows up in some instances.
sp1 = single_path(4)
sp2 = single_path(5)
sp3 = single_path(10, every=10, error=5, probability=0.5)

# `*` combines the generators into one log source.
logs = sp1 * sp2 * sp3

In [73]:
#logs.generate_trace()

In [74]:
# NOTE(review): 100 instances are requested but only 10 are listed in the
# output below — presumably show_instances() caps what it prints; confirm.
logs.generate_instances(100)
logs.show_instances()

  1 : [E J K A B F G L C M H I N O D P Q R S]
  2 : [A B E C F D G H I]
  3 : [A E F J G K L B H M C I D N O P Q R S]
  4 : [A B E F G C D H I]
  5 : [E F A G B C H I D]
  6 : [A B E F G C H D I]
  7 : [E F A G B H C D I]
  8 : [A E F G B H I C D]
  9 : [J E A B K F G C L H M D N I O P Q R S]
 10 : [A J B E F K C L G M D N H O I P Q R S]


In [75]:
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,4,A B C D,10,1.0
1,single_path,5,E F G H I,10,1.0
2,single_path,10,J K L M N O P Q R S,5,0.5


## Adding traces (concatenation)

In [51]:
# Concatenation is spelled `+`; single_path does not implement it yet, so this
# cell currently raises the TypeError recorded below.
logs = single_path(4) + single_path(5)

TypeError: unsupported operand type(s) for +: 'single_path' and 'single_path'