# Synthetic Dataset Generator

In [1]:
# Enable the autoreload extension; mode 2 reloads imported modules before
# each cell is executed, so edits to the project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
# Relative path hack so that the `src` package can be imported.
# NOTE(review): this depends on the kernel's working directory — presumably
# the notebook lives two levels below the repository root; confirm.
sys.path.append("../../")
# NOTE(review): wildcard import. The cells below use synthetic_logs,
# noisy_path and single_path from it.
from src.models.synthetic_logs import *

import pandas as pd

## Base Class

In [2]:
# Instantiate the base class with no arguments (all defaults).
logs = synthetic_logs()

In [3]:
# Produce one trace; the output below shows it as a list of (int, str) tuples.
logs.generate_trace()

[(-1, '... boring ...')]

In [4]:
# Produce a batch of traces with the default arguments
# (the output below contains five of them).
logs.generate_instances()

[[(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')],
 [(-1, '... boring ...')]]

In [5]:
# Print the instances, one numbered line each, symbols only.
# Presumably these are the instances stored by the generate_instances()
# call above — confirm against the class.
logs.show_instances()

  1 : [... boring ...]
  2 : [... boring ...]
  3 : [... boring ...]
  4 : [... boring ...]
  5 : [... boring ...]


In [6]:
# Render the entries under describe()['generators'] as a table,
# one row per generator.
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,synthetic_logs,0,(boring)


## Noise

In [7]:
# Noise generator; with count=6 the traces shown below contain six events.
logs = noisy_path(count=6)

In [8]:
# One noise trace; in the output below the integer part of each tuple
# advances in steps of 10.
logs.generate_trace()

[(0, 'r'), (10, 'r'), (20, 'l'), (30, 'j'), (40, 'f'), (50, 'm')]

In [9]:
# Generate a batch, then print it. Only the text printed by show_instances()
# appears below, because generate_instances() is not the cell's last expression.
logs.generate_instances()
logs.show_instances()

  1 : [r o c t j d]
  2 : [m p c r k t]
  3 : [i k i u v f]
  4 : [t e u j m w]
  5 : [s d m q q x]


In [10]:
# Generator summary table for the noise generator (class, #symbols, example).
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example
0,noisy_path,25,s d m q q x


## Single Paths

In [11]:
# Path generator with 7 symbols (A through G in the outputs below).
logs = single_path(7)

In [12]:
# One trace of the path. The integer offsets in the output are irregular,
# presumably because of the `error` setting listed in the summary table — confirm.
logs.generate_trace()

[(3, 'A'), (4, 'B'), (19, 'C'), (28, 'D'), (37, 'E'), (50, 'F'), (57, 'G')]

In [13]:
# Generate a batch and print it; every instance below shows the same
# symbol order A..G.
logs.generate_instances()
logs.show_instances()

  1 : [A B C D E F G]
  2 : [A B C D E F G]
  3 : [A B C D E F G]
  4 : [A B C D E F G]
  5 : [A B C D E F G]


In [14]:
# Generator summary; for single_path the table also carries
# `error` and `probability` columns.
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,7,A B C D E F G,10,1


## Noise * Single Path (combine)

In [15]:
# Combine a noise background with two paths using the `*` operator.
# The expression is evaluated left to right: noise * 9-symbol path first,
# then the result * 5-symbol path. The second path has probability=0.5;
# in the output below it (J..N) appears in only some of the instances.
logs = (
    noisy_path(every=10, num_symbols=50, count=30)
    * single_path(9, every=25, error=0, probability=1)
    * single_path(5, probability=0.5)
)

In [16]:
# Show only the first 7 events of one combined trace to keep the output short.
logs.generate_trace()[:7]

[(0, 'o'), (0, 'A'), (10, 'aj'), (20, 'aq'), (25, 'B'), (30, 'at'), (40, 'u')]

In [17]:
# Request 10 instances explicitly, then print them.
logs.generate_instances(10)
logs.show_instances()

  1 : [an A J K l L g B b M k k C N ao a D b au l E m ap F v s al G ad ar H aa ae r I ab d l h ar am ax am ad]
  2 : [q A ah ag B ax a ap C j u D y d ag E d aw F at av d G z al H au b ac I p af ao b m z r al au]
  3 : [al A J aj K o B L ab M ah ab C N ab w D au ap at E a f F r au ag G d aw H c ap ae I am o ad ap l ad k c au]
  4 : [at A J ag K ax B L ab M N j p C aa as D f k l E g a F av v o G ag u H a aw t I av as ah u z aq ac e z]
  5 : [av A J l K L r B w M av g C N u ag D au f ap E ar aw F k a at G ar m H j au m I aj c m ao w am u h ar]
  6 : [p A i av B ab a u C ae am D h b y E y ad F t au k G p au H ap m av I ah ac a ao aa t af w ao]
  7 : [i A as ar B x ai aa C aa aj D ag s av E al f F c t i G z t H b aq q I e x e an ao g t aw aw]
  8 : [aa A J b K ap B L y M N ai at C x aa D f g m E au au F b af ae G x g H m am aw I e ac z ar f al c p ae]
  9 : [u A ap ap B af e j C ak z D y an d E av d F w ad al G an y H z av b I x l g ax c am an ai ab]
 10 : [ap A ad J K al L B M ac r N aw C 

In [18]:
# One row per combined generator; the noisy_path row has no
# error/probability values in the table below.
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,noisy_path,50,ap ad al ac r aw ab ak y t,,
1,single_path,9,A B C D E F G H I,0.0,1.0
2,single_path,5,J K L M N,10.0,0.5


## Combining several `single_path` generators

In [19]:
# Build three path generators (constructed in the order sp1, sp2, sp3) and
# combine them with `*`. sp3 is the only one with non-default settings.
sp1, sp2 = single_path(4), single_path(5)
sp3 = single_path(
    10,
    every=10,
    error=5,
    probability=0.5,
)

logs = (sp1 * sp2) * sp3

In [20]:
#logs.generate_trace()

In [21]:
# Request 100 instances, then print them.
# NOTE(review): only 10 rows appear in the output below although 100 were
# requested — presumably show_instances() caps what it prints; confirm.
logs.generate_instances(100)
logs.show_instances()

  1 : [E J A K B F G L C H I M N D O P Q R S]
  2 : [A J E K F B L G C M D H N I O P Q R S]
  3 : [A E F B G H I C D]
  4 : [A E B C D F G H I]
  5 : [J E A K L B F C M D G N O H P I Q R S]
  6 : [A B E F C G H I D]
  7 : [E A B C F G D H I]
  8 : [J E A K L F B G M H C D N I O P Q R S]
  9 : [A E B F G H C D I]
 10 : [E A F B C G D H I]


In [22]:
# Summary of the three combined paths with their error/probability settings.
pd.DataFrame(logs.describe()['generators'])

Unnamed: 0,class,#symbols,example,error,probability
0,single_path,4,A B C D,10,1.0
1,single_path,5,E F G H I,10,1.0
2,single_path,10,J K L M N O P Q R S,5,0.5


## Adding traces (concatenation)

In [23]:
# Attempt to concatenate two paths with `-`.
# The `-` operator is currently not supported between single_path objects,
# so this expression raises TypeError. The error is caught and printed so
# that "Restart & Run All" still completes; when the operation fails, `logs`
# keeps the value assigned by the previous section.
# TODO: drop the try/except once concatenation is implemented.
try:
    logs = single_path(4) - single_path(5)
except TypeError as err:
    print("TypeError:", err)

TypeError: unsupported operand type(s) for -: 'single_path' and 'single_path'