In [1]:
import pandas as pd
import numpy as np

# Number of real events in every generated sequence (white space is inserted on top of these).
sequenceLength = 200

# Number of sequences in the generated data set and the intended number of anomalies per anomalous sequence.
# NOTE(review): amount_of_anomalies is not read by the bin loop below, which hard-codes 4 -- keep the two in sync.
amount_of_sequences = 16
amount_of_anomalies = 4

# The following code makes sure the initial events do not contain any anomalies (first 50 events + 10 white spaces).
# The remaining 150 events are divided into 4 bins with 1 anomaly each, so that the anomalies are evenly spread across the sequence.
# Each bin is stored as a (lower, upper) pair; the loop below yields (52, 85), (89, 122), (126, 159), (163, 196).
anomaly_bins = []
upper_bound = 50
for _ in range(4):
    lower_bound = upper_bound + 2
    upper_bound += 37
    anomaly_bins.append((lower_bound, upper_bound - 2))
anomaly_bins

# White space is spread over equally sized bins of the sequence so that it is not clustered.
white_space_bin_amount = 8
white_space_percentage = 0.2
white_space_amount = int(sequenceLength*white_space_percentage) # 40 white-space events per sequence
total_length = int(sequenceLength+(sequenceLength*white_space_percentage)) # 240 = events + white space

bin_size_white_space = sequenceLength//white_space_bin_amount # 25 events per white-space bin
white_space_per_bin = int((sequenceLength*white_space_percentage)//white_space_bin_amount) # 5 white spaces per bin

# Number of positions on each side of a pattern transition that frozenindexes_binned keeps free of anomalies.
transition_margen = 2

### Pattern and dataframe building blocks

In [2]:
def patterngenerator(pattern, sequenceLength=200):
  """Repeat `pattern` cyclically and cut the result to `sequenceLength` elements.

  Args:
    pattern: non-empty 1-D sequence of event values to repeat.
    sequenceLength: number of elements in the returned array.

  Returns:
    A numpy array holding the first `sequenceLength` elements of the repeated pattern.
  """
  # One repetition more than the integer quotient always covers the requested length.
  repetitions = sequenceLength // len(pattern) + 1
  tiled = np.tile(pattern, repetitions)
  return tiled[:sequenceLength]

In [3]:
def dataframegenerator(patternlist, length=240):
  """Build a long-format DataFrame with one row per (sequence, event) pair.

  The number of sequences is taken from `patternlist` itself, so the function
  works for any number of patterns instead of only for a fixed global count.

  Args:
    patternlist: iterable of 1-D event sequences (lists or arrays), each
      expected to hold `length` values.
    length: number of events per sequence.

  Returns:
    DataFrame with the columns 'Sequence' (index of the pattern in
    `patternlist`), 'Event' (position inside the sequence) and 'EventCat'
    (the event value cast to int).

  Raises:
    ValueError: if the patterns do not hold `length` values each on average,
      i.e. the total number of values differs from len(patternlist) * length.
  """
  patternlist = list(patternlist)
  sequence_count = len(patternlist)

  if sequence_count:
    # A single concatenate call avoids re-copying the growing array per pattern.
    patternarray = np.concatenate([np.asarray(pattern) for pattern in patternlist]).astype(int)
  else:
    patternarray = np.array([], dtype=int)

  if patternarray.size != sequence_count * length:
    raise ValueError(
      f"expected {sequence_count} patterns of {length} events "
      f"({sequence_count * length} values), got {patternarray.size} values"
    )

  df_generated = pd.DataFrame({
    'Sequence': np.repeat(np.arange(sequence_count), length),
    'Event': np.tile(np.arange(length), sequence_count),
    'EventCat': patternarray
  })
  return df_generated

In [4]:
def patternconcatinator(patternlist):
    """Join several patterns end to end into one integer array.

    Args:
        patternlist: iterable of 1-D sequences to append in order.

    Returns:
        A numpy array with all values in order, cast to int. For an empty
        `patternlist` the untouched empty seed array is returned.
    """
    combined = np.array([])
    for segment in patternlist:
        joined = np.concatenate([combined, segment])
        # The cast happens after every join, exactly like the values are appended.
        combined = joined.astype(int)
    return combined

In [5]:
def patternimputer(pattern, pattern_value, spacing):
  """Overwrite every `spacing`-th element of `pattern` with `pattern_value`.

  The positions written are spacing-1, 2*spacing-1, ... below the
  module-level `sequenceLength`. `pattern` is modified in place and returned.

  Args:
    pattern: numpy array that is written to.
    pattern_value: value to write; it is cast to int before assignment.
    spacing: distance between two written positions.

  Returns:
    The same `pattern` object after modification.
  """
  target_positions = np.fromiter(range(spacing-1, sequenceLength, spacing), dtype=int)
  replacement_values = np.array([pattern_value] * len(target_positions)).astype(int)
  pattern[target_positions] = replacement_values
  return pattern

### Base pattern

In [6]:
# Elementary repeating units the sequences are built from.
base_pattern_1 = np.array([0, 1]).astype(int)
base_pattern_2 = np.array([0, 2]).astype(int)
base_pattern_3 = np.array([2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]).astype(int) # 15


base_list = [base_pattern_1, base_pattern_2]

# Pre-rendered segments of 20 and 50 events: key "x1" holds base_pattern_1, "x2" holds base_pattern_2.
base_20 = {f"x{i+1}": patterngenerator(pattern, 20) for i, pattern in enumerate(base_list)}
base_50 = {f"x{i+1}": patterngenerator(pattern, 50) for i, pattern in enumerate(base_list)}


In [7]:
# Anomaly-free reference sequences:
#   type 1 - base_pattern_1 repeated for sequenceLength events,
#   type 2 - base_pattern_3 repeated for sequenceLength events,
#   type 3 - four 50-event segments alternating base_pattern_2 ('x2') and base_pattern_1 ('x1').
type_1_no_anomaly = patterngenerator(base_pattern_1, sequenceLength)
type_2_no_anomaly = patterngenerator(base_pattern_3, sequenceLength)
type_3_no_anomaly = patternconcatinator([base_50['x2'], base_50['x1'], base_50['x2'], base_50['x1']])

### Adding whitespace binned

In [8]:
import random

def add_white_space_per_pattern_binned(pattern):
  """Insert white-space events (value 11) into `pattern`, evenly spread over bins.

  The index range [0, sequenceLength) is cut into bins of
  `bin_size_white_space` positions. In every bin `white_space_per_bin`
  distinct positions are drawn at random, and an 11 is placed directly in
  front of the event at each drawn position.

  Args:
    pattern: 1-D sequence of events.

  Returns:
    A new list with the original events in order plus the inserted 11s.
  """
  bin_edges = list(range(0, (sequenceLength + bin_size_white_space), bin_size_white_space))

  # Draw the insertion positions bin by bin; consecutive edges delimit one bin.
  chosen_positions = set()
  for bin_start, bin_stop in zip(bin_edges[:-1], bin_edges[1:]):
    chosen_positions.update(random.sample(range(bin_start, bin_stop), white_space_per_bin))

  with_white_space = []
  for position, event in enumerate(pattern):
    if position in chosen_positions:
      with_white_space.append(11)
    with_white_space.append(event)

  return with_white_space

In [9]:
def white_space_pattern_list(pattern_list):
  """Apply add_white_space_per_pattern_binned to a copy of every pattern.

  Args:
    pattern_list: iterable of patterns; each must provide a `.copy()` method.

  Returns:
    List with one white-space-augmented sequence per input pattern, in order.
  """
  return [add_white_space_per_pattern_binned(sequence.copy()) for sequence in pattern_list]

### Anomalies Binned

In [10]:
def indexfinder(arr):
    """Return the positions at which a third distinct value shows up.

    The values are scanned in order while remembering the distinct values seen
    so far. As soon as three distinct values have been collected, the current
    position is recorded and the memory is cleared (the value that triggered
    the reset is not carried over).

    Args:
        arr: iterable of comparable values.

    Returns:
        List of recorded positions in ascending order.
    """
    seen = []
    change_points = []

    for position, value in enumerate(arr):
        # A value that is already remembered can never complete a triple.
        if value in seen:
            continue
        seen.append(value)
        if len(seen) < 3:
            continue
        change_points.append(position)
        seen = []

    return change_points

In [11]:
def frozenindexes_binned(change_index, bin):
  """Collect every position that must not receive an anomaly for one bin.

  A position is frozen when it lies outside the half-open range
  [bin[0], bin[1]) or within `transition_margen` positions of any entry of
  `change_index`.

  Args:
    change_index: iterable of transition positions.
    bin: (lower, upper) pair delimiting the positions that stay available.

  Returns:
    Sorted array of unique frozen positions (result of np.union1d).
  """
  # Everything in [0, sequenceLength) except the positions inside the bin.
  outside_bin = np.delete(np.arange(sequenceLength), np.arange(bin[0], bin[1])).astype(int)

  margin_offsets = range(-transition_margen, transition_margen+1, 1)
  near_transitions = [transition + offset for transition in change_index for offset in margin_offsets]

  return np.union1d(near_transitions, outside_bin)

In [12]:
import random
def anomalyIndexGenerator_binned(excluded_indexes):
  """Draw one random position in [0, sequenceLength) that is not excluded.

  Args:
    excluded_indexes: container of positions that may not be chosen.

  Returns:
    A single position picked with random.sample.
  """
  candidates = []
  for position in range(sequenceLength):
    if position in excluded_indexes:
      continue
    candidates.append(position)

  (chosen,) = random.sample(candidates, 1)
  return chosen

In [13]:
def anomalyValueGenerator_binned(pattern, anomaly_index):
  """Pick a base value from {0, 1, 2} that breaks the pattern at `anomaly_index`.

  The returned value differs from both the event currently at
  `anomaly_index` and the event directly before it. When those two events are
  equal, or are not part of {0, 1, 2}, more than one value qualifies and the
  smallest one is returned instead of raising.

  Args:
    pattern: indexable sequence of event values.
    anomaly_index: position that will receive the anomaly.

  Returns:
    0 when `anomaly_index` is 0 (there is no previous event to compare with),
    otherwise the smallest value of 0, 1, 2 that is neither the current nor
    the previous event.
  """
  if anomaly_index == 0:
    return 0

  current_value = pattern[anomaly_index]
  previous_value = pattern[anomaly_index-1]
  # Two events can rule out at most two of the three candidates, so one always remains.
  return next(
    candidate for candidate in (0, 1, 2)
    if candidate != current_value and candidate != previous_value
  )

In [14]:
def anomalyValues_binned(indexes, pattern):
  """Compute the anomaly code for every position in `indexes`.

  Args:
    indexes: iterable of positions that will receive an anomaly.
    pattern: sequence the positions refer to.

  Returns:
    List with, per position, the value from anomalyValueGenerator_binned
    shifted by 8.
  """
  return [anomalyValueGenerator_binned(pattern, position) + 8 for position in indexes]

### Type 3 binned

In [15]:
def anomaliesImputerType3_binned(pattern):
  """Write one anomaly per entry of `anomaly_bins` into a type 3 pattern.

  Transition positions found by indexfinder, plus their margin, are kept free
  of anomalies. `pattern` is modified in place and returned.

  Args:
    pattern: numpy array of events.

  Returns:
    The same array with the anomaly codes written in.
  """
  transition_positions = indexfinder(pattern)
  # One random position per bin, drawn in bin order.
  anomaly_positions = [
    anomalyIndexGenerator_binned(frozenindexes_binned(transition_positions, anomaly_bin))
    for anomaly_bin in anomaly_bins
  ]
  pattern[anomaly_positions] = anomalyValues_binned(anomaly_positions, pattern)
  return pattern

# Example type 3 sequence with anomalies; the template is copied because the imputer writes into its argument.
new_type_3_binned = anomaliesImputerType3_binned(type_3_no_anomaly.copy())


### Type 2 binned

In [16]:
def anomaliesImputerType2_binned(pattern):
  """Write one anomaly per entry of `anomaly_bins` into a type 2 pattern.

  Transition positions found by indexfinder, plus their margin, are kept free
  of anomalies. `pattern` is modified in place and returned.

  Args:
    pattern: numpy array of events.

  Returns:
    The same array with the anomaly codes written in.
  """
  transition_positions = indexfinder(pattern)
  # One random position per bin, drawn in bin order.
  anomaly_positions = [
    anomalyIndexGenerator_binned(frozenindexes_binned(transition_positions, anomaly_bin))
    for anomaly_bin in anomaly_bins
  ]
  pattern[anomaly_positions] = anomalyValues_binned(anomaly_positions, pattern)
  return pattern

# Example type 2 sequence with anomalies; the template is copied because the imputer writes into its argument.
new_type_2 = anomaliesImputerType2_binned(type_2_no_anomaly.copy())


### Type 1 binned

In [17]:
def anomaliesImputerType1_binned(pattern):
  """Write one anomaly per entry of `anomaly_bins` into a type 1 pattern.

  No transition positions are passed on, so only the bin limits restrict
  where an anomaly may land. `pattern` is modified in place and returned.

  Args:
    pattern: numpy array of events.

  Returns:
    The same array with the anomaly codes written in.
  """
  transition_positions = []
  # One random position per bin, drawn in bin order.
  anomaly_positions = [
    anomalyIndexGenerator_binned(frozenindexes_binned(transition_positions, anomaly_bin))
    for anomaly_bin in anomaly_bins
  ]
  pattern[anomaly_positions] = anomalyValues_binned(anomaly_positions, pattern)
  return pattern

# Example type 1 sequence with anomalies; the template is copied because the imputer writes into its argument.
new_type_1 = anomaliesImputerType1_binned(type_1_no_anomaly.copy())

### Mix

In [18]:
# 16 sequences in total: 4 anomalous sequences of each type followed by 4 anomaly-free ones.
# The entries are evaluated top to bottom, so the random draws happen in the listed order.
mixed_patterns_anomalies_binned = [
  *(anomaliesImputerType1_binned(type_1_no_anomaly.copy()) for _ in range(4)),
  *(anomaliesImputerType2_binned(type_2_no_anomaly.copy()) for _ in range(4)),
  *(anomaliesImputerType3_binned(type_3_no_anomaly.copy()) for _ in range(4)),
  type_1_no_anomaly.copy(),
  type_2_no_anomaly.copy(),
  type_3_no_anomaly.copy(),
  type_1_no_anomaly.copy(),
]

In [19]:
# White space is inserted once; the four exported versions then only differ in sequence order.
pilot_mixed_patterns_anomalies_white_space_binned = white_space_pattern_list(mixed_patterns_anomalies_binned)
final_versions_dataframes = []

for i, version in enumerate(range(1, 5)):
  # Shuffles in place, so every version starts from the order of the previous one.
  random.shuffle(pilot_mixed_patterns_anomalies_white_space_binned)
  events_per_sequence = len(pilot_mixed_patterns_anomalies_white_space_binned[0])
  finaldataframe = dataframegenerator(pilot_mixed_patterns_anomalies_white_space_binned, events_per_sequence)
  final_versions_dataframes.append(finaldataframe)
  finaldataframe.to_csv(f'pilot_mixed_binned_16_{version}.csv', index=False)

In [33]:
# Inspect the first of the exported versions.
df =final_versions_dataframes[0]

In [34]:

# Rows whose event is anomaly code 9 or 10.
# NOTE(review): anomalyValues_binned adds 8 to a base value from {0, 1, 2}, so code 8 is
# also possible in principle and is not covered by this filter -- confirm it cannot occur.
filtered_frame = df[df['EventCat'].isin([9, 10])] 
# Not the last expression of the cell, so Jupyter does not render this frame.
filtered_frame


# Wide view: one row per sequence, one column per event position.
pivotframe_binned = df.pivot(index='Sequence', columns='Event', values='EventCat')
pivotframe_binned

Event,0,1,2,3,4,5,6,7,8,9,...,230,231,232,233,234,235,236,237,238,239
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,1,0,1,11,0,1,0,1,0,...,1,0,1,0,11,1,11,0,11,1
1,0,2,0,11,2,0,2,0,2,0,...,1,0,1,0,1,11,0,1,0,1
2,0,2,0,11,2,0,2,0,11,2,...,1,11,0,1,0,1,0,1,0,1
3,2,0,1,0,1,0,1,0,11,1,...,11,1,0,11,1,2,0,1,0,1
4,11,0,11,2,0,2,0,2,0,2,...,10,0,1,0,1,0,1,11,0,1
5,0,1,0,1,0,1,0,1,0,11,...,11,1,0,11,1,0,11,1,0,1
6,0,11,1,0,1,0,11,1,0,1,...,10,1,0,1,0,11,1,11,0,1
7,2,0,1,0,1,0,1,11,0,1,...,0,1,0,1,2,0,1,11,0,1
8,2,11,0,1,0,1,11,0,1,0,...,0,1,0,1,2,0,1,11,0,1
9,0,2,0,2,0,11,2,0,2,0,...,11,1,11,0,1,0,1,11,0,1


In [35]:
# Per-sequence totals of white space (11) and of the anomaly codes 9 and 10.
# The counts are taken from the event columns only: counting over the whole frame would
# include the summary columns added below, which can distort later counts and makes the
# cell give different results when it is run a second time.
summary_columns = ['total white spaces', 'total anomaly 9', 'total anomaly 10', 'total anmaly']
event_values = pivotframe_binned[[column for column in pivotframe_binned.columns if column not in summary_columns]]

pivotframe_binned['total white spaces'] = event_values.eq(11).sum(axis=1)
pivotframe_binned['total anomaly 9'] = event_values.eq(9).sum(axis=1)
pivotframe_binned['total anomaly 10'] = event_values.eq(10).sum(axis=1)
pivotframe_binned["total anmaly"] = pivotframe_binned["total anomaly 9"] + pivotframe_binned["total anomaly 10"]
pivotframe_binned

Event,0,1,2,3,4,5,6,7,8,9,...,234,235,236,237,238,239,total white spaces,total anomaly 9,total anomaly 10,total anmaly
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,1,0,1,11,0,1,0,1,0,...,11,1,11,0,11,1,40,0,4,4
1,0,2,0,11,2,0,2,0,2,0,...,1,11,0,1,0,1,40,1,3,4
2,0,2,0,11,2,0,2,0,11,2,...,0,1,0,1,0,1,40,1,3,4
3,2,0,1,0,1,0,1,0,11,1,...,1,2,0,1,0,1,40,0,4,4
4,11,0,11,2,0,2,0,2,0,2,...,1,0,1,11,0,1,40,1,3,4
5,0,1,0,1,0,1,0,1,0,11,...,1,0,11,1,0,1,40,0,4,4
6,0,11,1,0,1,0,11,1,0,1,...,0,11,1,11,0,1,40,0,4,4
7,2,0,1,0,1,0,1,11,0,1,...,2,0,1,11,0,1,40,0,4,4
8,2,11,0,1,0,1,11,0,1,0,...,2,0,1,11,0,1,40,0,0,0
9,0,2,0,2,0,11,2,0,2,0,...,1,0,1,11,0,1,40,0,0,0
