In [1]:
import sys
sys.path.append('../') # To import from parent dir
import os

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import util.util as util

import datetime

  from .autonotebook import tqdm as notebook_tqdm


### Sample Data based on percentage instead of occurrence

Set the parameters in the following cell to your desired output. The parameters are described by the comment afterwards.
Do not change the parameters after the indication.

Execute the following two cells to generate the validation logs based on the SmartRPA validation logs in the folder "logs/SmartRPA/".
The data will be available after the complete execution. In particular, the file containing all indexes and parameters will only be complete once all loops have been executed.

1. Get unique actions
2. Calculate the log size based on
    1. Number of motifs
    2. Length of the motifs
    3. Occurrences per motif
    4. Percentage of the motifs over the log
3. Get randomized log in size to fit motifs into the data (log_wo_motif)
    1. Sample sequences between 1-motif length until size is reached
4. Generate the requested number of motifs, each with the configured motif length
5. Generate random insert values between 0 and len(log_wo_motif)
6. Insert the motifs from back to front
    1. Shuffle the template if shuffling is enabled
    2. Add it at the index: `log_wo_motif[0:index] + template + log_wo_motif[index:len(log_wo_motif)]`
    3. Store insert value in df
    4. increment all inserted for ground truth data
7. Store Log
8. Add information to ground_truth data

In [2]:
# ---- Parameter grid: the generation cell iterates over every combination of these lists ----
different_motif_count = [5,3,2,1] # Different motif types
lengths_count = [5,10,20,50] # Different lengths of motifs
occurances_count = [5,10,20] # Different number of occurances of motifs
percentageMotif_counts = [1,10,25,50,75,100] # Different percentage of motif in the time series
percentageShuffle_counts = [0,0.1,0.2] # Different percentage of shuffle of actions in the motif

# ---- Set the Data Path for the csv files used for the data sampling and where the logs should be added ----
csvPath = "../logs/smartRPA/OriginalAgostinelliLogs/"
validation_path = "../logs/smartRPA/202511-update/"

# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(validation_path, exist_ok=True)

# DO NOT Change from here for synthetic data!
# ---- Columns to generate the validation data for the experiment ----
# Four fixed columns, followed by five per-motif columns for every motif slot
# up to the largest configured motif count.
validationDataColumns = ["uiLogName","percentageMotifsOverLog","logLength","noOfMotifs"]
for motif_idx in range(max(different_motif_count)):
    validationDataColumns.extend([
        f"motif{motif_idx}-length",
        f"motif{motif_idx}-occurances",
        f"motif{motif_idx}-shuffle",
        f"motif{motif_idx}-startIndexes",
        f"motif{motif_idx}-caseID",
    ])

# Empty ground-truth table; one row per generated log is added later.
validationDataDF = pd.DataFrame(columns=validationDataColumns)
concept_name_column = 'case:concept:name'
timeStampCol = "time:timestamp"

# ---- Gathering of unique events until the upper limit is reached ----
# 250000 is the row limit passed to the reader (see the "Maximum row limit" output below).
dfAll = util.read_csvs_and_combine(csvPath,250000)
# Drop duplicates based on equality assumption in https://doi.org/10.1016/j.compind.2022.103721
subset=["category","application","concept:name","event_src_path","event_dest_path","browser_url","xpath"]
df_unique = dfAll.drop_duplicates(subset=subset)
print(f"There are {df_unique.shape[0]} unique events in the dataframe.")

Maximum row limit of 250000 reached. Stopping reading additional files.
There are 7096 unique events in the dataframe.


In [3]:
# Total number of parameter combinations; only used to scale the progress bar.
total_loops = (
    len(different_motif_count)
    * len(occurances_count)
    * len(lengths_count)
    * len(percentageMotif_counts)
    * len(percentageShuffle_counts)
)
loop_counter = 0

# Start from an empty ground-truth table so that re-running this cell does not
# append a second copy of every row to the rows of a previous run.
validationDataDF = pd.DataFrame(columns=validationDataColumns)
validation_info_path = os.path.join(validation_path, "validationLogInformation.csv")

for motifs in different_motif_count:
    for occurances in occurances_count:
        print("--------------------------------------------------")
        timeForPreparing = datetime.datetime.now()
        # Excluded from code as this is the most time consuming part and only needs to be done a few times
        # The motif cases and the noise log are prepared once per (motifs, occurances)
        # pair and reused for all lengths, percentages and shuffle values below.
        random_cases_list = util.get_random_values(dfAll, concept_name_column, motifs, min_len=max(lengths_count))
        only_case_actions_df = dfAll[dfAll[concept_name_column].isin(random_cases_list[concept_name_column])]
        only_case_actions_df = only_case_actions_df.sort_values(by=[concept_name_column,timeStampCol])
        # Largest log any inner combination can need: longest motif at the smallest motif percentage.
        max_action_count = motifs * max(lengths_count) * occurances * 100 / min(percentageMotif_counts)
        print(f"Max action count for random log generation: {max_action_count}")
        randomDF = util.get_rand_uiLog(dfAll, n_max= max(lengths_count) // 2, action_count=max_action_count, printing=True)
        for length in lengths_count:
            for percentageMotif in percentageMotif_counts:
                for shuffle in percentageShuffle_counts:
                    timeForGenerating = datetime.datetime.now()
                    loop_counter += 1
                    util.print_progress_bar(loop_counter, total_loops)
                    print(f"Processing: {motifs}-{different_motif_count} | {occurances}-{occurances_count} | {length}-{lengths_count} | {percentageMotif}-{percentageMotif_counts} | {shuffle}-{percentageShuffle_counts}")
                    new_row = util.generate_log_from_data_seed(dfAll,
                                                            randomDF,
                                                            validation_path,
                                                            concept_name_column,
                                                            only_case_actions_df,
                                                            random_cases_list,
                                                            motifs,
                                                            occurances,
                                                            length,
                                                            percentageMotif,
                                                            shuffle)
                    # pd.concat is the public replacement for the removed DataFrame.append;
                    # the private _append may change without notice.
                    # NOTE(review): assumes new_row is a dict (or Series) describing one row - confirm against util.
                    validationDataDF = pd.concat([validationDataDF, pd.DataFrame([new_row])], ignore_index=True)
                    # Safety store: rewrite the file after every log so an interrupted run keeps its rows
                    validationDataDF.to_csv(validation_info_path, index=False)
                    print(f"Time taken for generating log: {datetime.datetime.now() - timeForGenerating}")
        # NOTE: measured from before the preparation to after all inner loops, so it
        # covers both the preparation and the generation for this pair.
        print(f"Time taken for preparing data for motif count {motifs} and occurance count {occurances}: {datetime.datetime.now() - timeForPreparing}")
# Save the validation data log information
validationDataDF.to_csv(validation_info_path, index=False)

--------------------------------------------------
Max action count for random log generation: 125000.0
Current generated UiLog length: 0
Current generated UiLog length: 1000
Current generated UiLog length: 1100
Current generated UiLog length: 1300
Current generated UiLog length: 1900
Current generated UiLog length: 2200
Current generated UiLog length: 2500
Current generated UiLog length: 2600
Current generated UiLog length: 2700
Current generated UiLog length: 2900
Current generated UiLog length: 3100
Current generated UiLog length: 3300
Current generated UiLog length: 3600
Current generated UiLog length: 4100
Current generated UiLog length: 5000
Current generated UiLog length: 5100
Current generated UiLog length: 5700
Current generated UiLog length: 5800
Current generated UiLog length: 5900
Current generated UiLog length: 6000
Current generated UiLog length: 6300
Current generated UiLog length: 6900
Current generated UiLog length: 7000
Current generated UiLog length: 7200
Current gen

### Testing Code

In [None]:
# Sample Data for Testing
# Stand-alone walk-through of a single parameter combination: build a noise log,
# insert `motifs * occurances` motif instances and record the ground truth.
motifs = 5
occurances = 5
length = 50
percentageMotif = 10
shuffle = 0.2

# ---- Generating the validation data ----
start_time = datetime.datetime.now()
log_name = f"log_motifs{motifs}_occurances{occurances}_length{length}_percentage{percentageMotif}_shuffle{shuffle}.csv"
new_row = {
    "uiLogName": log_name,
    "percentageMotifsOverLog": percentageMotif,
    "noOfMotifs": motifs
}

# Get random cases with at least max length
# 5 must be removed in looping later
random_cases_list = util.get_random_values(dfAll, concept_name_column, motifs, min_len=max(lengths_count))
only_case_actions_df = dfAll[dfAll[concept_name_column].isin(random_cases_list[concept_name_column])]
only_case_actions_df = only_case_actions_df.sort_values(by=[concept_name_column,timeStampCol])

# Calculate log length for percentage of motif to log
log_total_length = motifs * length * occurances * 100 / percentageMotif
log_noise_length = log_total_length * (100-percentageMotif)/100
print(f"Processing: {log_name} with the total length {log_total_length} containing {log_noise_length} events.")

new_row["logLength"] = int(log_total_length)
getRandomValues_time = datetime.datetime.now()

# Generate random log just for noise
random_log = util.get_rand_uiLog(dfAll, n_max=length // 2, action_count=log_noise_length, printing=False)
# Reassign instead of inplace=True so the object returned by util is not mutated.
random_log = random_log.drop(columns=["Unnamed: 0"], errors='ignore')
getRandomUILogTime = datetime.datetime.now()

# Get random indexes for motif insertion
# Distinct positions (replace=False), sorted descending so insertion can run back to front.
insertion_indexes = sorted(np.random.choice(range(0, len(random_log)), size=motifs*occurances, replace=False),reverse=True)
indexes_df = pd.DataFrame(columns=["random_index","uplift","final_index","isUsed"])
indexes_df["random_index"] = insertion_indexes
# How much will the element be uplifted due to prior insertions
# Row 0 holds the highest position, so every other motif sits in front of it and shifts it by `length`.
indexes_df["uplift"] = (len(indexes_df) - indexes_df.index-1) * length
# Final indexes after uplift
indexes_df["final_index"] = indexes_df["random_index"] + indexes_df["uplift"]
# Mark all as not used << Not needed, but for checking
indexes_df["isUsed"] = False
# Case-ID sequence for insertion
indexes_df["insertedCaseID"] = ""
# For checking purposes, store the end index of the motif
indexes_df["end_indexes"] = indexes_df["final_index"] + length - 1

# Create the case-ID sequence for insertion in random order
case_sequence = (
    random_cases_list["case:concept:name"]
    .repeat(occurances)
    .sample(frac=1)                 # << shuffle the repeated sequence
    .reset_index(drop=True)
)

# Safety check
if len(insertion_indexes) < len(case_sequence):
    raise ValueError("Not enough insertion positions for all motif occurrences.")

# Assign case-IDs to insertion positions (one-to-one)
assigned_positions = list(zip(insertion_indexes[:len(case_sequence)], case_sequence))

# Process from highest index to lowest
# Inserting at the highest position first leaves all lower positions valid.
assigned_positions.sort(key=lambda x: x[0], reverse=True)
getIndexDFTime = datetime.datetime.now()

for position, case_id in assigned_positions:
    # Get the motif actions for the current case_id
    motif_actions = only_case_actions_df[only_case_actions_df[concept_name_column] == case_id][:length]
    # Shuffle the motif by the specified percentage
    # NOTE(review): reset_index() without drop=True adds an "index" column to the motif rows,
    # which then ends up in the stored log - confirm shuffle_x_percent needs/removes it.
    # NOTE(review): the fixed seed=42 is passed for every occurrence - confirm this is intended.
    motif_actions = util.shuffle_x_percent(motif_actions.reset_index(), shuffle, seed=42)
    # Insert motif actions into the random log at the specified position
    top_part = random_log.iloc[:position]
    bottom_part = random_log.iloc[position:]
    random_log = pd.concat([top_part, motif_actions, bottom_part]).reset_index(drop=True)
    # Positions are unique (sampled without replacement), so this mask selects exactly one row.
    at_position = indexes_df["random_index"] == position
    # Mark the position as used
    indexes_df.loc[at_position, "isUsed"] = True
    # Store insertion details in indexes_df
    indexes_df.loc[at_position, "insertedCaseID"] = case_id
insertionTime = datetime.datetime.now()

random_log = random_log.reset_index(drop=True)
# Ground truth per motif slot: parameters, the source case and the final start indexes.
for i in range(motifs):
    new_row[f"motif{i}-length"] = length
    new_row[f"motif{i}-occurances"] = occurances
    new_row[f"motif{i}-shuffle"] = shuffle
    case_id = random_cases_list["case:concept:name"].iloc[i]
    new_row[f"motif{i}-caseID"] = case_id
    start_indexes = indexes_df[indexes_df["insertedCaseID"] == case_id]["final_index"].tolist()
    new_row[f"motif{i}-startIndexes"] = start_indexes

random_log.to_csv(os.path.join(validation_path, log_name), index=False)
# pd.concat is the public replacement for the removed DataFrame.append (private _append).
validationDataDF = pd.concat([validationDataDF, pd.DataFrame([new_row])], ignore_index=True)

print("Time taken for getting random values:", getRandomValues_time - start_time)
print("Time taken for getting random UI log:", getRandomUILogTime - getRandomValues_time)
print("Time taken for getting index DF:", getIndexDFTime - getRandomUILogTime)
print("Time taken for insertion of motifs:", insertionTime - getIndexDFTime)
print("Total time taken:", insertionTime - start_time)

In [None]:
# Preview the first 50 rows of the generated log.
random_log.head(50)

### Old Code

In [None]:
# NOTE: running this cell overwrites names set by the current configuration cell
# (validation_path, validationDataColumns, validationDataDF, dfAll, df_unique).
# ---- Setup of the parameters for the logs to be created ----
randomness = [1] # Length of sampling sequence, when creating the baseline log (1=> only one event inserted, 2=> sequences of 2 from all possible events inserted ...)
motifs = [1] # how many different motifs should be inserted into the log
occurances = [10,15,20,30,60] # Number of motif appearances in the log >> r_o in the paper
# !!!! occurances should be large enough, if motifs >1 to 1. fit all motifs, and 2. make it possible that at random choice the motifs are added as well. There is currently no fix
lengthMotifs = [5,10,15,20,25] # Length of the Motifs to be inserted >> r_len in the paper
percentageMotifsOverLog = [1,2.5,5,10] # Percentage representation of the Motif in the log >> p in the paper
shuffles = [0,10,20] # Percentage by which the inserted routine should be shuffled >> v_in in the paper

# ---- Set the Data Path for the csv files used for the data sampling and where the logs should be added ----
csvPath = "../logs/smartRPA/OriginalAgostinelliLogs/"
validation_path = "../logs/smartRPA/202509-update/"

# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(validation_path, exist_ok=True)

# DO NOT Change from here for synthetic data!
# ---- Columns to generate the validation data for the experiment ----
validationDataColumns = ["uiLogName","variationPercentage","numberOfOccurrancesToBeDiscovered","motifLength","percentageMotifsOverLog","logLength",
                     "motifSpots","caseIds"]
validationDataDF = pd.DataFrame(columns=validationDataColumns)
concept_name_column = 'case:concept:name'
timeStampCol = "time:timestamp"
reduce = 0 # Currently not in use

# ---- Gathering of unique events until the upper limit is reached ----
# The reader is only imported as part of the util module, so it has to be called through it.
dfAll = util.read_csvs_and_combine(csvPath,250000)
# Drop duplicates based on equality assumption in https://doi.org/10.1016/j.compind.2022.103721
subset=["category","application","concept:name","event_src_path","event_dest_path","browser_url","xpath"]
df_unique = dfAll.drop_duplicates(subset=subset)
print(f"There are {df_unique.shape[0]} unique events in the dataframe.")

In [None]:
# Old generation loop: for every parameter combination, sample a noise log, insert
# motifs and store the log together with one ground-truth row.
# NOTE(review): get_random_values, get_rand_uiLog and insert_motifs_non_overlap are called
# unqualified and `random` is used below, but the import cell only provides `util`, `os`,
# `pd`, `np`, `sys` and `datetime` - this cell presumably relied on an earlier star import.
validationDataDF = pd.DataFrame(columns=validationDataColumns)
# ---- Loops to create the data and store the file containing the data parameters ----
for mot in motifs: # Currently has no effect on the creation >> Would only be two different logs created 
    for rand in randomness:
        for occ in occurances:
            for percentage in percentageMotifsOverLog:
                for motifLen in lengthMotifs:
                    # Calculate the length of the log based on the percentage values
                    
                    # Total log length such that occ*motifLen motif events make up `percentage` percent of it.
                    l = ((occ*motifLen) / percentage * 100)
                    # print(f"Log Length: {l}")
                    # Number of non-motif (noise) events to sample.
                    samplingLength = l-(occ*motifLen)
                    print(f"Random Events: {samplingLength}")
                    # Check if multiple motifs should be inserted
                    if mot >= 1:
                        random_cases_list = get_random_values(dfAll, concept_name_column, mot, min_len=max(lengthMotifs)) # Mot > 2
                        # This first filtered_df is replaced by trimmed_df a few lines below.
                        filtered_df = dfAll[dfAll[concept_name_column].isin(random_cases_list)]
                        trimmed_df = pd.DataFrame()
                        # Keep only the first motifLen rows of every selected case.
                        for case in random_cases_list:
                            case_data = dfAll[dfAll[concept_name_column] == case].iloc[:motifLen]
                            trimmed_df = pd.concat([trimmed_df, case_data], ignore_index=True)
                        filtered_df = trimmed_df
                    else:
                        print(f"Error: Cannot generate {mot} motifs. Skipping.")
                        # Leaves only the innermost (motifLen) loop.
                        break
                    print(f"{mot} motifs have been generated. Proceeding with log creation.")
                    # Sample the UI log from all available data
                    if samplingLength > 0:
                        uiLog = get_rand_uiLog(df_unique, n_max=rand, actions=int(samplingLength)) # set to actions=l for proper work
                        print(f"UI Log of length {l} is sampled. Proceeding with motif insertion.")
                        
                        for shuffle in shuffles:
                            # "2.5" would put a dot into the file name, so it is written as "2-5".
                            if percentage == 2.5:
                                filename = f"202509-Log_{rand}_vin{shuffle}_no{mot}_ro{occ}_rlen{motifLen}_p2-5_len{int(l)}.csv"
                            else:
                                filename = f"202509-Log_{rand}_vin{shuffle}_no{mot}_ro{occ}_rlen{motifLen}_p{percentage}_len{int(l)}.csv"
                            # random_cases_list = get_random_values(dfAll, concept_name_column, mot, min_len=max(lengthMotifs))
                            # filtered_df = dfAll[dfAll[concept_name_column].isin(random_cases_list)] # Needs adjustment if two motifs should be inserted
                            # filtered_df = filtered_df.iloc[:motifLen]
                            
                            # NOTE(review): uiLog and random_cases_list are overwritten with the helper's
                            # return values and passed in again on the next shuffle value, so later
                            # iterations presumably insert into a log that already contains motifs - confirm.
                            uiLog, indices, random_cases_list = insert_motifs_non_overlap(random_cases_list=random_cases_list,
                                                                uiLog=uiLog,
                                                                dfcases=filtered_df,
                                                                occurances=occ,
                                                                case_column_name=concept_name_column,
                                                                sorted_insert_col=timeStampCol,
                                                                shuffled=True,
                                                                shuffled_by=shuffle,
                                                                reduced=False,
                                                                reduced_by=reduce)
                            
                    else: # This is buggy at the moment. Does produce too long logs.
                        # Reached only when samplingLength <= 0, i.e. when percentage is 100 or more.
                        # random_cases_list = get_random_values(dfAll, concept_name_column, mot, min_len=max(lengthMotifs))
                        # filtered_df = dfAll[dfAll[concept_name_column].isin(random_cases_list)] # Needs adjustment if two motifs should be inserted
                        # filtered_df = filtered_df.iloc[:motifLen] # Needs adjustment if two motifs should be inserted
                        for shuffle in shuffles: 
                            filename = f"t-Baseline-LenLog_{rand}_vin{shuffle}_no{mot}_ro{occ}_rlen{motifLen}_p{percentage}_len{int(l)}.csv"
                            if len(random_cases_list) >= 2:
                                sampled_dataframes = list()   
                                inserted_cases_list = []               
                                # Build a log that consists only of occ randomly chosen motif instances.
                                for _ in range(occ):
                                    random_case = random.choice(random_cases_list)
                                    inserted_cases_list.append(random_case) # Must be double checked Edited afterwards to add case ids.
                                    sampled_data = dfAll[dfAll[concept_name_column] == random_case]
                                    sampled_data = sampled_data[:motifLen]
                                    sampled_dataframes.append(sampled_data)
                                uiLog = pd.concat(sampled_dataframes, ignore_index=True)
                            elif len(random_cases_list) == 1: # Only one motif is expected
                                case = dfAll[dfAll[concept_name_column] == random_cases_list[0]]
                                # Here inserted_cases_list is a string, while the branch above builds a list.
                                inserted_cases_list = "[" + ",".join([str(x) for x in np.repeat(random_cases_list[0], occ)]) + "]"
                                case = case[:motifLen]
                                uiLog = pd.concat([case] * occ, ignore_index=True)
                            else:
                                print(f"Cannot generate negative length or zero routines dataframe: {filename}")
                                break
                            # Motif instances are laid out back to back, so they start every motifLen rows.
                            indices = [i * motifLen for i in range(occ)]
                            random_cases_list = inserted_cases_list

                    # NOTE(review): this block sits outside both `for shuffle` loops, so only one file and
                    # one row (carrying the last shuffle value) are written per combination - confirm intent.
                    # "motifsToBeDiscovered" is not part of validationDataColumns and is added as a new column.
                    new_row = {'uiLogName': filename, "variationPercentage":  shuffle, "motifsToBeDiscovered": mot, "numberOfOccurrancesToBeDiscovered": occ,
                    "motifLength": motifLen, "percentageMotifsOverLog": percentage, "logLength": l, "motifSpots": indices, "caseIds": random_cases_list}
                    filepath = validation_path + filename
                    uiLog.to_csv(filepath, index=False)
                    print(f"UI Log with name {filename} completed and stored")
                    # It is know that there is a FutureWarning for all-na or empty rows. Based on the previous logic everythin will work.
                    validationDataDF = validationDataDF._append(new_row, ignore_index=True)

# Written without index=False, so the stored file contains the row index as its first column.
validationDataDF.to_csv(validation_path + "202509-validation_data.csv")

### Archive - Not in Use

### Creation of artificial validation logs for TSDM Discovery in UI logs

Method:
  1. Get user interactions (a) and create a set of user actions (A)
  2. Select random actions (1 to n consecutive actions per loop iteration) and append them to a dataframe (D) until an upper limit (x) is reached
        - The upper limit x is considered as 1 action per 3 seconds in an 8 hour work day => 8 * 60 * (60/3) = 9600 actions a day
  3. Get routines (r) (1-m overall) and insert the routines (r) o-times at random points into the dataframe (D)
        - The routines must not interrupt themselves, otherwise no motif could be discovered (for future tests, they could interrupt each other as well)

Result: A dataframe (D) with x + (o * len(r)) number of actions containing m routines at random points

We create a set of UI logs which vary in the following constraints:
- Randomness of actions inserted into the logs
    - The actions can be sampled completely randomly
    - The actions can be consecutive actions taken from previous logs
- Number of motifs inserted
    - The length of the motif is defined by the original motif in the smartRPA log
- Number of times the motifs are inserted
- Length of the UI log

Objective: Create a dataframe that mimics a long time recording of users, which contains routines

The file name will be created as follows
"Log_ [#Randomness]_ [#Motifs]_ [#MotifOccurance]_ [#LogLength]_ [#ShuffledBy]_ [#ReducedBy].csv"

In [None]:
# NOTE: running this cell overwrites names set by the current configuration cell
# (csvPath, validation_path, dfAll, df_unique).
# ---- Set the Data Path for the csv files used for the data sampling and where the logs should be added ----
csvPath = "../logs/smartRPA/202509-update/"
validation_path = csvPath + "validation/"

# ---- Setup of the parameters for the logs to be created ----
# Each parameter is defined once; the original cell assigned several of them twice with identical values.
randomness = [1] # Length of sampling sequence, when creating the baseline log (1=> only one event inserted, 2=> sequences of 2 from all possible events inserted ...)
motifs = [1] # how many different motifs should be inserted into the log
occurances = [10,15,20,30,60] # Number of motif appearances in the log >> r_o in the paper
# !!!! occurances should be large enough, if motifs >1 to 1. fit all motifs, and 2. make it possible that at random choice the motifs are added as well. There is currently no fix
lengthLog = [1000,2000,4000,8000,12000,15000,17500,20000,25000,30000] # 9600 are Events for approximatly one working day
# ToDo or just take the original size of the case to make it more real world
lengthMotifs = [5,10,15,20,25] # Length of the Motifs to be inserted >> r_len in the paper
percentageMotifsOverLog = [1,2.5,5,10] # Percentage representation of the Motif in the log >> p in the paper
shuffles = [0,10,20] # Percentage by which the inserted routine should be shuffled >> v_in in the paper

# Shuffle and reduction of the event log
shuffle = 15
reduce = 15


concept_name_column = 'case:concept:name'
timeStampCol = "time:timestamp"

# The reader is only imported as part of the util module, so it has to be called through it.
dfAll = util.read_csvs_and_combine(csvPath,250000)
# Drop duplicates based on equality assumption in https://doi.org/10.1016/j.compind.2022.103721
subset=["category","application","concept:name","event_src_path","event_dest_path","browser_url","xpath"]
df_unique = dfAll.drop_duplicates(subset=subset)
print(f"There are {df_unique.shape[0]} unique events in the dataframe.")

In [None]:
# Archive: create one log per (randomness, log length, occurrences, motif count) and
# keep a tracking table with the insertion indices and the case order of every log.
data = dict(Filename=[], Index=[], CaseOrder=[])
index_frame = pd.DataFrame(data)
for rand in randomness:
    # Not used at the moment as we have changed to percentage based calculation
    for l in lengthLog: # do not name len as shortcut, cause trouble with inbuild length function len()
        # One noise log per log length, shared by all occurrence / motif settings below.
        beginningLog = get_rand_uiLog(dfAll, n_max=rand, actions=l) # set to actions=l for proper work
        for occ in occurances:
            for mot in motifs: # Number of motifs
                uiLog = beginningLog
                random_cases_list = get_random_values(dfAll, concept_name_column, mot, min_len=15)
                # Filter rows with values in the list, because the length is shorter for the following loop
                case_mask = dfAll[concept_name_column].isin(random_cases_list)
                filtered_df = dfAll[case_mask]

                insertion_result = insert_motifs_non_overlap(
                    random_cases_list=random_cases_list,
                    uiLog=uiLog,
                    dfcases=filtered_df,
                    occurances=occ,
                    case_column_name=concept_name_column,
                    sorted_insert_col=timeStampCol,
                    shuffled=True,
                    shuffled_by=shuffle,
                    reduced=False,
                    reduced_by=reduce,
                )
                uiLog, indices, random_cases_list = insertion_result

                filename = f"Log_{rand}_{mot}_{occ}_{l}_{shuffle}_{reduce}_.csv"
                filepath = f"{validation_path}{filename}"
                print(filename)
                uiLog.to_csv(filepath, index=False)

                # For tracking purpose and validation: Store the index of the cases in each log
                row = (filename, str(indices), str(random_cases_list))
                new_row_series = pd.Series(row, index=index_frame.columns)
                tracking_row = new_row_series.to_frame().T
                index_frame = pd.concat([index_frame, tracking_row], ignore_index=True)

filepath = f"{validation_path}validation_data_high_percentage.csv"
index_frame.to_csv(filepath, index=False)

The motifs created in the previous part all have the same length. However, our approach can handle motifs of different lengths as well.
We use the work in doi.org/10.1109/ACCESS.2023.3295995 to identify such motifs.
The logs created here contain different length sized motifs, resembling different length user tasks.

In [None]:
# Archive: create logs whose inserted motifs have different lengths (one case per entry
# of lengthMotifs) and track file name, insertion indices, case order and case lengths.
# NOTE(review): get_rand_uiLog, get_random_values and insert_motifs_non_overlap are called
# unqualified although the import cell only provides `util` - left unchanged here.
data = {'Filename': [], 'Index': [], 'CaseOrder': [], 'CaseLength':[]}
index_frame = pd.DataFrame(data)
for rand in randomness:
    for occ in occurances:
        for l in lengthLog: # do not name len as shortcut, cause trouble with inbuild length function len()
            uiLog = get_rand_uiLog(df_unique, n_max=rand, actions=l) # set to actions=l for proper work
            mot = len(lengthMotifs)
            random_cases_list = get_random_values(dfAll, concept_name_column, mot, min_len=max(lengthMotifs))
            filtered_df = dfAll[dfAll[concept_name_column].isin(random_cases_list)]

            # Reduce the cases in length and append again
            # The trimmed cases are collected freshly for every log. The previous
            # try/except NameError never reset variableLengthDf, so rows of cases from
            # earlier iterations (and earlier runs of the cell) stayed in it, and the very
            # first case was cut to element-1 rows although `element` was recorded.
            strCaseLength = ""
            trimmed_cases = []
            for i, element in enumerate(lengthMotifs):
                insert_df = filtered_df[filtered_df[concept_name_column] == random_cases_list[i]].sort_values(timeStampCol)
                trimmed_cases.append(insert_df.iloc[:element])
                strCaseLength = strCaseLength + f"{random_cases_list[i]}:{element}/"
            # Reversed so the row order matches the former prepend-style concatenation.
            if trimmed_cases:
                variableLengthDf = pd.concat(trimmed_cases[::-1], ignore_index=True)
            else:
                variableLengthDf = filtered_df.iloc[:0]

            uiLog, indices, random_cases_list = insert_motifs_non_overlap(random_cases_list=random_cases_list,
                                                        uiLog=uiLog,
                                                        dfcases=variableLengthDf,
                                                        occurances=occ,
                                                        case_column_name=concept_name_column,
                                                        sorted_insert_col=timeStampCol,
                                                        shuffled=True,
                                                        shuffled_by=shuffle,
                                                        reduced=True,
                                                        reduced_by=reduce)

            filename = f"VarLenLog_{rand}_{mot}_{occ}_{l}_{shuffle}_{reduce}.csv"
            filepath = validation_path + filename
            uiLog.to_csv(filepath, index=False)

            # For tracking purpose and validation: Store the index of the cases in each log
            row = (filename, str(indices), str(random_cases_list), strCaseLength)
            new_row_series = pd.Series(row, index=index_frame.columns)
            index_frame = pd.concat([index_frame, new_row_series.to_frame().T], ignore_index=True)

filepath = validation_path + "var_len_validation_data.csv"
index_frame.to_csv(filepath, index=False)