In [5]:
import os
import re
import pandas as pd
import csv
import ast


# Transformation SmartRPA > Action Logger

Both tools use different attribute names and they require a transformation.
Our baseline are SmartRPA logs. The following cell in this notebook takes as an input a SmartRPA file and gernerates an Action Logger file.
Please Note: Some attributes in Action Logger exist that are not present in SmartRPA and vise versa.

In [None]:
# Diccionario para sustituir nombres de columnas
column_mapping = {
    "case:concept:name": "caseID",
    "category": "target.class",
    "application": "targetApp",
    "time:timestamp": "timeStamp",
    "org:resource": "userID",
    "concept:name": "eventType",
    "browser_url": "url",
    "clipboard_content": "content",
    "workbook": "target.workbookName",
    "tag_name": "target.tagName",
    "tag_type": "target.type",
    "tag_value": "target.value",
    "tag_innerText": "target.innerText",
    "tag_checked": "target.checked",
    "tag_href": "target.href",
    "tag_option": "target.option",
    "tag_title": "target.title",
    "id": "target.id",
    "case:concept:name": "target.name",
    "current_worksheet": "target.sheetName",
    "tag_html": "target.innerHTML"
}

def process_csv(file_path, results_path):
    # Leer el archivo CSV
    df = pd.read_csv(file_path)
    
    # Renombrar columnas seg√∫n el diccionario
    df.rename(columns=column_mapping, inplace=True)
    
    # Convertir todos los valores a strings con quotes
    # df = df.map(lambda x: f'{x}' if pd.notnull(x) else '')
    
    # Guardar el archivo CSV procesado
    output_file = f"processed_{os.path.basename(file_path)}"
    output_file = os.path.join(results_path, output_file)
    df.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL, na_rep='')
    
    # with open(file_path, 'r', newline='') as infile, open(new_file_path, 'w', newline='') as outfile:
    #     reader = csv.reader(infile)
    #     writer = csv.writer(outfile, quoting=csv.QUOTE_ALL)
    #     for row in reader:
    #         writer.writerow(row)

def process_directory(directory):
    # Comprobar si en "directory" existe el directorio "processed", sino crearlo
    processed_directory = os.path.join(directory, "processed")
    if not os.path.exists(processed_directory):
        os.makedirs(processed_directory)
    
    for file_name in os.listdir(directory):
        if file_name.endswith('.csv'):
            file_path = os.path.join(directory, file_name)
            try:
                process_csv(file_path, processed_directory)
            except:
                print(f"{file_name} could not be transformed.")

# Especifica la carpeta que deseas procesar
directory_path = "../logs/smartRPA/percentageComparison/LenoComparison/"

process_directory(directory_path)

  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)


# Creation of Grund Truth Files 4 RPM Segmentor and n-grams for SmartRPA Segmentor

SmartRPA does not require ground truth. Action Logger by Leno does. This part of the project is to create a ground truth txt file. With that file the common measures (precision, recall, and f1-score) can be calculated properly.

Two options can be used here:

1. Grund Truth Files for Leno et als. Approach: Adjust the activity pattern to the following:
<blockquote>
pattern = ""

for _, row in log.iloc[indexes[0]:indexes[0] + motifLength].iterrows(): 

    activityPattern = f"{row['concept:name']}_{row['application']}_{row['category']}"

    pattern = pattern + activityPattern.replace(" ","") + " -1 "

\# concat into string

pattern_complete = pattern_complete + pattern + " -2 \n"
 </blockquote>    

The three attributes are concatenated by a "_" and all white spaces are removed.
After each activity there is a "-1" added as this is required by the "CloFast" algorithm.
All lines have to end with a "-2" as this is the CloFast line delimitor.

2. N-Gram Patterns for Agostinelli et als. Approach: Adjust the code as follows, as only the action attribute is required
<blockquote>
pattern = ""

for _, row in log.iloc[indexes[0]:indexes[0] + motifLength].iterrows():

        activityPattern = f"{row['concept:name']}"

        pattern = pattern + activityPattern.replace(" ","") + " "

\# concat into string

pattern_complete = pattern_complete + pattern + "\n"
</blockquote>

In [None]:
# Settings

# Folder path in which the UI logs are which should be transformed into RPM Segmentor ground truth or SmartRPA segmentor files
folder_path = "../logs/Leno/"

# Settings for Leno Logs generated for experiment
logs = ["202511_SR_RT_plus.csv","202511_SR_RT_parallel.csv","202511_SR_RT_plus_extended.csv","202511_SR_RT_parallel_extended.csv"]
gts = ["202511_SR_RT_plus_ground_truth.csv","202511_SR_RT_parallel_ground_truth.csv","202511_SR_RT_plus_extended_ground_truth.csv","202511_SR_RT_parallel_extended_ground_truth.csv"]

seperator = ";" # "," for SmartRPA, ";" for Tockler/AWT
encoding_method = "utf-8" # UTF-8 for SmartRPA, latin-1 for Tockler/AWT

# Read Files 1. Log 2. Validation Data to identify patterns
for i,log in enumerate(logs):
    print(log)
    log_filename = log
    log = pd.read_csv(folder_path + log_filename, encoding=encoding_method, sep=seperator)
    validation_data = pd.read_csv(folder_path + gts[i], encoding=encoding_method, sep=seperator)
    # Get Index of Motifs

    motifLength = int(validation_data["length"].iloc[0])

    motifSpots = validation_data["start_index"].tolist()
    caseAtSpot = validation_data["caseid"].tolist()
    motifAtSpot = validation_data["motif"].tolist()

    # example_ranges = []
    # if "parallel" in log_filename:
    #     rangeSR = range(motifSpots[0],motifSpots[1]-1)
    #     rangeRT = range(motifSpots[1],motifSpots[2]-1)
    # elif "plus" in log_filename:
    #     rangeSR = range(motifSpots[0],motifSpots[1]-1)
    #     rangeRT = range(motifSpots[51],motifSpots[52]-1)

    # Generate Filename
    grundTruth_File = log_filename.split(".")[0] + ".txt"
    pattern_complete = ""
    pattern_dict = {}
    for row in validation_data.itertuples():
        print(row)
        pattern = ""
        print(row[2], row[3]+row[2])
        for _, row in log.iloc[row[2]:row[2] + row[3]].iterrows():
            activityPattern = f"{row['eventType']}"
            pattern = pattern + activityPattern.replace(" ","") + " -1 "

        # Add only unique patterns   
        if pattern not in pattern_dict.values():
            pattern_dict[row[1]] = pattern
            pattern_complete = pattern_complete + pattern + "-2\n"
        
    print(pattern_complete)


    with open(folder_path + log_filename.split(".")[0] + ".txt", 'w') as f:
        f.write(pattern_complete)
    

202511_SR_RT_plus.csv
Pandas(Index=0, caseid=1, start_index=0, length=28, motif='SR')
0 28
Pandas(Index=1, caseid=2, start_index=29, length=31, motif='SR')
29 60
Pandas(Index=2, caseid=3, start_index=60, length=29, motif='SR')
60 89
Pandas(Index=3, caseid=4, start_index=89, length=31, motif='SR')
89 120
Pandas(Index=4, caseid=5, start_index=120, length=31, motif='SR')
120 151
Pandas(Index=5, caseid=6, start_index=151, length=31, motif='SR')
151 182
Pandas(Index=6, caseid=7, start_index=182, length=29, motif='SR')
182 211
Pandas(Index=7, caseid=8, start_index=211, length=32, motif='SR')
211 243
Pandas(Index=8, caseid=9, start_index=243, length=30, motif='SR')
243 273
Pandas(Index=9, caseid=10, start_index=273, length=31, motif='SR')
273 304
Pandas(Index=10, caseid=11, start_index=304, length=31, motif='SR')
304 335
Pandas(Index=11, caseid=12, start_index=335, length=29, motif='SR')
335 364
Pandas(Index=12, caseid=13, start_index=364, length=31, motif='SR')
364 395
Pandas(Index=13, casei

  pattern_dict[row[1]] = pattern


202511_SR_RT_parallel.csv
Pandas(Index=0, caseid=1, start_index=0, length=28, motif='SR')
0 28
Pandas(Index=1, caseid=2, start_index=29, length=61, motif='RT')
29 90
Pandas(Index=2, caseid=3, start_index=90, length=33, motif='SR')
90 123
Pandas(Index=3, caseid=4, start_index=123, length=60, motif='RT')
123 183
Pandas(Index=4, caseid=5, start_index=183, length=31, motif='SR')
183 214
Pandas(Index=5, caseid=6, start_index=214, length=60, motif='RT')
214 274
Pandas(Index=6, caseid=7, start_index=274, length=33, motif='SR')
274 307
Pandas(Index=7, caseid=8, start_index=307, length=60, motif='RT')
307 367
Pandas(Index=8, caseid=9, start_index=367, length=33, motif='SR')
367 400
Pandas(Index=9, caseid=10, start_index=400, length=60, motif='RT')
400 460
Pandas(Index=10, caseid=11, start_index=460, length=33, motif='SR')
460 493
Pandas(Index=11, caseid=12, start_index=493, length=60, motif='RT')
493 553
Pandas(Index=12, caseid=13, start_index=553, length=31, motif='SR')
553 584
Pandas(Index=13

  pattern_dict[row[1]] = pattern


202511_SR_RT_plus_extended.csv
Pandas(Index=0, caseid=1, start_index=0, length=28, motif='SR')
0 28
Pandas(Index=1, caseid=2, start_index=78, length=31, motif='SR')
78 109
Pandas(Index=2, caseid=3, start_index=159, length=29, motif='SR')
159 188
Pandas(Index=3, caseid=4, start_index=238, length=31, motif='SR')
238 269
Pandas(Index=4, caseid=5, start_index=319, length=31, motif='SR')
319 350
Pandas(Index=5, caseid=6, start_index=400, length=31, motif='SR')
400 431
Pandas(Index=6, caseid=7, start_index=481, length=29, motif='SR')
481 510
Pandas(Index=7, caseid=8, start_index=560, length=32, motif='SR')
560 592
Pandas(Index=8, caseid=9, start_index=642, length=30, motif='SR')
642 672
Pandas(Index=9, caseid=10, start_index=722, length=31, motif='SR')
722 753
Pandas(Index=10, caseid=11, start_index=803, length=31, motif='SR')
803 834
Pandas(Index=11, caseid=12, start_index=884, length=29, motif='SR')
884 913
Pandas(Index=12, caseid=13, start_index=963, length=31, motif='SR')
963 994
Pandas(

  pattern_dict[row[1]] = pattern


202511_SR_RT_parallel_extended.csv
Pandas(Index=0, caseid=1, start_index=0, length=28, motif='SR')
0 28
Pandas(Index=1, caseid=2, start_index=78, length=61, motif='RT')
78 139
Pandas(Index=2, caseid=3, start_index=189, length=33, motif='SR')
189 222
Pandas(Index=3, caseid=4, start_index=272, length=60, motif='RT')
272 332
Pandas(Index=4, caseid=5, start_index=382, length=31, motif='SR')
382 413
Pandas(Index=5, caseid=6, start_index=463, length=60, motif='RT')
463 523
Pandas(Index=6, caseid=7, start_index=573, length=33, motif='SR')
573 606
Pandas(Index=7, caseid=8, start_index=656, length=60, motif='RT')
656 716
Pandas(Index=8, caseid=9, start_index=766, length=33, motif='SR')
766 799
Pandas(Index=9, caseid=10, start_index=849, length=60, motif='RT')
849 909
Pandas(Index=10, caseid=11, start_index=959, length=33, motif='SR')
959 992
Pandas(Index=11, caseid=12, start_index=1042, length=60, motif='RT')
1042 1102
Pandas(Index=12, caseid=13, start_index=1152, length=31, motif='SR')
1152 11

  pattern_dict[row[1]] = pattern


# Recall, Precision, and F1-Score Calculation for SmartRPA Segmentor results

This script part is used to calculate the Recall, Precision, and F1-Score based on two input files:

1. The results of the Agostinelli et al. Segmentor named "result_Agostinelli-[Log Name]"
2. The n-gram generated from the UI Log from the previous cell named "pattern-[Log Name]"

In [None]:
# Settings - Folder Path in which the discovered n-gram txt files from Agostinelli et als method 
# and the results of the pervious cell (n-gram baseline) are stored
folder_path = "../logs/smartRPA/percentageComparison/AgostinelliComparison_500_5000_50k-150k/"

def remove_trailing_number(line):
    """
    Removes the number at the end of a string.

    Parameters:
    line (str): Input string.

    Returns:
    str: String with the trailing number removed.
    """
    return re.sub(r'\d+$', '', line)

def read_text_file_to_list(file_path):
    """
    Reads a text file and stores each line as an element in a list.

    Parameters:
    file_path (str): The full path to the text file.

    Returns:
    list: A list where each element is a line from the text file.
    """
    # Check if the file exists
    if not os.path.exists(file_path):
        print(f"The file '{file_path}' does not exist.")
        return []

    # Read the file and store each line in a list
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Remove trailing newline characters from each line
    lines = [line.strip() for line in lines]
    lines = [remove_trailing_number(line) for line in lines]

    return lines

def match_files(folder_path, type=1):
    """
    Matches files starting with 'result_Agostinelli-' to their counterparts
    starting with 'agostinelli_patternBaseline-' in the same folder.

    Parameters:
    folder_path (str): Path to the folder containing the files.
    type (int: default 1): If the files have the pre-text (1) or just are same files with .txt (2)

    Returns:
    list: List of tuples with matched files (result_file, pattern_file).
    """
    # Get all files in the folder
    all_files = os.listdir(folder_path)

    # Separate files by their prefixes
    result_files = [f for f in all_files if f.startswith('result_Agostinelli-')]
    pattern_files = [f for f in all_files if f.startswith('pattern-LenLog')]

    # Match result files to pattern files
    matches = []
    for result_file in result_files:
        # Extract the unique identifier from the result file
        identifier = result_file.split('result_Agostinelli-', 1)[1]
        identifier = identifier[:-5]
        # Search for a corresponding pattern file
        for pattern_file in pattern_files:
            if identifier in pattern_file:
                matches.append((result_file, pattern_file))
                break

    return matches

def find_matching_subpatterns_with_summed_numbers(pattern_file, result_file):
    """
    Checks if any patterns from the pattern file occur as sub-patterns in the result file
    and sums the numbers from all matching lines for each pattern.

    Parameters:
    pattern_file (str): Path to the text file containing expected patterns.
    result_file (str): Path to the text file containing result patterns.

    Returns:
    dict: A dictionary where keys are patterns from the pattern file, and values are:
          - The sum of numbers from all matching lines in the result file.
          - None if the pattern is not found.
    """
    # Read the patterns from both files
    with open(pattern_file, 'r') as pf:
        patterns = [line.strip() for line in pf.readlines()]
    
    with open(result_file, 'r') as rf:
        results = [line.strip() for line in rf.readlines()]

    # Check for sub-patterns and sum numbers
    pattern_matches = {}
    for pattern in patterns:
        total_sum = 0
        for result in results:
            if pattern in result:
                # Extract the first number in the matching line
                number_match = re.search(r'\d+', result)
                if number_match:
                    total_sum += int(number_match.group())
        # Store the total sum or None if no matches were found
        pattern_matches[pattern] = total_sum if total_sum > 0 else None

    return pattern_matches

def sum_numbers_for_longer_patterns(result_file, pattern_word_length):
    """
    Sums the numbers in result file lines with word counts greater than the specified pattern length.

    Parameters:
    pattern_file (str): Path to the text file containing expected patterns.
    result_file (str): Path to the text file containing result patterns.
    pattern_word_length (int): Minimum word count for patterns from the pattern file.

    Returns:
    int: The sum of numbers in result patterns that exceed the pattern word length.
    """
    # Read the result file
    with open(result_file, 'r') as rf:
        results = [line.strip() for line in rf.readlines()]

    # Sum the numbers for result patterns longer than the pattern word length
    total_sum = 0
    for result in results:
        # Count words in the result line
        word_count = len(result.split())
        if word_count > pattern_word_length:
            # Extract the first number in the result line
            number_match = re.search(r'\d+', result)
            if number_match:
                total_sum += int(number_match.group())

    return total_sum

matches = match_files(folder_path)

result_df = pd.DataFrame(columns=["Filename", "n_motifs", "p", "r_o","rlen","precision","recall","f1score"])
for result, pattern in matches:
    # print(f"Result File: {result}, Matching Pattern File: {pattern}")
    file_path_result = os.path.join(folder_path, result)
    lines_list_result = read_text_file_to_list(file_path_result)
    # print("Contents of the text file as a list:")
    # print(lines_list)
    file_path_pattern = os.path.join(folder_path, pattern)
    lines_list_pattern = read_text_file_to_list(file_path_pattern)
    # print("Contents of the text file as a list:")
    # print(lines_list_pattern)

    patterns_found = find_matching_subpatterns_with_summed_numbers(file_path_pattern, file_path_result)
    # Print the results
    print(f"Result File {result}")
    total_true_positives = 0
    for found_pattern, occurred in patterns_found.items():
        # Handle None as 0
        value = occurred if occurred is not None else 0
        total_true_positives += value
        # print(f"Pattern: '{pattern}' - Found as Sub-pattern: {value}")


    # Get number of activities ("Words") from filename
    if "rlen" in result:
        activities = result.split("rlen")[1].split("_")[0]
        ro_motifs = result.split("ro")[1].split("_")[0]
        p_overLog = result.split("_p")[1].split("_")[0]
        n_motifs = result.split("_no")[1].split("_")[0]
    else:
        activities = result.split("_")[5]
        ro_motifs = result.split("_")[4]
        p_overLog = result.split("_")[6]
        n_motifs = result.split("_")[2]
    
    total_discovered = sum_numbers_for_longer_patterns(file_path_result, int(activities))
    print(f"Total longer routines discovered: {total_discovered}")
    if total_discovered is None or total_true_positives == 0 or total_discovered == 0:
        total_discovered = 0
        precision = 0
        recall = 0
        f1_score = 0
    else:
        print(f"Total True Positives: {total_true_positives}")
        print(f"ro_motifs: {ro_motifs}")
        precision = total_true_positives/total_discovered
        recall = total_true_positives/int(ro_motifs)
        if recall > 1:
            recall = 1
        f1_score = 2*(precision*recall)/(precision+recall)
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-score: {f1_score}")
    new_row = pd.DataFrame({"Filename": [pattern],"n_motifs": [n_motifs], "p": [p_overLog], "r_o": [ro_motifs],"rlen": [activities], "precision": [precision]
                            ,"recall": [recall], "f1score": [f1_score]})
    result_df = pd.concat([result_df, new_row], ignore_index=True)


Result File result_Agostinelli-LenLog_1_10_1_10_25_5_5000.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Agostinelli-LenLog_1_10_1_10_5_10_500.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Agostinelli-LenLog_1_10_1_10_5_1_5000.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Agostinelli-LenLog_1_10_1_20_25_10_5000.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Agostinelli-LenLog_1_1_10_25_5_5000.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Agostinelli-LenLog_1_1_10_5_10_500.txt
Total longer routines discovered: 1
Total True Positives: 1
ro_motifs: 10
Precision: 1.0
Recall: 0.1
F1-score: 0.18181818181818182
Result File result_Agostinelli-LenLog_1_1_10_5_1_5000.txt
Total longer routines discovered: 0
Precision: 0
Recall: 0
F1-score: 0
Result File result_Ag

In [9]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
result_df

Unnamed: 0,Filename,n_motifs,p,r_o,rlen,precision,recall,f1score
0,pattern-LenLog_1_10_1_10_25_5_5000.txt,1,25,1,10,0.0,0.0,0.0
1,pattern-LenLog_1_10_1_10_5_10_500.txt,1,5,1,10,0.0,0.0,0.0
2,pattern-LenLog_1_10_1_10_5_1_5000.txt,1,5,1,10,0.0,0.0,0.0
3,pattern-LenLog_1_10_1_20_25_10_5000.txt,1,25,1,20,0.0,0.0,0.0
4,pattern-LenLog_1_1_10_25_5_5000.txt,1,5,10,25,0.0,0.0,0.0
5,pattern-LenLog_1_1_10_5_10_500.txt,1,10,10,5,1.0,0.1,0.181818
6,pattern-LenLog_1_1_10_5_1_5000.txt,1,1,10,5,0.0,0.0,0.0
7,pattern-LenLog_1_1_20_25_10_5000.txt,1,10,20,25,1.0,0.3,0.461538
8,pattern-LenLog_1_20_1_10_25_5_5000.txt,1,25,1,10,0.0,0.0,0.0
9,pattern-LenLog_1_20_1_10_5_10_500.txt,1,5,1,10,0.0,0.0,0.0


### Generate Results from Agostinelli Patterns Excel File

CSV File that contained all discovered results per file in just one line

In [None]:
import pandas as pd

folder_path = "../logs/smartRPA/percentageComparison/"
grundTruth_ValidationData_filename = "Agostinelli_csv_results_500_5000_50k-150k.csv"

data = pd.read_csv(folder_path + grundTruth_ValidationData_filename, sep=",")

# Function to add a newline after every number occurrence
def add_newline_after_numbers(content):
    # Remove all tabs and replace them with a single space
    content = content.replace("\t", " ")
    # Split the content into lines
    lines = content.splitlines()
    # Remove rows that only contain 'nan' (case-insensitive)
    cleaned_lines = [line for line in lines if not re.fullmatch(r"(\s*nan\s*)+", line, flags=re.IGNORECASE)]
    # Rejoin the cleaned lines
    cleaned_content = "\n".join(cleaned_lines)
    # Remove trailing occurrences of "nan" at the end of the content
    cleaned_content = re.sub(r"(\s*nan\s*)+$", "", cleaned_content, flags=re.IGNORECASE)
    # Add newline after every decimal number (integer or float)
    cleaned_content = re.sub(r"(\b\d+(\.\d+)?\b)", r"\1\n", cleaned_content)
    # Strip leading and trailing whitespace
    cleaned_content = cleaned_content.strip()
    return cleaned_content

for index, row in data.iterrows():
    # Extract the filename from the first column
    filename = row[0]
    
    # Combine the relevant row data into a string
    content = "\t".join(str(value) for value in row[2:])
    
    print(filename)
    
    # Add a newline after every number
    processed_content = add_newline_after_numbers(content)
    print(processed_content)

    # Save the content to a text file with the filename
    with open(f"result_Agostinelli-{filename}.txt", "w", encoding="utf-8") as file:
        file.write(content)

LenLog_1_10_1_20_25_10_5000

LenLog_1_1_20_25_10_5000
copy clickLink paste SlideSelectionChanged newPresentationSlide openDocument clickRadioButton installBrowserExtension clickRadioButton clickLink clickLink installBrowserExtension clickLink clickLink clickLink uninstallBrowserExtension uninstallBrowserExtension installBrowserExtension clickRadioButton installBrowserExtension clickCheckboxButton clickLink clickCheckboxButton dragElement installBrowserExtension installBrowserExtension unmutedTab installBrowserExtension dragElement clickLink installBrowserExtension 6.0
 newWindow copy clickLink paste SlideSelectionChanged newPresentationSlide openDocument clickRadioButton installBrowserExtension clickRadioButton clickLink clickLink installBrowserExtension clickLink clickLink clickLink uninstallBrowserExtension uninstallBrowserExtension installBrowserExtension clickRadioButton installBrowserExtension clickCheckboxButton clickLink clickCheckboxButton dragElement installBrowserExtension in

  filename = row[0]


In [None]:
import os

def clean_unmatched_txt_files(folder_path):
    """
    Deletes all .txt files in the specified folder that do not have a matching .csv file.

    Parameters:
    folder_path (str): The path to the folder containing .txt and .csv files.
    """
    if not os.path.exists(folder_path):
        print(f"The folder '{folder_path}' does not exist.")
        return

    # Get the list of all CSV files (without extensions)
    csv_base_names = {os.path.splitext(f)[0] for f in os.listdir(folder_path) if f.endswith('.csv')}

    # Iterate over all .txt files
    for txt_file in os.listdir(folder_path):
        if txt_file.endswith('.txt'):
            txt_base_name = os.path.splitext(txt_file)[0]  # Get the base name of the .txt file

            # If there is no matching CSV file, delete the txt file
            if txt_base_name not in csv_base_names:
                os.remove(os.path.join(folder_path, txt_file))
                print(f"Deleted: {txt_file}")

# Example usage
folder_path = "../logs/smartRPA/percentageComparison/LenoComparison"  # Replace with your folder path
clean_unmatched_txt_files(folder_path)


Deleted: pattern-LenLog_1_10_1_10_10_10_1000.txt
Deleted: pattern-LenLog_1_10_1_10_10_1_10000.txt
Deleted: pattern-LenLog_1_10_1_10_10_2-5_4000.txt
Deleted: pattern-LenLog_1_10_1_10_10_5_2000.txt
Deleted: pattern-LenLog_1_10_1_10_15_10_1500.txt
Deleted: pattern-LenLog_1_10_1_10_15_1_15000.txt
Deleted: pattern-LenLog_1_10_1_10_15_2-5_6000.txt
Deleted: pattern-LenLog_1_10_1_10_15_5_3000.txt
Deleted: pattern-LenLog_1_10_1_10_20_10_2000.txt
Deleted: pattern-LenLog_1_10_1_10_20_1_20000.txt
Deleted: pattern-LenLog_1_10_1_10_20_2-5_8000.txt
Deleted: pattern-LenLog_1_10_1_10_20_5_4000.txt
Deleted: pattern-LenLog_1_10_1_10_25_10_2500.txt
Deleted: pattern-LenLog_1_10_1_10_25_1_25000.txt
Deleted: pattern-LenLog_1_10_1_10_25_2-5_10000.txt
Deleted: pattern-LenLog_1_10_1_10_25_5_5000.txt
Deleted: pattern-LenLog_1_10_1_10_5_10_500.txt
Deleted: pattern-LenLog_1_10_1_10_5_1_5000.txt
Deleted: pattern-LenLog_1_10_1_10_5_2-5_2000.txt
Deleted: pattern-LenLog_1_10_1_10_5_5_1000.txt
Deleted: pattern-LenLog_1