# Training Directory Tools
----

This notebook contains all my magic I use to fiddle around with my datasets.

## Convert .webp to .png
----

This script converts all WebP images in a specified directory and its subdirectories to PNG format. It utilizes the `os` module to navigate through the directory structure and the `PIL` (Python Imaging Library) module's `Image` class to handle image processing. The function `convert_webp_to_png(directory)` takes a directory path as input, iterates through all files in that directory (including subdirectories), identifies WebP files based on their extension, converts them to PNG format, and saves the converted images in the same location. If conversion is successful, it also removes the original WebP files. If any errors occur during conversion, it prints an error message with details.

In [1]:
import os
from PIL import Image

def convert_webp_to_png(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith('.webp'):
                webp_path = os.path.join(root, file)
                png_path = os.path.splitext(webp_path)[0] + '.png'
                try:
                    with Image.open(webp_path) as img:
                        img.save(png_path, format='PNG')

                    os.remove(webp_path)
                    print(f"Converted {webp_path} to {png_path}")
                except Exception as e:
                    print(f"Error converting {webp_path}: {e}")

directory = r'E:\training_dir'
convert_webp_to_png(directory)

Converted E:\training_dir\bunnypaws\Aspekt.webp to E:\training_dir\bunnypaws\Aspekt.png
Converted E:\training_dir\bunnypaws\BeckAndArco.webp to E:\training_dir\bunnypaws\BeckAndArco.png
Converted E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcC-RAMyhgCSdjHXALY3-AW04jAAJJAQACUzihRiT7YFrWoWOgIAQ.webp to E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcC-RAMyhgCSdjHXALY3-AW04jAAJJAQACUzihRiT7YFrWoWOgIAQ.png
Converted E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6ECBkkGen9vu0nfz3nOhi4KAAKqAQACdX3pR00bYhmqsDSOIAQ.webp to E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6ECBkkGen9vu0nfz3nOhi4KAAKqAQACdX3pR00bYhmqsDSOIAQ.png
Converted E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6EHLyrFVV6KDt_ehvaTqEvCAAIuAQACJLbhR4tj2fapsvIsIAQ.webp to E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6EHLyrFVV6KDt_ehvaTqEvCAAIuAQACJLbhR4tj2fapsvIsIAQ.png
Converted E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6Fvhv_5o8LmW4VwvgvHplQuAAJcAQACwNToRrdQ2H_0JmXkIAQ.webp to E:\training_dir\bunnypaws\CAACAgEAAxUAAWDcH6Fvhv_5o8LmW4VwvgvHplQuAAJcAQ

In [18]:
import os

def process_image_files(directory):
    """
    Process image files in the given directory. For each image file (.jpeg, .jpg, .png),
    if corresponding tags and caption files exist, combine the tags (.tags) and caption (.caption)
    into a single text file (.txt) with tags followed by caption separated by a comma. Additionally,
    the caption will have the following modifications:
        - Each sentence will have commas inside sentences stripped from them.
        - Each sentence will end with a period followed by a comma.
        - Any parentheses in the tags will be escaped with a backslash.

    Parameters:
        directory (str): The directory path containing image files and associated tags
                         and caption files.
    """
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpeg', '.jpg', '.png')):
                image_name, _ = os.path.splitext(file)
                tags_file = os.path.join(root, image_name + '.tags')
                caption_file = os.path.join(root, image_name + '.caption')
                txt_file = os.path.join(root, image_name + '.txt')
                
                if os.path.exists(tags_file) and os.path.exists(caption_file):
                    with open(tags_file, 'r') as f:
                        tags = f.read().strip()
                        tags = tags.replace('(', '\\(').replace(')', '\\)')
                    with open(caption_file, 'r') as f:
                        caption = f.read().strip()
                        caption = caption.replace(', ', ' ')
                        caption = caption.replace('.', '.,')
                        caption = caption.rstrip(',')
                    
                    with open(txt_file, 'w') as f:
                        f.write(tags + ', ' + caption)
                    print(f"Processed {file} successfully.")
                else:
                    if not os.path.exists(tags_file):
                        print(f"Warning: Tags file missing for {file}")
                    if not os.path.exists(caption_file):
                        print(f"Warning: Caption file missing for {file}")

directory = r'C:\Users\kade\Desktop\training_dir_staging'
process_image_files(directory)

Processed Tibet tilkisi.jpg successfully.
Processed tumblr_41647863cea34e332fdeab607f40531f_7f78453a_1280.jpg successfully.
Processed 1539232.jpg successfully.
Processed 1f049eb995024948b6b7f2205e0703af.jpg successfully.
Processed 37398.jpg successfully.
Processed Border-Collie-2.jpg successfully.
Processed border-collie-4.jpg successfully.
Processed border-collie-running-grass-299097784-2000-226321f61c2c426da90434e13a55a0b5.jpeg successfully.
Processed cute-red-panda-ailurus-fulgens-in-a-tree.jpg successfully.
Processed fennec-fox--vulpes-zerda---484466908-bc194497fd834b29bf44692c4908d4bc.jpg successfully.
Processed fennec-fox-01-f56044b427f64d64a263417db3c23a13.jpg successfully.
Processed gecko.jpg successfully.
Processed iStock-484910814.jpg successfully.
Processed p86qy9k3xm851.png successfully.
Processed puvwusj6wcu41.jpg successfully.
Processed Red_Panda_(25193861686).jpg successfully.
Processed rev-1-PDP-TRL2-093_High_Res_JPEG.png successfully.
Processed snow-leopard--panthera-u

In [20]:
import os

def find_missing_files(directory):
    missing_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                file_name, file_extension = os.path.splitext(file)
                caption_file = os.path.join(root, file_name + ".caption")
                tags_file = os.path.join(root, file_name + ".tags")
                if not os.path.exists(caption_file):
                    missing_files.append(caption_file)
                if not os.path.exists(tags_file):
                    missing_files.append(tags_file)
    return missing_files

if __name__ == "__main__":
    directory = r"C:\Users\kade\Desktop\training_dir_staging"
    missing_files = find_missing_files(directory)
    if missing_files:
        print("Missing files:")
        for file in missing_files:
            print(file)
    else:
        print("No missing files found.")


Missing files:
C:\Users\kade\Desktop\training_dir_staging\129440_Iridescent-fish-scales_shutterstock_586009376---Index.caption
C:\Users\kade\Desktop\training_dir_staging\129440_Iridescent-fish-scales_shutterstock_586009376---Index.tags
C:\Users\kade\Desktop\training_dir_staging\1539232.caption
C:\Users\kade\Desktop\training_dir_staging\1539232.tags
C:\Users\kade\Desktop\training_dir_staging\37398.caption
C:\Users\kade\Desktop\training_dir_staging\37398.tags
C:\Users\kade\Desktop\training_dir_staging\Border-Collie-2.caption
C:\Users\kade\Desktop\training_dir_staging\Border-Collie-2.tags
C:\Users\kade\Desktop\training_dir_staging\border-collie-4.caption
C:\Users\kade\Desktop\training_dir_staging\border-collie-4.tags
C:\Users\kade\Desktop\training_dir_staging\border-collie-running-grass-299097784-2000-226321f61c2c426da90434e13a55a0b5.caption
C:\Users\kade\Desktop\training_dir_staging\border-collie-running-grass-299097784-2000-226321f61c2c426da90434e13a55a0b5.tags
C:\Users\kade\Desktop\tra

In [23]:
import os
import json
from rich.console import Console

console = Console()

# Define tags to be ignored
ignored_tags = [
    "hi res",
    "shaded",
    "tagme",
    "absurd res",
    "detailed",
    "dota",
]

def process_file(file_path):
    try:
        console.print(f"Processing file: [bold]{file_path}[/bold]")
        with open(file_path, "r") as f:
            data = json.load(f)

        # Parse the URL and generate filename
        post_data = data.get("post", {})
        file_data = post_data.get("file", {})
        url = file_data.get("url")
        if url:
            filename, ext = os.path.splitext(os.path.basename(url))

            # Create caption file
            caption_file = f"{filename}.txt"
            caption_path = os.path.join(os.path.dirname(file_path), caption_file)

            with open(caption_path, "w", encoding="utf-8") as f:
                console.print(f"Creating caption file: [bold]{caption_path}[/bold]")
                # Write rating
                rating = file_data.get("rating", "q")
                if rating == "s":
                    f.write("rating_safe, ")
                elif rating == "e":
                    f.write("rating_explicit, ")
                else:
                    f.write("rating_questionable, ")

                # Process tags
                tags = []
                tags_data = post_data.get("tags", {})
                for category, tags_list in tags_data.items():
                    for tag in tags_list:
                        # Replace underscores with spaces
                        tag = tag.replace("_", " ")
                        if tag.lower() not in ignored_tags:
                            processed_tag = process_tag(tag, category)
                            if processed_tag:
                                tags.append(processed_tag)

                # Check if there are any valid tags before writing
                if tags:
                    # Join tags with commas and write to file
                    tags_line = ", ".join(tags)
                    f.write(tags_line.strip())
                    console.print(f"Writing tags: [italic]{tags_line.strip()}[/italic]")

    except Exception as e:
        console.print(f"Error processing file: [bold]{file_path}[/bold]")
        console.print(e)

def process_tag(tag, category=None):
    if tag.isdigit() or all(part.isdigit() for part in tag.split(':')):
        return ""
    tag = tag.replace("_", " ")
    if category == "artist":
        return f"by {tag}"
    else:
        tag = tag.replace("(", "\\(").replace(")", "\\)")
        return tag

def recursive_process(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                process_file(file_path)

if __name__ == "__main__":
    root_directory = r"C:\Users\kade\Desktop\training_dir_staging"
    recursive_process(root_directory)

### Duplicate checker
----

In [3]:
import os

def check_duplicate_tags(file_path):
    with open(file_path, 'r') as file:
        tags = file.read().split(', ')
        duplicates = set()
        unique_tags = set()
        for tag in tags:
            if tag in unique_tags:
                duplicates.add(tag)
            else:
                unique_tags.add(tag)
        if duplicates:
            print(f"Duplicate tags found in {file_path}: {', '.join(duplicates)}")

def check_tags_in_directory(directory):
    for root, _, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith('.txt'):
                file_path = os.path.join(root, file_name)
                check_duplicate_tags(file_path)

if __name__ == "__main__":
    directory_path = r'C:\Users\kade\Desktop\training_dir_staging'
    check_tags_in_directory(directory_path)


### Tag Counter
----

In [13]:
import os
from collections import Counter

def extract_tags_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
        tags = content.split(', ')
        return tags

def process_directory(directory_path):
    all_tags = []
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                tags = extract_tags_from_file(file_path)
                all_tags.extend(tags)
    return all_tags

def list_tags_frequency(tags):
    tag_counter = Counter(tags)
    sorted_tags = sorted(tag_counter.items(), key=lambda x: x[1], reverse=True)
    return sorted_tags

# Specify the directory path
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Process the directory and extract tags
all_tags = process_directory(directory_path)

# List tags and their frequencies in descending order
sorted_tags = list_tags_frequency(all_tags)

# Print the results
for tag, frequency in sorted_tags:
    print(f'{tag}: {frequency} times')

solo: 135 times
female: 108 times
rating_questionable: 104 times
: 95 times
genitals: 86 times
looking at viewer: 86 times
fur: 83 times
tail: 80 times
dragon: 80 times
scalie: 78 times
dota: 77 times
mythology: 77 times
auroth the winter wyvern: 77 times
mythological creature: 77 times
mythological scalie: 77 times
anthro: 75 times
european mythology: 72 times
western dragon: 72 times
hi res: 72 times
wyvern: 71 times
pussy: 68 times
feral: 68 times
mammal: 65 times
claws: 64 times
hair: 62 times
nude: 59 times
breasts: 57 times
teeth: 57 times
anus: 55 times
butt: 54 times
smile: 52 times
realistic: 50 times
scales: 45 times
wings: 45 times
canid: 42 times
canine: 41 times
bodily fluids: 40 times
presenting: 40 times
tongue: 40 times
nipples: 38 times
digital media \(artwork\): 37 times
detailed background: 37 times
lying: 36 times
genital fluids: 35 times
white body: 34 times
outside: 33 times
blue body: 32 times
clothing: 31 times
blue eyes: 30 times
seductive: 29 times
open mouth:

### Replace tags and remove duplicates
----

In [35]:
import os
import re

def process_files(directory, old_tag, new_tag):
    try:
        for entry in os.listdir(directory):
            entry_path = os.path.join(directory, entry)

            if os.path.isdir(entry_path):
                process_files(entry_path, old_tag, new_tag)

            elif os.path.isfile(entry_path) and entry.endswith(".txt"):
                with open(entry_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                if old_tag:
                    content = re.sub(r'\b' + re.escape(old_tag) + r'\b', new_tag, content)

                tag_pattern = re.compile(r'(\b\w+\b)(?:(?:,|\s)+\1)+')
                content = re.sub(tag_pattern, r'\1', content)

                with open(entry_path, 'w', encoding='utf-8') as f:
                    f.write(content)

    except Exception as e:
        print(f"Error processing directory {directory}: {e}\n")

# Directory path
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

process_files(directory_path, 'were', 'sparkledog')
process_files(directory_path, 'werecanine', 'sparkledog')
process_files(directory_path, 'werewolf', 'sparkledog')

### Insert tag
----

In [1]:
import os

# Function to insert a specified tag in text files in subdirectories
def insert_tag_in_files(directory, tag_to_insert):
    try:
        for entry in os.listdir(directory):
            entry_path = os.path.join(directory, entry)

            if os.path.isdir(entry_path):
                insert_tag_in_files(entry_path, tag_to_insert)

            elif os.path.isfile(entry_path) and entry.endswith(".txt"):

                with open(entry_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Insert the specified tag
                content = tag_to_insert + ', ' + content

                # Write back to the file
                with open(entry_path, 'w', encoding='utf-8') as f:
                    f.write(content)

    except Exception as e:
        print(f"Error processing directory {directory}: {e}\n")

directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Execute the function with the desired tag
insert_tag_in_files(directory_path, 'realistic')

### Escape parentheses
----

Recursively escape unescaped parentheses in all '.txt' files within the specified directory and its subdirectories. 

In [2]:
import os
import re

def escape_parentheses(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    # Escape unescaped parentheses
    content = re.sub(r'(?<!\\)([()])', r'\\\1', content)

    with open(file_path, 'w') as file:
        file.write(content)

def process_directory(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                escape_parentheses(file_path)

                # Recurse into subdirectories
                process_directory(os.path.join(root, file))

directory_path = r'C:\Users\kade\Desktop\training_dir_staging'
process_directory(directory_path)

### Replace underscores with spaces
----

Recursively replaces underscores with spaces in the content of text files in the specified directory and its subdirectories,
excluding specified tags.

In [3]:
import os

excluded_tags = [
    "rating_safe",
    "rating_explicit",
    "rating_questionable"
]

def replace_underscores_with_spaces(directory_path):
    for root, dirs, files in os.walk(directory_path):
        for filename in files:
            if filename.endswith(".txt"):
                file_path = os.path.join(root, filename)

                # Read the content of the file
                with open(file_path, 'r') as file:
                    content = file.read()

                # Replace underscores with spaces, excluding specific tags
                for tag in excluded_tags:
                    replacement = tag.replace('_', ' ')
                    content = content.replace(replacement, tag)

                # Write the modified content back to the file
                with open(file_path, 'w') as file:
                    file.write(content)

# Specify the directory path
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Call the function to recursively replace underscores with spaces (excluding specified tags)
replace_underscores_with_spaces(directory_path)

### Fix tags that need underscores after that! 🐱
----

In [12]:
import os
import fileinput

# Function to recursively replace text in *.txt files
def replace_text_in_files(directory):
    for subdir, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(subdir, file)
                with fileinput.FileInput(file_path, inplace=True) as file:
                    for line in file:
                        print(line.replace("rating safe", "rating_safe")
                                   .replace("rating questionable", "rating_questionable")
                                   .replace("rating explicit", "rating_explicit"), end='')

# Replace text in the specified directory
replace_text_in_files(r'C:\Users\kade\Desktop\training_dir_staging')

### Remove extra file extension before .txt
----

In [4]:
import os

def rename_files(directory_path):
    """
    Recursively renames files with additional image extensions in the specified directory and its subdirectories.
    """
    for root, dirs, files in os.walk(directory_path):
        for filename in files:
            if filename.endswith('.txt'):
                # Extract the base name without extension
                base_name, extension = os.path.splitext(filename)

                # Check if the file has an additional image extension
                if base_name.endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif')):
                    # Construct the new filename with only the txt extension
                    new_filename = base_name[:-4] + '.txt'

                    # Construct the full file paths
                    old_path = os.path.join(root, filename)
                    new_path = os.path.join(root, new_filename)

                    # Rename the file
                    os.rename(old_path, new_path)

# Specify the directory path
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Call the function to recursively rename files
rename_files(directory_path)

### Newlines to commas
----

Recursively modify the content of '.txt' files in the specified directory and its subdirectories by replacing newlines with commas and spaces. 

In [14]:
import os

def process_directory(directory):
    for root, dirs, files in os.walk(directory):
        for filename in files:
            if filename.endswith(".txt"):
                file_path = os.path.join(root, filename)
                
                # Read the content of the file
                with open(file_path, 'r') as file:
                    content = file.read()
                
                # Replace newline with a comma and space
                modified_content = content.replace('\n', ', ')
                
                # Write the modified content back to the file
                with open(file_path, 'w') as file:
                    file.write(modified_content)

# Directory path
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Recursively process the directory and its subdirectories
process_directory(directory_path)

### Remove tags ⚠️
----

In [None]:
"""
This script is designed to remove specific tags from text files (*.txt) located within a given directory and its subdirectories. It iterates through each file, reads its content, removes specified tags, and then overwrites the file with the modified content.

The tags to be removed are specified in the `tags_to_remove` list within the `remove_tags` function. These tags include various strings, such as certain species names, phrases like "unavailable at source," years from 1996 to 2024, and phrases like "generation X pokemon." The script constructs regular expressions to match both escaped and non-escaped occurrences of parentheses in the tags.

The `remove_tags` function takes a file path as input and returns a list of removal actions performed, indicating which tags were removed from which files.

The `process_directory` function recursively processes all files within a specified directory and its subdirectories. For each file with a ".txt" extension, it calls the `remove_tags` function to remove tags and accumulates removal actions. Finally, it prints out all removal actions performed.

To utilize the script, provide the path to the directory containing the text files that need tag removal. Upon execution, the script will modify the files in place, removing the specified tags, and output a log of removal actions.
"""

import os
import re

def remove_tags(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    tags_to_remove = [
        "pal (species)",
        "pocketpair",
        "unavailable at source",
        "partially",
        "pokemon (species)",
        "generation",
        "pokephilia",
        "pokemon",
        "nintendo",
        "eeveelution",
        "uncensored",
        "translated",
        "partially translated",
        "translation request",
        "16 10",
        "16 9",
        "10 16",
        "9 16",
        "6 5",
        "5 6",
        "5 4",
        "4 3",
        "4 5",
        "3 4",
        "3 2",
        "2 3",
        "2 1",
        "1 2",
        "1 1",
        "4k",
        "absurd res",
        "hi res",
        "elden ring",
        "fromsoftware",
        "canid",
        "canis",
        "mammal",
        "unwanted erection",
        "lighting",
        "shaded",
        "widescreen"
    ]

    for gen in range(1, 10):
        tags_to_remove.append(f"generation {gen} pokemon")

    for year in range(1996, 2025):
        tags_to_remove.append(str(year))

    removal_actions = []

    for tag in tags_to_remove:
        # Construct regular expression to match both escaped and non-escaped occurrences of parentheses
        pattern = re.compile(re.escape(tag.replace('(', '\(').replace(')', '\)')) + r'|' + tag.replace('(', '\(').replace(')', '\)'))
        if pattern.search(content):
            content = pattern.sub('', content)
            removal_actions.append(f'Removed tag "{tag}" from file: {file_path}')

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)

    return removal_actions

def process_directory(directory):
    all_removal_actions = []

    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                removal_actions = remove_tags(file_path)
                all_removal_actions.extend(removal_actions)

    for action in all_removal_actions:
        print(action)

# Provide the path to the directory
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Recursively remove tags from *.txt files in the specified directory and print removal actions
process_directory(directory_path)

In [5]:
import os

def remove_tags(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    tags_to_remove = [
        "pal (species)",
        "pocketpair",
        "unavailable at source",
        "partially",
        "pokemon \(species\)",
        "generation",
        "pokephilia",
        "pokemon",
        "nintendo",
        "eeveelution",
        "uncensored",
        "translated",
        "partially translated",
        "translation request",
        "16 10",
        "16 9",
        "10 16",
        "9 16",
        "6 5",
        "5 6",
        "5 4",
        "4 3",
        "4 5",
        "3 4",
        "3 2",
        "2 3",
        "2 1",
        "1 2",
        "1 1",
        "4k",
        "absurd res",
        "hi res",
        "elden ring",
        "fromsoftware",
        "canid",
        "canis",
        "mammal",
        "unwanted erection",
        "lighting",
        "shaded",
        "widescreen"
    ]

    for gen in range(1, 9):
        tags_to_remove.append(f"generation {gen} pokemon")

    for year in range(1996, 2025):
        tags_to_remove.append(str(year))

    removal_actions = []

    for tag in tags_to_remove:
        if tag in content:
            content = content.replace(tag, '')

            tag_comma = f"{tag}, "
            content = content.replace(tag_comma, '')

            removal_actions.append(f'Removed tag "{tag}" and ", " after it from file: {file_path}')

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)

    return removal_actions

def process_directory(directory):
    all_removal_actions = []

    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                removal_actions = remove_tags(file_path)
                all_removal_actions.extend(removal_actions)

    for action in all_removal_actions:
        print(action)

# Provide the path to the directory
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Recursively remove tags and ", " after tags from *.txt files in the specified directory and print removal actions
process_directory(directory_path)


Removed tag "hi res" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01868e97d6881edd02a2a023f3eebd1e.txt
Removed tag "mammal" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01868e97d6881edd02a2a023f3eebd1e.txt
Removed tag "3 4" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01899bf2d1d82e428f4cc17112b59520.txt
Removed tag "absurd res" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01899bf2d1d82e428f4cc17112b59520.txt
Removed tag "hi res" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01899bf2d1d82e428f4cc17112b59520.txt
Removed tag "mammal" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\01899bf2d1d82e428f4cc17112b59520.txt
Removed tag "4 5" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_hoodwink_(dota)\045

### Replace `, ,` with `,` after that mess. 😼
----

In [10]:
import os

# Start directory
start_dir = r'C:\Users\kade\Desktop\training_dir_staging'

# Function to replace text in *.txt files
def replace_text_in_files(directory):
    while True:  # Run indefinitely until no more matches are found
        found_match = False  # Flag to track if any match is found
        for root, dirs, files in os.walk(directory):
            for file in files:
                if file.endswith(".txt"):
                    file_path = os.path.join(root, file)
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Replace ', ,' with ','
                    updated_content = content.replace(', ,', ',').replace(',  ,', ',')
                    if updated_content != content:
                        found_match = True  # Set the flag to True if any match is found
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(updated_content)
        if not found_match:  # If no match is found, break the loop
            break

# Run the function
replace_text_in_files(start_dir)

In [None]:
import os

# Start directory
start_dir = r'C:\Users\kade\Desktop\training_dir_staging'

# Function to replace text in *.txt files
def replace_text_in_files(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Replace ', ,' with ','
                content = content.replace(', ,', ',')
                content = content.replace(',  ,', ',')
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)

# Run the function
replace_text_in_files(start_dir)