# training_dir_staging

### Replace tags
----

In [None]:
import os

# Function to replace tags in text files in a directory tree
def replace_tags_in_files(directory, old_tag, new_tag):
    """Replace ``old_tag`` with ``new_tag`` in every .txt file under ``directory``.

    Subdirectories are processed recursively. Files are read and written as
    UTF-8, and a file is only rewritten when its content actually changes.

    Errors are reported with ``print`` and never raised: a directory that
    cannot be listed is skipped as a whole, while a file that cannot be read,
    decoded or written is skipped on its own so the remaining files in the
    same directory are still processed.

    Args:
        directory: Path of the directory to process.
        old_tag: Text to search for.
        new_tag: Text that replaces ``old_tag``.
    """
    try:
        entries = os.listdir(directory)
    except OSError as e:
        print(f"Error processing directory {directory}: {e}\n")
        return

    for entry in entries:
        entry_path = os.path.join(directory, entry)

        if os.path.isdir(entry_path):
            replace_tags_in_files(entry_path, old_tag, new_tag)
            continue

        if not (os.path.isfile(entry_path) and entry.endswith(".txt")):
            continue

        try:
            with open(entry_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Two passes on purpose: for separator clean-ups such as
            # ',,' -> ',' the second pass collapses what the first one left.
            updated = content.replace(old_tag + ', ', new_tag + ', ').replace(old_tag, new_tag)

            if updated != content:
                with open(entry_path, 'w', encoding='utf-8') as f:
                    f.write(updated)
        except (OSError, UnicodeError) as e:
            # Keep going: one bad file must not stop the rest of the directory.
            print(f"Error processing file {entry_path}: {e}\n")

# Root folder holding the caption files to clean up
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# (old tag, new tag) pairs, applied one after another in this order
tag_replacements = [
    ('exclamation point', '!'),
    ('english text', 'text'),
    ('hankerchief', 'handkerchief'),
    (',,', ','),
    (', ,', ','),
]

for old_tag, new_tag in tag_replacements:
    replace_tags_in_files(directory_path, old_tag, new_tag)

### Insert tag
----

In [None]:
import os

# Function to insert a specified tag in text files in a directory tree
def insert_tag_in_files(directory, tag_to_insert):
    """Prepend ``tag_to_insert`` to every .txt file under ``directory``.

    Subdirectories are processed recursively and files are handled as UTF-8.
    The tag is followed by ``", "`` unless the file is empty or holds only
    whitespace, in which case no separator is written so that no empty
    trailing tag is created.

    Errors are reported with ``print`` and never raised: a directory that
    cannot be listed is skipped as a whole, while a file that cannot be read,
    decoded or written is skipped on its own so the remaining files in the
    same directory are still processed.

    Note that the tag is inserted unconditionally; calling this twice inserts
    the tag twice.

    Args:
        directory: Path of the directory to process.
        tag_to_insert: Tag text placed at the start of each file.
    """
    try:
        entries = os.listdir(directory)
    except OSError as e:
        print(f"Error processing directory {directory}: {e}\n")
        return

    for entry in entries:
        entry_path = os.path.join(directory, entry)

        if os.path.isdir(entry_path):
            insert_tag_in_files(entry_path, tag_to_insert)
            continue

        if not (os.path.isfile(entry_path) and entry.endswith(".txt")):
            continue

        try:
            with open(entry_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # No separator when there is nothing to separate the tag from.
            separator = ', ' if content.strip() else ''

            with open(entry_path, 'w', encoding='utf-8') as f:
                f.write(tag_to_insert + separator + content)
        except (OSError, UnicodeError) as e:
            # Keep going: one bad file must not stop the rest of the directory.
            print(f"Error processing file {entry_path}: {e}\n")

# Root folder whose caption files receive the new tag
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Prepend the tag to every .txt file below that folder
insert_tag_in_files(directory=directory_path, tag_to_insert='new_tag_here')

### Escape parentheses
----

Recursively escape unescaped parentheses in all '.txt' files within the specified directory and its subdirectories. 

In [10]:
import os
import re

def escape_parentheses(file_path):
    """Backslash-escape every unescaped parenthesis in the file at ``file_path``.

    A ``(`` or ``)`` that is already directly preceded by a backslash is left
    alone, so running this on an already-escaped file changes nothing.

    The file is read and written as UTF-8, so the result does not depend on
    the platform's default encoding. The file is only rewritten when
    something was escaped.

    Args:
        file_path: Path of the text file to update in place.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Escape unescaped parentheses (negative lookbehind skips "\(" and "\)")
    escaped = re.sub(r'(?<!\\)([()])', r'\\\1', content)

    if escaped != content:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(escaped)

def process_directory(directory):
    """Escape parentheses in every .txt file under ``directory``.

    Each matching file is passed to ``escape_parentheses``.

    Args:
        directory: Root directory to scan.
    """
    # os.walk already descends into every subdirectory, so no manual
    # recursion is needed here.
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                escape_parentheses(os.path.join(root, file))

# Root folder whose caption files get their parentheses escaped
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

process_directory(directory=directory_path)

### Replace underscores with spaces
----

Recursively replaces underscores with spaces in the content of text files in the specified directory and its subdirectories,
excluding specified tags.

In [9]:
import os

# Tags that must keep their underscores
excluded_tags = ["score_safe", "score_explicit", "score_questionable"]

def replace_underscores_with_spaces(directory_path, excluded=None):
    """Replace underscores with spaces in every .txt file under ``directory_path``.

    All underscores are first turned into spaces, then each excluded tag is
    put back in its underscore form. Matching is plain substring replacement,
    so a spaced-out excluded tag (for example ``score safe``) that was already
    present in the file is converted to its underscore form as well.

    Files are read and written as UTF-8 and are only rewritten when their
    content changes.

    Args:
        directory_path: Root directory to scan, including subdirectories.
        excluded: Iterable of tags that keep their underscores. Defaults to
            the module-level ``excluded_tags`` list.
    """
    if excluded is None:
        excluded = excluded_tags
    # Materialize once so a generator argument can be reused for every file.
    excluded = tuple(excluded)

    for root, _dirs, files in os.walk(directory_path):
        for filename in files:
            if not filename.endswith(".txt"):
                continue

            file_path = os.path.join(root, filename)

            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Replace every underscore, then restore the excluded tags
            updated = content.replace('_', ' ')
            for tag in excluded:
                updated = updated.replace(tag.replace('_', ' '), tag)

            if updated != content:
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(updated)

# Root folder whose caption files are converted
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Run the underscore conversion over that folder and everything below it
replace_underscores_with_spaces(directory_path=directory_path)

### Fix tags that need underscores after that! 🐱
----

In [6]:
import os
import fileinput

# Restore the underscore form of the score tags in every *.txt file
def replace_text_in_files(directory):
    """Rewrite the spaced score tags back to their underscore form.

    Every .txt file under ``directory`` (subdirectories included) is edited
    in place, line by line, through ``fileinput``: whatever is printed while
    the file is open becomes the file's new content.
    """
    # Applied to each line in this order
    fixes = (
        ("score safe", "score_safe"),
        ("score questionable", "score_questionable"),
        ("score explicit", "score_explicit"),
    )

    for folder, _, names in os.walk(directory):
        for name in names:
            if not name.endswith(".txt"):
                continue

            target = os.path.join(folder, name)
            with fileinput.FileInput(target, inplace=True) as stream:
                for line in stream:
                    for spaced, underscored in fixes:
                        line = line.replace(spaced, underscored)
                    # Lines keep their own newline, so none is added here
                    print(line, end='')

# Run the score-tag fix over the staging folder
replace_text_in_files(directory=r'C:\Users\kade\Desktop\training_dir_staging')

### Remove extra file extension before .txt
----

In [8]:
import os

def rename_files(directory_path):
    """
    Recursively strip an extra image extension that sits before '.txt'.

    For example 'picture.jpeg.txt' becomes 'picture.txt'. The image extension
    is matched case-insensitively and removed in full, whatever its length.
    Files whose name before the image extension is empty (such as '.png.txt')
    are left alone.

    If the target name already exists the file is not renamed and a message
    is printed, so an existing caption file is never overwritten.

    Args:
        directory_path: Root directory to scan, including subdirectories.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.webp', '.gif')

    for root, _dirs, files in os.walk(directory_path):
        for filename in files:
            if not filename.endswith('.txt'):
                continue

            # 'name.jpeg.txt' -> 'name.jpeg' -> ('name', '.jpeg')
            base_name, _txt_extension = os.path.splitext(filename)
            stem, inner_extension = os.path.splitext(base_name)

            if inner_extension.lower() not in image_extensions:
                continue

            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, stem + '.txt')

            if os.path.exists(new_path):
                print(f"Skipping {old_path}: {new_path} already exists")
                continue

            os.rename(old_path, new_path)

# Root folder whose caption files get renamed
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Rename the files in that folder and in everything below it
rename_files(directory_path=directory_path)

### Newlines to commas
----

Recursively modify the content of '.txt' files in the specified directory and its subdirectories by replacing newlines with commas and spaces. 

In [7]:
import os

def process_directory(directory):
    """Join the lines of every .txt file under ``directory`` with ``", "``.

    Blank and whitespace-only lines are dropped first, so a trailing newline
    or an empty line does not leave a dangling ``", "`` or an empty tag
    behind. Files are read and written as UTF-8 and are only rewritten when
    their content changes.

    Args:
        directory: Root directory to scan, including subdirectories.
    """
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(".txt"):
                continue

            file_path = os.path.join(root, filename)

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Replace newlines with a comma and space, ignoring empty lines
            lines = [line for line in content.split('\n') if line.strip()]
            modified_content = ', '.join(lines)

            # Write the modified content back to the file
            if modified_content != content:
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(modified_content)

# Root folder whose caption files are flattened to a single line
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Convert the files in that folder and in everything below it
process_directory(directory=directory_path)

### Remove tags ⚠️
----

In [11]:
import os

# Tags removed by remove_tags() when the caller does not pass its own list.
_DEFAULT_TAGS_TO_REMOVE = (
    # "2000" through "2024"
    *(str(year) for year in range(2000, 2025)),
    "16 10",
    "16 9",
    "2 3",
    "3 4",
    "4 5",
    "5 6",
    "1 1",
    "absurd res",
    "hi res",
    # "elden ring",  (currently disabled)
    "fromsoftware",
    "canid",
    "canis",
    "mammal",
    "tan ear tips",
    "tan ears",
    "aaa qie",
    "unwanted erection",
)


def remove_tags(file_path, tags_to_remove=None):
    """Remove unwanted tags from a comma-separated tag file.

    The file content is split on commas and each piece is compared, with
    surrounding whitespace ignored, against the unwanted tags. Only whole
    tags are removed, so a tag that merely contains an unwanted one (for
    example ``canids`` versus ``canid``) is kept. When something is removed
    the remaining tags are written back joined by ``", "``; a trailing
    newline is preserved. Files without any unwanted tag are left untouched.

    The file is read and written as UTF-8.

    Args:
        file_path: Path of the tag file to clean up in place.
        tags_to_remove: Iterable of tags to remove. Defaults to
            ``_DEFAULT_TAGS_TO_REMOVE``.

    Returns:
        A list with one message per removed tag, ordered like
        ``tags_to_remove``. The list is empty when nothing was removed.
    """
    if tags_to_remove is None:
        tags_to_remove = _DEFAULT_TAGS_TO_REMOVE
    # De-duplicate while keeping order, so each tag is reported once.
    ordered_tags = tuple(dict.fromkeys(tags_to_remove))
    unwanted = set(ordered_tags)

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    kept = []
    found = set()
    for piece in content.split(','):
        tag = piece.strip()
        if tag in unwanted:
            found.add(tag)
        else:
            kept.append(tag)

    if not found:
        return []

    # Stripping the pieces drops a final newline; put it back if there was one.
    trailing_newline = '\n' if content.endswith('\n') else ''

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(', '.join(kept) + trailing_newline)

    return [
        f'Removed tag "{tag}" and ", " after it from file: {file_path}'
        for tag in ordered_tags
        if tag in found
    ]

def process_directory(directory):
    """Run ``remove_tags`` on every .txt file under ``directory``.

    The messages returned for each file are collected and printed, one per
    line, once the whole directory tree has been processed.
    """
    # Lazily yields the path of each .txt file while walking the tree
    txt_paths = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".txt")
    )

    collected = []
    for path in txt_paths:
        collected += remove_tags(path)

    for message in collected:
        print(message)

# Root folder whose caption files are cleaned
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Remove the unwanted tags from every *.txt file below that folder and print what was removed
process_directory(directory=directory_path)

Removed tag "2016" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\002455eb158569bcf5119f0bcc84dc50.txt
Removed tag "mammal" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\002455eb158569bcf5119f0bcc84dc50.txt
Removed tag "2020" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\0072ddb686ac477455474bef22823c4f.txt
Removed tag "mammal" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\0072ddb686ac477455474bef22823c4f.txt
Removed tag "2016" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\013a3b9d8973bbd4904a5a3ed320128e.txt
Removed tag "canid" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\013a3b9d8973bbd4904a5a3ed320128e.txt
Removed tag "canis" and ", " after it from file: C:\Users\kade\Desktop\training_dir_staging\1_spectrumshift\013a3b9d8973bbd4904a5a3