# training_dir_staging

### Escape parentheses
----

Recursively escape unescaped parentheses in all '.txt' files within the specified directory and its subdirectories. 

In [2]:
import os
import re

def escape_parentheses(file_path):
    """
    Backslash-escape every unescaped parenthesis in a text file, in place.

    A parenthesis is treated as already escaped when the character directly
    before it is a backslash, so running this on an already-processed file
    leaves it unchanged.

    Args:
        file_path: Path of the text file to rewrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Pin the encoding: the platform default is locale-dependent, so the same
    # file could decode differently (or fail to decode) on another machine.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # The negative lookbehind skips parentheses already preceded by a backslash.
    content = re.sub(r'(?<!\\)([()])', r'\\\1', content)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)

def process_directory(directory):
    """
    Escape parentheses in every '.txt' file under a directory tree.

    Args:
        directory: Root directory to scan; all subdirectories are included.
    """
    # os.walk descends into every subdirectory on its own, so no manual
    # recursion is required here.
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".txt"):
                escape_parentheses(os.path.join(root, file))

# Root folder to process; every '.txt' file beneath it is rewritten in place.
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'
process_directory(directory_path)

### Replace underscores with spaces
----

Recursively replaces underscores with spaces in the content of text files in the specified directory and its subdirectories,
excluding specified tags.

In [3]:
import os

# Tags that must keep their underscore spelling. The function
# replace_underscores_with_spaces rewrites the space-separated form of each
# entry (e.g. "score safe") back to the spelling listed here.
excluded_tags = [
    "score_safe",
    "score_explicit",
    "score_questionable"
]

def replace_underscores_with_spaces(directory_path, excluded=None):
    """
    Replace underscores with spaces in every '.txt' file under a directory tree.

    Excluded tags keep their underscores. This is done in two passes: all
    underscores are turned into spaces, then the space-separated form of each
    excluded tag is rewritten back to its underscore spelling. As a
    consequence, an excluded tag that was already written with spaces in the
    file is also normalised to the underscore spelling.

    Args:
        directory_path: Root directory to scan; all subdirectories are included.
        excluded: Optional iterable of tags to leave untouched. When omitted,
            the module-level ``excluded_tags`` list is used.

    Raises:
        OSError: If a file cannot be opened for reading or writing.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    tags = excluded_tags if excluded is None else list(excluded)

    for root, _dirs, files in os.walk(directory_path):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            file_path = os.path.join(root, filename)

            # Pin the encoding so results do not depend on the platform locale.
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Pass 1: convert every underscore to a space.
            content = content.replace('_', ' ')

            # Pass 2: restore the underscore spelling of the excluded tags.
            for tag in tags:
                content = content.replace(tag.replace('_', ' '), tag)

            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(content)

# Root folder to process; '.txt' files beneath it are rewritten in place.
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Run the pass over the root folder and all of its subdirectories.
replace_underscores_with_spaces(directory_path)

### Remove extra file extension before .txt
----

In [2]:
import os

def rename_files(directory_path):
    """
    Recursively strip a stray image extension that sits before '.txt'.

    For example 'photo.png.txt' and 'photo.jpeg.txt' both become 'photo.txt'.
    The inner extension is matched case-insensitively. Files without one of
    the listed image extensions before '.txt' are left alone.

    Args:
        directory_path: Root directory to scan; all subdirectories are included.

    Raises:
        OSError: If a rename fails (for instance, on Windows when the target
            name already exists).
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.webp', '.gif')

    for root, _dirs, files in os.walk(directory_path):
        for filename in files:
            if not filename.endswith('.txt'):
                continue

            # Drop '.txt', then split off whatever extension remains.
            base_name = os.path.splitext(filename)[0]
            stem, inner_extension = os.path.splitext(base_name)

            if inner_extension.lower() not in image_extensions:
                continue

            # Use the real extension length: slicing a fixed four characters
            # would leave a stray dot behind for '.jpeg' and '.webp'.
            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, stem + '.txt')
            os.rename(old_path, new_path)

# Root folder to scan for '.txt' files carrying an extra image extension.
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Rename matching files under the root folder and all of its subdirectories.
rename_files(directory_path)

### Newlines to commas
----

Recursively modify the content of '.txt' files in the specified directory and its subdirectories by replacing newlines with commas and spaces. 

In [2]:
import os

def process_directory(directory):
    """
    Join the lines of every '.txt' file under a directory tree with ', '.

    Each newline is replaced by a comma and a space. Trailing newlines are
    dropped first, so a file ending in a newline does not end up with a
    dangling ', ' at the end.

    Args:
        directory: Root directory to scan; all subdirectories are included.

    Raises:
        OSError: If a file cannot be opened for reading or writing.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            file_path = os.path.join(root, filename)

            # Pin the encoding so results do not depend on the platform locale.
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Strip trailing newlines before joining to avoid a trailing ', '.
            modified_content = content.rstrip('\n').replace('\n', ', ')

            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(modified_content)

# Root folder to process; '.txt' files beneath it are rewritten in place.
directory_path = r'C:\Users\kade\Desktop\training_dir_staging'

# Run the pass over the root folder and all of its subdirectories.
process_directory(directory_path)