In [None]:
import re
import os
import csv

# Colab-only helper used to attach Google Drive to the notebook's filesystem.
from google.colab import drive
# Best-effort unmount before mounting: any error raised here is deliberately
# ignored so that the mount call below is always attempted.
try:
    drive.flush_and_unmount()
except Exception:
    pass

# Mount Drive at /content/drive; the default chapter and dialogue directories
# used by the extraction function live under this mount point.
drive.mount('/content/drive', force_remount=True)

def extract_dialogue_to_csv_cleaned(input_dir="/content/drive/MyDrive/digphil/chapters", output_dir="/content/drive/MyDrive/digphil/dialogues"):
    """Extract quoted dialogue from chapter text files into per-chapter CSVs.

    Every file in ``input_dir`` whose name starts with ``chapter_`` and ends
    with ``.txt`` is read as UTF-8 and scanned for text enclosed in double
    quotes, either straight ("...") or curly (\u201c...\u201d). Each quotation
    that is not blank is stripped of surrounding whitespace and written,
    without its quote marks, as one row of ``<chapter name>_dialogues.csv`` in
    ``output_dir`` under a single ``dialogue`` header column.

    Files are processed in natural order (``chapter_2`` before
    ``chapter_10``). A chapter without any dialogue produces no CSV file.
    Read and write failures are reported with ``print`` and do not stop the
    remaining chapters from being processed.

    Limitation: a quotation must open and close on the same line, because
    ``.`` in the pattern does not match a newline.

    Args:
        input_dir: Directory containing the ``chapter_*.txt`` files.
        output_dir: Directory receiving the CSV files; created if missing.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        OSError: If ``output_dir`` cannot be created or ``input_dir`` cannot
            be listed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # One alternative per quote style; only the group belonging to the
    # alternative that matched is set, so ``lastindex`` selects the content.
    dialogue_pattern = re.compile(r'"(.+?)"|\u201c(.+?)\u201d')

    for filename in sorted(os.listdir(input_dir), key=_chapter_sort_key):
        if not (filename.startswith("chapter_") and filename.endswith(".txt")):
            continue

        input_filepath = os.path.join(input_dir, filename)
        # splitext drops only the trailing extension; str.replace would also
        # rewrite any ".txt" occurring earlier in the name.
        output_filename = os.path.splitext(filename)[0] + "_dialogues.csv"
        output_filepath = os.path.join(output_dir, output_filename)

        try:
            with open(input_filepath, "r", encoding="utf-8") as f:
                chapter_text = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Error processing file {input_filepath}: {e}")
            continue  # Skip to the next file if this one cannot be read

        # Keep the quoted content only (no quote marks), dropping quotations
        # that consist solely of whitespace.
        stripped_quotes = (
            match.group(match.lastindex).strip()
            for match in dialogue_pattern.finditer(chapter_text)
        )
        chapter_dialogue = [line for line in stripped_quotes if line]

        if not chapter_dialogue:
            print(f"No non-empty dialogue found in {input_filepath}. No CSV file created.")
            continue

        try:
            with open(output_filepath, "w", newline="", encoding="utf-8") as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(["dialogue"])
                writer.writerows([line] for line in chapter_dialogue)
            print(f"Saved {len(chapter_dialogue)} non-empty lines of dialogue to '{output_filepath}'.")
        except (OSError, UnicodeError, csv.Error) as e:
            print(f"Error writing to CSV file {output_filepath}: {e}")


def _chapter_sort_key(filename):
    """Return a natural-sort key so "chapter_2.txt" orders before "chapter_10.txt"."""
    # re.split with a capturing group alternates text and digit-run pieces, so
    # odd positions are always digit runs and items at the same index in two
    # keys always have the same type (str vs. int), keeping keys comparable.
    pieces = re.split(r"(\d+)", filename)
    return [int(piece) if index % 2 else piece for index, piece in enumerate(pieces)]

# Run the extraction over the project's chapter folder on the mounted Drive,
# writing one dialogue CSV per chapter into the dialogues folder.
extract_dialogue_to_csv_cleaned(
    input_dir="/content/drive/MyDrive/digphil/chapters",
    output_dir="/content/drive/MyDrive/digphil/dialogues",
)

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive
Saved 148 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_1_dialogues.csv'.
Saved 31 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_10_dialogues.csv'.
Saved 21 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_11_dialogues.csv'.
Saved 99 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_12_dialogues.csv'.
Saved 64 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_13_dialogues.csv'.
Saved 38 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_14_dialogues.csv'.
Saved 48 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_15_dialogues.csv'.
Saved 59 non-empty lines of dialogue to '/content/drive/MyDrive/digphil/dialogues/chapter_16_dialogues.csv'.
Saved 64 non-empty lines of dialogue to '/content/