In [None]:
import os
import pandas as pd

from google.colab import drive
# Best-effort cleanup before mounting: call drive.flush_and_unmount() and
# ignore any exception it raises, so the cell always proceeds to the mount.
try:
    drive.flush_and_unmount()
except Exception:
    pass

# Mount Google Drive at /content/drive. force_remount=True is passed so the
# mount is redone even if one already exists (presumably to recover from a
# stale mount after a runtime restart — confirm against the Colab docs).
drive.mount('/content/drive', force_remount=True)

def concatenate_dialogues(input_dir: str = "chapters", output_file: str = "full_dialogue.txt") -> None:
    """Merge the dialogue column of every ``*_dialogues.csv`` file into one text file.

    Files in ``input_dir`` whose names end with ``_dialogues.csv`` are processed
    in lexicographic filename order. For each file, the first column whose name
    contains ``"dialogue"`` or ``"text"`` (case-insensitive) is used. Missing
    (NaN) cells and cells that are empty after stripping whitespace are
    dropped; every remaining value is written on its own line, stripped of
    leading and trailing whitespace.

    Files that cannot be parsed, or that have no matching column, are reported
    on stdout and skipped. If no matching CSV file exists, a message is printed
    and ``output_file`` is not created or modified.

    Args:
        input_dir: Directory that is scanned (non-recursively) for CSV files.
        output_file: Path of the UTF-8 text file to write; overwritten if it
            already exists.

    Raises:
        OSError: If ``input_dir`` cannot be listed or ``output_file`` cannot
            be opened for writing.
    """
    # Find all CSV files that match the naming pattern.
    csv_files = sorted(
        name for name in os.listdir(input_dir) if name.endswith("_dialogues.csv")
    )

    if not csv_files:
        print(f"No dialogue CSV files found in '{input_dir}'.")
        return

    all_texts = []
    # Count only files that actually contributed, so the summary is accurate
    # when some files are skipped.
    combined_files = 0

    for file in csv_files:
        path = os.path.join(input_dir, file)
        try:
            df = pd.read_csv(path)
        except Exception as e:
            # Deliberately broad: one unreadable/malformed file must not abort
            # the whole merge.
            print(f"Skipping {file}: {e}")
            continue

        # Pick the first column that looks like it holds the dialogue text.
        # str() guards against non-string column labels.
        dialogue_col = next(
            (
                col
                for col in df.columns
                if "dialogue" in str(col).lower() or "text" in str(col).lower()
            ),
            None,
        )

        if dialogue_col is None:
            print(f"Warning: no dialogue column found in {file}, skipping.")
            continue

        # dropna() removes missing cells; the emptiness filter additionally
        # removes whitespace-only cells, which would otherwise become blank
        # lines in the output.
        stripped = (value.strip() for value in df[dialogue_col].dropna().astype(str))
        all_texts.extend(line for line in stripped if line)
        combined_files += 1

    # Write everything into one text file, one dialogue entry per line.
    with open(output_file, "w", encoding="utf-8") as out:
        out.writelines(f"{line}\n" for line in all_texts)

    print(f"✅ Combined {combined_files} files into '{output_file}' ({len(all_texts)} lines).")


# Run the merge on the filtered dialogues folder in Drive; the combined text
# file is written into that same folder.
concatenate_dialogues(
    input_dir="/content/drive/MyDrive/digphil/dialogues_filtered",
    output_file="/content/drive/MyDrive/digphil/dialogues_filtered/full_dialogue.txt",
)


Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive
