In [7]:
# First cell - imports and function definition
import re
from pathlib import Path

def read_feynman_chapter(file_path: str) -> str:
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()

# Second cell - reading and combining chapters
# Get all chapter directories
base_dir = Path("model_llm/data/feynman_content/volume_TOC/")  # Changed to match your path structure
print(f"Looking for chapters in: {base_dir.absolute()}")
print(f"Directory exists: {base_dir.exists()}")

chapter_dirs = sorted(base_dir.glob('chapter_*'))
print(f"Found {len(chapter_dirs)} chapter directories")

# Print first few chapter directories to verify
for chapter_dir in chapter_dirs[:3]:
    print(f"\nChecking chapter: {chapter_dir}")
    chapter_file = chapter_dir / 'full_chapter.txt'
    print(f"File exists: {chapter_file.exists()}")
    if chapter_file.exists():
        print(f"File size: {chapter_file.stat().st_size} bytes")

# Third cell - reading and combining text
# Read and combine all chapter texts
text_sequence = ""
for chapter_dir in chapter_dirs:
    chapter_file = chapter_dir / 'full_chapter.txt'
    if chapter_file.exists():
        text_sequence += read_feynman_chapter(chapter_file) + "\n\n"
        print(f"Loaded {chapter_dir.name}")

print(f"Total text length: {len(text_sequence)} characters")

# Fourth cell - saving the combined text
# Save the combined text to a file
output_file = Path("output/feynman_combined_text.txt")  # Changed to match your path structure
output_file.parent.mkdir(exist_ok=True)

with open(output_file, "w", encoding='utf-8') as f:
    f.write(text_sequence)

print(f"Saved combined text to {output_file}")

Looking for chapters in: /home/rorschach/model_llm/data/feynman_content/volume_TOC
Directory exists: True
Found 52 chapter directories

Checking chapter: model_llm/data/feynman_content/volume_TOC/chapter_1
File exists: True
File size: 40189 bytes

Checking chapter: model_llm/data/feynman_content/volume_TOC/chapter_10
File exists: True
File size: 39698 bytes

Checking chapter: model_llm/data/feynman_content/volume_TOC/chapter_11
File exists: True
File size: 39566 bytes
Loaded chapter_1
Loaded chapter_10
Loaded chapter_11
Loaded chapter_12
Loaded chapter_13
Loaded chapter_14
Loaded chapter_15
Loaded chapter_16
Loaded chapter_17
Loaded chapter_18
Loaded chapter_19
Loaded chapter_2
Loaded chapter_20
Loaded chapter_21
Loaded chapter_22
Loaded chapter_23
Loaded chapter_24
Loaded chapter_25
Loaded chapter_26
Loaded chapter_27
Loaded chapter_28
Loaded chapter_29
Loaded chapter_3
Loaded chapter_30
Loaded chapter_31
Loaded chapter_32
Loaded chapter_33
Loaded chapter_34
Loaded chapter_35
Loaded c