# In [None]:
# Cell 1: Setup
import sys
sys.path.append('../src')

from data_loader import PoetryLoader
from analyzer import PoetryAnalyzer
from visualizer import PoetryVisualizer
import pandas as pd

# Shared objects for the cells below: `analyzer` loads (via its `loader`
# attribute), processes and compares the corpora; `viz` draws the plots.
analyzer = PoetryAnalyzer()
# NOTE(review): '../output' is presumably where figures are saved, relative to
# the notebook's working directory — confirm against PoetryVisualizer.
viz = PoetryVisualizer(output_dir='../output')

# Cell 2: Load Both Corpora
# Both corpora are requested with the same max_poems value and passed through
# the same process_corpus step before being compared in later cells.
poem_limit = 300

print("Loading Tang poems...")
tang_raw = analyzer.loader.load_tang_poems(max_poems=poem_limit)
tang_processed = analyzer.process_corpus(tang_raw)

print("Loading Song ci...")
song_raw = analyzer.loader.load_song_ci(max_poems=poem_limit)
song_processed = analyzer.process_corpus(song_raw)

# Summary of how many processed entries each corpus ended up with.
print(
    f"\nTang: {len(tang_processed)} poems",
    f"Song: {len(song_processed)} poems",
    sep="\n",
)

# Cell 3: Compare Distributions
# Plot the two processed corpora against each other; the call is left as the
# cell's final expression so any returned object is still displayed.
viz.plot_dynasty_comparison(
    tang_processed,
    song_processed,
    save_name="tang_song_comparison.png",
)

# Cell 4: Statistical Comparison
# compare_dynasties is read here as a mapping with 'tang' and 'song' entries,
# each exposing 'total_poems', 'avg_line_length' and 'avg_line_count'.
comparison = analyzer.compare_dynasties(tang_processed, song_processed)

print("DYNASTY COMPARISON")
print("=" * 60)

# Same three-line report for each dynasty, Tang first, then Song.
for dynasty in ("tang", "song"):
    stats = comparison[dynasty]
    print(f"\n{dynasty.upper()}:")
    print(f"  Total poems: {stats['total_poems']}")
    print(f"  Avg line length: {stats['avg_line_length']:.2f}")
    print(f"  Avg line count: {stats['avg_line_count']:.2f}")

# Cell 5: Form Comparison
# Grouped form comparison of the two processed corpora; kept as the cell's
# final expression so any returned object is still displayed.
viz.plot_form_comparison_grouped(
    tang_processed,
    song_processed,
    save_name="form_comparison_grouped.png",
)
