In [None]:
# notebooks/3_run_collection.ipynb

import os
import sys
import getpass
import shutil
from datetime import datetime
sys.path.append(os.path.abspath('../src'))

from config import *
from collectors.aem_collector import AEMCollector
from collectors.pdf_collector import PDFCollector
from collectors.package_builder import create_package


In [None]:
# --- 1. AEM credentials ---
# Credentials are requested interactively rather than hard-coded in the
# notebook; getpass reads the password without echoing it.
print("🔐 AEM 접속 정보를 입력하세요.")
# Strip stray whitespace (e.g. from copy/paste) so it is not sent as part of
# the username.
username = input("사용자 이름: ").strip()
password = getpass.getpass("비밀번호: ")

# Fail fast here: with an empty credential the problem would otherwise only
# surface later as a failed batch in the collection cells.
if not username or not password:
    raise ValueError("사용자 이름과 비밀번호는 비워둘 수 없습니다.")


In [None]:
# Accumulates every file gathered in this run. The PDF batch cell extends the
# same list and the final packaging cell consumes it.
all_collected_files = []

# --- 2. AEM batch processing ---
print("\n" + "="*80)
print("--- 2. AEM 페이지 배치 처리를 시작합니다. ---")

# Make sure the "done" directory exists before anything is moved into it.
# Without this, a successful collection followed by a failing move would be
# reported as a failed batch, the batch file would stay in the to-do
# directory, and its files would be collected a second time on the next run.
os.makedirs(AEM_BATCHES_DONE_DIR, exist_ok=True)

while True:
    # Re-list the to-do directory on every pass: finished batches are moved
    # out of it, so the first remaining name is always the next one to run.
    batch_files = sorted(f for f in os.listdir(AEM_BATCHES_TODO_DIR) if f.endswith('.list'))
    if not batch_files:
        print("\n✅ 모든 AEM 배치가 처리되었습니다.")
        break

    current_batch_file = batch_files[0]
    current_batch_path = os.path.join(AEM_BATCHES_TODO_DIR, current_batch_file)
    done_batch_path = os.path.join(AEM_BATCHES_DONE_DIR, current_batch_file)

    print(f"\n🤖 다음 AEM 배치를 처리합니다: {current_batch_file}")

    # One page path per line; blank and whitespace-only lines are ignored.
    with open(current_batch_path, 'r', encoding='utf-8') as f:
        page_paths = [line.strip() for line in f if line.strip()]

    if not page_paths:
        # Nothing to collect: retire the empty batch so the loop can advance.
        print("   - ✅ 배치 파일이 비어있어 건너뜁니다.")
        shutil.move(current_batch_path, done_batch_path)
        continue

    try:
        collector = AEMCollector(username, password)
        collected_files = collector.collect_snapshots_for_batch(page_paths)
        all_collected_files.extend(collected_files)

        print(f"   - ✅ 배치 성공: {len(collected_files)}개 파일 수집.")
        shutil.move(current_batch_path, done_batch_path)

    except Exception as e:
        # Stop at the first failure. The failed batch file stays in the to-do
        # directory, so re-running this cell picks it up again.
        print(f"   - ❌ 배치 실패: {current_batch_file}. 오류: {e}")
        break


In [None]:

# --- 3. PDF batch processing ---
# NOTE: `all_collected_files` is created by the AEM batch cell, so that cell
# must have been run before this one.
print("\n" + "="*80)
print("--- 3. PDF 목록 배치 처리를 시작합니다. ---")

# Make sure the "done" directory exists before anything is moved into it.
# Without this, a successful collection followed by a failing move would be
# reported as a failed batch, the batch file would stay in the to-do
# directory, and its files would be collected a second time on the next run.
os.makedirs(PDF_BATCHES_DONE_DIR, exist_ok=True)

while True:
    # Re-list the to-do directory on every pass: finished batches are moved
    # out of it, so the first remaining name is always the next one to run.
    batch_files = sorted(f for f in os.listdir(PDF_BATCHES_TODO_DIR) if f.endswith('.csv'))
    if not batch_files:
        print("\n✅ 모든 PDF 배치가 처리되었습니다.")
        break

    current_batch_file = batch_files[0]
    current_batch_path = os.path.join(PDF_BATCHES_TODO_DIR, current_batch_file)
    done_batch_path = os.path.join(PDF_BATCHES_DONE_DIR, current_batch_file)

    print(f"\n🤖 다음 PDF 배치를 처리합니다: {current_batch_file}")

    try:
        # The collector receives the path of the batch CSV itself.
        collector = PDFCollector(username, password)
        collected_files = collector.collect_pdfs_for_batch(current_batch_path)
        all_collected_files.extend(collected_files)

        print(f"   - ✅ 배치 성공: {len(collected_files)}개 파일 수집.")
        shutil.move(current_batch_path, done_batch_path)

    except Exception as e:
        # Stop at the first failure. The failed batch file stays in the to-do
        # directory, so re-running this cell picks it up again.
        print(f"   - ❌ 배치 실패: {current_batch_file}. 오류: {e}")
        break


In [None]:

# --- 4. Final packaging ---
# Package whatever was collected in this run, named with a timestamp.
if all_collected_files:
    package_name = f"data_package_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    create_package(package_name, all_collected_files)
else:
    print("\n⚠️ 수집된 파일이 없어 패키지를 생성하지 않습니다.")

# The batch cells stop at the first failing batch and leave it in its to-do
# directory. Count what is still there so the success banner is not printed
# after a partial run.
pending_batches = [
    name
    for todo_dir, suffix in (
        (AEM_BATCHES_TODO_DIR, '.list'),
        (PDF_BATCHES_TODO_DIR, '.csv'),
    )
    if os.path.isdir(todo_dir)
    for name in os.listdir(todo_dir)
    if name.endswith(suffix)
]

if pending_batches:
    print(f"\n⚠️ 처리되지 않은 배치가 {len(pending_batches)}개 남아 있습니다. 오류를 확인한 뒤 다시 실행하세요.")
else:
    print("\n🎉 모든 데이터 수집 및 패키징 작업이 완료되었습니다!")