# Clear All Session Data

**WARNING:** This will clear ALL records from all session_* tables and CSV files. This action cannot be undone!

**Use this when:**
- You want to start fresh with clean data
- You need to reset all session data
- You have finished testing or development and want to remove the leftover data

**What it does:**
1. Deletes all records from session_* tables in DynamoDB (if enabled)
2. Truncates all session_*.csv files (keeps headers, removes all data rows)
3. Clears Summary_Sessions table/file
4. Initializes Summary_Sessions with default values

**Note:** Question files (GetToKnowQuestions.csv, RedFlagQuestions.csv, etc.) are NOT affected.


In [None]:
import sys
from pathlib import Path

# Resolve the project root as the grandparent of the current working directory
# and put it first on sys.path so the `src.*` imports below can be found.
project_root = Path(".").resolve().parent.parent
sys.path[:0] = [str(project_root)]

from src.adapters.database.database_handler import DatabaseHandler
from src.utils.summary_initializer import initialize_summary_sessions
import pandas as pd
from src.utils.constants import CSV_SEPARATOR


## Configuration

Set `USE_DYNAMODB = True` to clear the DynamoDB tables as well as the CSV files, or `False` to clear only the CSV files.


In [None]:
# Configuration
# Intended to be passed as `use_dynamodb` to clear_all_session_data():
# True clears the DynamoDB tables and the CSV files; False clears only the
# CSV files (the CSV truncation step runs in both cases).
USE_DYNAMODB = True  # Set to False to skip DynamoDB and clear only the CSV files


In [None]:
def _delete_all_items(table, key_names):
    """
    Delete every item of a DynamoDB table and return how many deletes were issued.

    The scan is continued page by page through ``ExclusiveStartKey`` until the
    response no longer carries a ``LastEvaluatedKey``, so tables larger than a
    single scan page are emptied completely.

    Args:
        table: Object exposing ``scan`` and ``batch_writer`` (presumably a
            boto3 DynamoDB ``Table`` resource -- confirm against DatabaseHandler).
        key_names: Names of the attributes that form the table's primary key.

    Returns:
        Number of delete requests handed to the batch writer.
    """
    deleted_count = 0
    scan_kwargs = {}
    with table.batch_writer() as batch:
        while True:
            response = table.scan(**scan_kwargs)
            for item in response.get("Items", []):
                batch.delete_item(Key={name: item[name] for name in key_names})
                deleted_count += 1
            # An empty page may still carry a continuation key, so only the
            # absence of LastEvaluatedKey ends the scan.
            last_key = response.get("LastEvaluatedKey")
            if not last_key:
                break
            scan_kwargs["ExclusiveStartKey"] = last_key
    return deleted_count


def _truncate_csv_keep_header(csv_file):
    """Rewrite ``csv_file`` so that only its header row remains."""
    # nrows=0 parses just the header line, giving an empty frame with the
    # original columns.
    header_only = pd.read_csv(csv_file, sep=CSV_SEPARATOR, nrows=0)
    header_only.to_csv(csv_file, sep=CSV_SEPARATOR, index=False)


def clear_all_session_data(use_dynamodb: bool = True, confirm: bool = False) -> bool:
    """
    Clear all session data from DynamoDB (optionally) and from the CSV files.

    Steps performed when ``confirm`` is True:
    1. Delete all records from the session_* tables and Summary_Sessions in
       DynamoDB (only if use_dynamodb=True)
    2. Truncate the session_*.csv files and Summary_Sessions.csv under
       ``project_root / "data"`` (headers are kept, data rows removed);
       files that do not exist are skipped
    3. Initialize Summary_Sessions with default values

    Every step is best-effort: a failure is reported and the remaining steps
    still run, but the function then returns False.

    Args:
        use_dynamodb: If True, clear DynamoDB tables too; if False, only clear CSV files
        confirm: Must be True to actually perform the truncation (safety check)

    Returns:
        True if every step succeeded, False if the safety check stopped the
        run or any table, file or the initialization failed
    """
    if not confirm:
        print("=" * 60)
        print("SAFETY CHECK")
        print("=" * 60)
        print("[WARNING] This will truncate ALL session data!")
        print("To proceed, call this function with confirm=True")
        print("=" * 60)
        return False

    print("=" * 60)
    print("Clearing All Session Data")
    print("=" * 60)
    print(f"Backend: {'DynamoDB' if use_dynamodb else 'CSV only'}")
    print("=" * 60)

    session_tables = [
        "session_responses",
        "session_gtk_responses",
        "session_feedback",
        "session_toxicity_rating",
        "session_insights",
    ]
    summary_table = "Summary_Sessions"

    # Names of everything that could not be cleared or initialized; decides
    # the final banner and the return value.
    failed_steps = []

    # Clear DynamoDB tables
    if use_dynamodb:
        print("\n[1] Clearing DynamoDB tables...")
        db_handler = DatabaseHandler(db_read_allowed=True, db_write_allowed=True)

        # NOTE(review): assumes each session table is keyed by the single
        # attribute "id" and Summary_Sessions by "summary_id" -- confirm
        # against the table definitions.
        key_names_by_table = {name: ("id",) for name in session_tables}
        key_names_by_table[summary_table] = ("summary_id",)

        dynamodb_failures = 0
        try:
            for table_name, key_names in key_names_by_table.items():
                try:
                    table = db_handler.backend.dynamodb.Table(table_name)
                    deleted_count = _delete_all_items(table, key_names)
                    print(f"  [OK] Cleared {table_name}: {deleted_count} records deleted")
                except Exception as e:
                    dynamodb_failures += 1
                    failed_steps.append(f"DynamoDB {table_name}")
                    print(f"  [WARNING] Could not clear {table_name}: {e}")
        finally:
            # Release the handler whether or not the tables could be cleared.
            db_handler.close()

        if dynamodb_failures:
            print(
                f"[ERROR] Error clearing DynamoDB: "
                f"{dynamodb_failures} table(s) could not be cleared"
            )
        else:
            print("[OK] DynamoDB tables cleared")

    # Truncate CSV files (keep headers, remove all data rows)
    print("\n[2] Truncating CSV files...")
    data_dir = project_root / "data"

    truncated_files = []
    for table_name in [*session_tables, summary_table]:
        csv_file = data_dir / f"{table_name}.csv"
        if not csv_file.exists():
            continue
        try:
            _truncate_csv_keep_header(csv_file)
        except Exception as e:
            failed_steps.append(csv_file.name)
            print(f"  [ERROR] Could not truncate {csv_file.name}: {e}")
        else:
            truncated_files.append(csv_file.name)
            print(f"  [OK] Truncated {csv_file.name} (kept headers, removed all data)")

    print(f"\n[OK] CSV files truncated: {len(truncated_files)} files cleared")

    # Initialize Summary_Sessions with default values after clearing
    print("\n[3] Initializing Summary_Sessions with default values...")
    db_handler = DatabaseHandler(
        db_read_allowed=bool(use_dynamodb), db_write_allowed=True
    )

    try:
        if initialize_summary_sessions(db_handler):
            print("[OK] Summary_Sessions initialized with default values")
        else:
            failed_steps.append("Summary_Sessions initialization")
            print("[WARNING] Summary_Sessions initialization failed")
    except Exception as e:
        failed_steps.append("Summary_Sessions initialization")
        print(f"[WARNING] Error initializing Summary_Sessions: {e}")
    finally:
        db_handler.close()

    print("\n" + "=" * 60)
    if failed_steps:
        print(
            f"[ERROR] Finished with {len(failed_steps)} failed step(s): "
            f"{', '.join(failed_steps)}"
        )
        print("=" * 60)
        return False

    print("[SUCCESS] All session data cleared and Summary_Sessions initialized!")
    print("=" * 60)
    return True


In [None]:
# Clear all session data
# WARNING: This will truncate ALL session records!
# Uncomment and set confirm=True to proceed
# clear_all_session_data(use_dynamodb=USE_DYNAMODB, confirm=False)  # Set to True to actually truncate
