# Notebook Testing Harness

This notebook recursively discovers the Jupyter notebooks in the project, executes each one (except those excluded in the Configuration section below), and reports their completion status.

It serves as a testing harness to verify that all notebooks run successfully without errors.

In [None]:
from aips.spark import get_spark_session

import os
import glob
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from datetime import datetime
import sys
from aips import set_engine
import traceback
from pathlib import Path
import json
from tqdm.notebook import tqdm

# Obtain a Spark session from the project's helper.
# NOTE(review): `spark` is not referenced by any other cell visible here —
# presumably it is created up front for its side effects or for interactive
# use; confirm before removing.
spark = get_spark_session()

## Configuration

In [None]:
# Default Jupyter kernel name.
KERNEL_NAME = "python3"
# Notebook file name -> kernel name, for notebooks that need a specific kernel.
KERNEL_OVERRIDES = {"1.open-information-extraction.ipynb": "ch5-spacy"}
# Path fragments; any notebook whose path contains one of these is ignored.
EXCLUDE_DIRS = [".ipynb_checkpoints", "__pycache__"]
# When True, the harness writes its results to a JSON file after a run.
EXPORT_RESULTS = False
# Notebook file names that are never executed by the harness.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which would produce one bogus file name.
EXCLUDED_NOTEBOOKS = ["bonus.related-terms-from-documents.ipynb",
                      "bonus.phrase-detection.ipynb",
                      "a.defunct.synthesize-search-sessions.ipynb",
                      "a.synthesize-search-sessions.ipynb",
                      "a.generate-movie-embeddings.ipynb",
                      "welcome.ipynb",
                      "aips-test-suite.ipynb",
                      "4.train-upload-search-ltr.ipynb",
                      "ch13-tokenizer-analysis.ipynb"]
# Chapter identifiers (usable as a `chapter_to_run` filter).
ALL_CHAPTERS = ["ch3", "ch4", "ch5", "ch6", "ch7", "ch8", "ch9", "ch10", "ch11",
                "ch12", "ch13", "ch14", "ch15"]
# Search engines the notebooks are run against.
ALL_ENGINES = ["solr", "opensearch"]

## Utility Functions

In [None]:
def format_time(seconds):
    """Render a duration given in seconds as a short human-readable string.

    Durations under a minute are shown in seconds ("s"), under an hour in
    minutes ("m"), and everything else in hours ("h"), always with two
    decimal places.
    """
    if seconds < 60:
        return f"{seconds:.2f}s"
    if seconds < 3600:
        minutes = seconds / 60
        return f"{minutes:.2f}m"
    hours = seconds / 3600
    return f"{hours:.2f}h"

def export_results(results, filename="test_results.json"):
    """Write harness results to a JSON file and announce the export.

    Nothing is written (and nothing is printed) when `results` is falsy.

    Args:
        results: Mapping with "details" and "summary" entries.
        filename: Destination path of the JSON file.
    """
    if not results:
        return

    with open(filename, "w") as out_file:
        payload = {"timestamp": datetime.now().isoformat(),
                   "results": results["details"],
                   "summary": results["summary"]}
        json.dump(payload, out_file, indent=2)
    print(f"📄 Results exported to {filename}")

In [None]:
def get_notebook_files(root_dir=".", exclude_dirs=None, excluded_notebooks=None):
    """Collect the notebooks below `root_dir`, sorted by their path string.

    Args:
        root_dir: Directory that is searched recursively for "*.ipynb" files.
        exclude_dirs: Path fragments; a notebook is dropped when any fragment
            occurs anywhere in its path string. `None` selects EXCLUDE_DIRS;
            an empty list disables this filter.
        excluded_notebooks: File names to drop. `None` selects
            EXCLUDED_NOTEBOOKS; an empty list disables this filter.

    Returns:
        A list of `Path` objects without duplicates.
    """
    # Compare against None (not truthiness) so callers can pass an empty
    # collection to switch a filter off instead of silently getting defaults.
    if exclude_dirs is None:
        exclude_dirs = EXCLUDE_DIRS
    if excluded_notebooks is None:
        excluded_notebooks = EXCLUDED_NOTEBOOKS
    skipped_names = set(excluded_notebooks)

    # A single recursive glob from the root already reaches every
    # subdirectory, so there is no need to re-glob from each directory.
    found = set()
    for path in Path(root_dir).rglob("*.ipynb"):
        if path.name in skipped_names:
            continue
        path_text = str(path)
        if any(fragment in path_text for fragment in exclude_dirs):
            continue
        found.add(path_text)

    # Sort the string form to keep the ordering callers have always seen.
    return [Path(path_text) for path_text in sorted(found)]

In [None]:
def execute_notebook(notebook_path, timeout=600, kernel_name="python3"):
    """Execute one notebook and report how it went.

    The notebook is read as nbformat version 4 and run through an
    `ExecutePreprocessor`, with the notebook's own directory passed as the
    `path` metadata resource.

    Args:
        notebook_path: Path of the notebook file, as a string.
        timeout: Value handed to `ExecutePreprocessor(timeout=...)`.
        kernel_name: Name of the Jupyter kernel to run the notebook with.

    Returns:
        A `(success, seconds, error)` tuple. `error` is `None` on success;
        otherwise it is a dict with "type", "message" and "traceback" keys
        describing the exception that was raised.
    """
    began_at = datetime.now()

    def elapsed_seconds():
        return (datetime.now() - began_at).total_seconds()

    try:
        with open(notebook_path, "r", encoding="utf-8") as handle:
            notebook = nbformat.read(handle, as_version=4)
            runner = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)
            resources = {"metadata": {"path": os.path.dirname(notebook_path)}}
            runner.preprocess(notebook, resources)
    except Exception as exc:
        # Any failure (unreadable file, cell error, timeout, ...) is reported
        # to the caller instead of being raised.
        duration = elapsed_seconds()
        failure = {"type": type(exc).__name__,
                   "message": str(exc),
                   "traceback": traceback.format_exc()}
        return False, duration, failure

    return True, elapsed_seconds(), None

## Main Testing Function

In [None]:
def _print_summary(results, verbose_errors):
    """Print the end-of-run summary (counts, success rate, failed notebooks)."""
    summary = results["summary"]
    print("\n" + "=" * 80)
    print("📊 SUMMARY:")
    print(f"   Total notebooks tested: {summary['total']}")
    print(f"   ✅ Successful: {summary['successful']}")
    if summary["failed"] > 0:
        print(f"   ❌ Failed: {summary['failed']}")

    # An empty selection (e.g. a chapter filter that matches nothing) must not
    # crash the report with a division by zero.
    total = summary["total"]
    success_rate = (summary["successful"] / total) * 100 if total else 0.0
    print(f"   Success rate: {success_rate:.1f}%")
    print(f"   Total execution time: {format_time(summary['total_time'])}")

    if summary["failed"] > 0:
        print("\n❌ FAILED NOTEBOOKS:")
        for result in results["details"]:
            if result["success"]:
                continue
            error = result["error"]
            line = f"   • {result['notebook']}: {error['type']}"
            if verbose_errors:
                line += f": {error['message']}"
            print(line)


def run_test_harness(exclude_dirs=None, exclude_notebooks=None, stop_on_failure=True,
                     chapter_to_run=None, verbose_errors=True, root_dir=".",
                     timeout=600):
    """Execute the project's notebooks and print/return a status report.

    Args:
        exclude_dirs: Forwarded to `get_notebook_files` (path fragments to skip).
        exclude_notebooks: Forwarded to `get_notebook_files` (file names to skip).
        stop_on_failure: Stop after the first failing notebook when True.
        chapter_to_run: Optional substring; only notebooks whose path contains
            it are run.
        verbose_errors: Include the error message (not only its type) in the
            list of failed notebooks.
        root_dir: Directory searched for notebooks.
        timeout: Timeout value passed to `execute_notebook` for each notebook.

    Returns:
        A dict with a "details" list (one entry per executed notebook with
        "notebook", "success", "execution_time" and "error") and a "summary"
        dict ("total", "successful", "failed", "total_time"). "total" counts
        the selected notebooks, including any that were skipped or not reached.
    """
    print(f"🔍 Notebook Testing Harness - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"📁 Testing from root directory: {os.path.abspath(root_dir)}")
    print("=" * 80)

    notebook_files = get_notebook_files(root_dir, exclude_dirs, exclude_notebooks)

    if chapter_to_run:
        notebook_files = [f for f in notebook_files if chapter_to_run in str(f)]

    print(f"📋 Found {len(notebook_files)} notebook(s) to test.")
    for nb_file in notebook_files:
        print(f"   • {os.path.relpath(str(nb_file), root_dir)}")
    print()

    results = {"details": [],
               "summary": {"total": len(notebook_files),
                           "successful": 0,
                           "failed": 0,
                           "total_time": 0}}
    summary = results["summary"]

    print("🚀 Executing notebooks:")
    for notebook_path in tqdm(notebook_files, desc="Progress", colour="purple"):
        rel_path = os.path.relpath(str(notebook_path), root_dir)
        # Decide on skipping first so a skipped notebook is not also
        # announced as being tested.
        if "checkpoint" in str(notebook_path):
            print(f"\n📔 Skipping: {rel_path}")
            continue
        print(f"\n📔 Testing: {rel_path}")

        # Fall back to the configured default kernel rather than a literal.
        kernel_name = KERNEL_OVERRIDES.get(notebook_path.name, KERNEL_NAME)
        success, execution_time, error = execute_notebook(str(notebook_path), timeout,
                                                          kernel_name)

        summary["total_time"] += execution_time

        if success:
            print(f"   ✅ SUCCESS - Completed in {format_time(execution_time)}")
            summary["successful"] += 1
        else:
            print(f"   ❌ FAILED - Error after {format_time(execution_time)}")
            print(f"      Error: {error['type']}")
            summary["failed"] += 1

        results["details"].append({"notebook": rel_path,
                                   "success": success,
                                   "execution_time": execution_time,
                                   "error": error})
        if not success and stop_on_failure:
            print("Terminating test run due to test failure.")
            break

    _print_summary(results, verbose_errors)

    print(f"\n🏁 Testing completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if EXPORT_RESULTS:
        # File name previously carried a typo ("resulsts").
        export_results(results, "AIPS_test_results.json")

    return results

In [None]:
# Known failures:
# - The last cell in chapter 3 fails for most non-Solr engines that lack
#   {!func} translation capabilities.
# - Chapter 15 fails for opensearch when creating a view from the
#   tmdb_with_embeddings collection: the image embedding vector fails to load
#   into the dataframe.
# Keep every engine's results; `results` alone would only hold the last run.
results_by_engine = {}
for engine in ALL_ENGINES:
    set_engine(engine)
    results = run_test_harness(stop_on_failure=False)
    results_by_engine[engine] = results