man-group · jonbannister · Dec 8, 2021 · Nov 19, 2021 · Nov 21, 2021 · Nov 23, 2021
diff --git a/README.md b/README.md
@@ -16,7 +16,10 @@ Productionise and schedule your Jupyter Notebooks, just as interactively as you
 ![Screenshot of some notebook results](https://raw.githubusercontent.com/man-group/notebooker/master/docs/images/nbkr_results.png)
 
 ## All results are accessible from the home page
-![Screenshot of Executing a notebook](https://raw.githubusercontent.com/man-group/notebooker/master/docs/images/nbkr_homepage.png)
+![Screenshot of the Notebooker homepage](https://raw.githubusercontent.com/man-group/notebooker/master/docs/images/nbkr_homepage.png)
+
+## Drill down into each template's results
+![Screenshot of result listings](https://raw.githubusercontent.com/man-group/notebooker/master/docs/images/nbkr_results_listing.png)
 
 
 ## Getting started

diff --git a/docs/images/nbkr_homepage.png b/docs/images/nbkr_homepage.png
diff --git a/docs/images/nbkr_results_listing.png b/docs/images/nbkr_results_listing.png
diff --git a/docs/images/notebooker_homepage.png b/docs/images/notebooker_homepage.png
diff --git a/docs/webapp/webapp.rst b/docs/webapp/webapp.rst
@@ -2,22 +2,29 @@ The Notebooker webapp
 =====================
 
 Notebooker's primary interface is a simple webapp written to allow users to view and
-run Notebooker reports. It displays all results in a handy grid, and allows for rerunning
+run Notebooker reports. It first displays all unique template names which have ever run, and a drill-down
+view lists all results for that notebook template in a handy grid, allowing for rerunning
 and parameter tweaking.
 The entrypoint used to run Notebooks via the webapp is the
-same as the external API, so as long as you are using the same environment (e.g. within
+same as the external API; as long as you are using the same environment (e.g. within
 a docker image) you will get consistent results.
 
 
 Report dashboard
 ----------------
 The home page of the Notebooker webapp displays one card for each notebook template which has at least one result.
+
+.. image:: /images/nbkr_homepage.png
+   :width: 400
+   :alt: Screenshot of Notebooker webapp homepage
+
+Clicking on one of these templates brings up a listing of that template's most recently run results.
 It is possible to view each full report by clicking "Result". It's also possible to rerun, delete, and
 copy parameters of each report in the grid.
 
-.. image:: /images/notebooker_homepage.png
+.. image:: /images/nbkr_results_listing.png
    :width: 400
-   :alt: Screenshot of Notebooker webapp homepage
+   :alt: Screenshot of Notebooker results listing
 
 
 Running a report
@@ -40,7 +47,7 @@ Running a report
 .. warning::
     In order to prevent users having to write JSON, the Override parameters box actually takes raw python statements
     and converts them into JSON. Therefore, it is strongly recommended that you run Notebooker in an environment
-    where you either completely trust all of the user base,  or within a docker container
+    where you either completely trust all of the user base, or within a docker container
     where executing variable assignments will not have any negative side-effects.
 
 Customisable elements:
@@ -49,6 +56,7 @@ Customisable elements:
 * Override parameters - the values which will override the parameters in the report (in python). Can be left blank.
 * Email to - upon completion of the report, who should it be emailed to? Can be left blank.
 * Generate PDF output - whether to generate PDFs or not. Requires xelatex to be installed - see :ref:`export to pdf`
+* Hide code from email and PDF output - whether to hide the notebook code when producing output emails and PDFs.
 
 Viewing results
 ---------------
@@ -67,6 +75,7 @@ If the job fails, the stack trace will be presented to allow for easier debuggin
 
 
 | If the job succeeds, the .ipynb will have been converted into HTML for viewing on this page.
+| **Please note:** for user convenience, all notebook code is hidden by default.
 | You can also get to this view by clicking the blue "Result" button on the homepage.
 | If you are using a framework such as seaborn or matplotlib, the images will be available and served by the webapp.
 | If you are using plotly, you can use offline mode to store the required javascript within the HTML render,

diff --git a/notebooker/constants.py b/notebooker/constants.py
@@ -8,6 +8,7 @@
 
 SUBMISSION_TIMEOUT = 3
 RUNNING_TIMEOUT = 60
+DEFAULT_RESULT_LIMIT = 100
 CANCEL_MESSAGE = "The webapp shut down while this job was running. Please resubmit with the same parameters."
 TEMPLATE_DIR_SEPARATOR = "^"
 DEFAULT_SERIALIZER = "PyMongoResultSerializer"

diff --git a/notebooker/serialization/mongo.py b/notebooker/serialization/mongo.py
@@ -1,5 +1,7 @@
 import datetime
 import json
+from collections import Counter, defaultdict
+
 from abc import ABC
 from logging import getLogger
 from typing import Any, AnyStr, Dict, List, Optional, Tuple, Union, Iterator
@@ -300,20 +302,45 @@ def get_check_result(
         result = self.library.find_one({"job_id": job_id}, {"_id": 0})
         return self._convert_result(result)
 
+    def _get_raw_results(self, base_filter, projection, limit):
+        """
+        Query the library for non-deleted results, ordered by ``update_time`` descending.
+
+        :param base_filter: Mongo filter to apply. It is never modified; the "status is not DELETED"
+            condition is combined with it in a copy.
+        :param projection: Mongo projection, passed straight through to ``find``.
+        :param limit: Maximum number of documents to return. ``0`` or ``None`` means no limit.
+        :returns: The cursor produced by ``find(...).sort("update_time", -1).limit(...)``.
+        """
+        not_deleted = {"$ne": JobStatus.DELETED.value}
+        # Shallow-copy so that neither the caller's dict nor a nested "status" dict it shares is mutated.
+        query = dict(base_filter or {})
+        if "status" not in query:
+            query["status"] = not_deleted
+        elif isinstance(query["status"], dict):
+            # Operator dict (e.g. {"$in": [...]}): extend a fresh copy with the $ne condition.
+            query["status"] = {**query["status"], **not_deleted}
+        else:
+            # A plain equality match has no operator dict to extend, so AND the two conditions together.
+            query = {"$and": [query, {"status": not_deleted}]}
+        # Cursor.limit() rejects None, so map it to 0, which pymongo treats as "no limit".
+        return self.library.find(query, projection).sort("update_time", -1).limit(limit or 0)
+
+    def get_count_and_latest_time_per_report(self):
+        """
+        Summarise the non-deleted results by report name.
+
+        :returns: A dict mapping each ``report_name`` to a dict with the keys ``count`` (number of runs),
+            ``latest_run`` (the greatest ``job_start_time``) and ``scheduler_runs`` (number of runs which
+            have a truthy ``scheduler_job_id``).
+        """
+        cursor = self._get_raw_results(
+            base_filter={},
+            projection={"report_name": 1, "job_start_time": 1, "scheduler_job_id": 1, "_id": 0},
+            limit=0,
+        )
+        # Group the projected documents by report name before summarising each group.
+        runs_per_report = defaultdict(list)
+        for run in cursor:
+            runs_per_report[run["report_name"]].append(run)
+        return {
+            name: {
+                "count": len(runs),
+                "latest_run": max(run["job_start_time"] for run in runs),
+                "scheduler_runs": sum(1 for run in runs if run.get("scheduler_job_id")),
+            }
+            for name, runs in runs_per_report.items()
+        }
+
     def get_all_results(
         self,
         since: Optional[datetime.datetime] = None,
         limit: Optional[int] = 100,
         mongo_filter: Optional[Dict] = None,
         load_payload: bool = True,
     ) -> Iterator[Union[NotebookResultComplete, NotebookResultError, NotebookResultPending]]:
-        base_filter = {"status": {"$ne": JobStatus.DELETED.value}}
+        base_filter = {}
         if mongo_filter:
             base_filter.update(mongo_filter)
         if since:
             base_filter.update({"update_time": {"$gt": since}})
         projection = REMOVE_ID_PROJECTION if load_payload else REMOVE_PAYLOAD_FIELDS_AND_ID_PROJECTION
-        results = self.library.find(base_filter, projection).sort("update_time", -1).limit(limit)
+        results = self._get_raw_results(base_filter, projection, limit)
         for res in results:
             if res:
                 converted_result = self._convert_result(res, load_payload=load_payload)
@@ -404,8 +431,8 @@ def get_latest_successful_job_ids_for_name_all_params(self, report_name: str) ->
 
         return [result["job_id"] for result in results]
 
-    def n_all_results(self):
-        return self.library.find({"status": {"$ne": JobStatus.DELETED.value}}).count()
+    def n_all_results_for_report_name(self, report_name: str) -> int:
+        """
+        Count the non-deleted results stored for one report.
+
+        :param report_name: The exact ``report_name`` value to match.
+        :returns: The number of matching results whose status is not DELETED.
+        """
+        # Cursor.count() is deprecated since PyMongo 3.7 and removed in 4.0; count_documents replaces it.
+        query = {"report_name": report_name, "status": {"$ne": JobStatus.DELETED.value}}
+        return self.library.count_documents(query)
 
     def delete_result(self, job_id: AnyStr) -> None:
         self.update_check_status(job_id, JobStatus.DELETED)

diff --git a/notebooker/utils/results.py b/notebooker/utils/results.py
@@ -1,7 +1,11 @@
+import datetime
+from collections import defaultdict
 from datetime import datetime as dt
 from logging import getLogger
 from typing import Callable, Dict, Iterator, List, Mapping, Optional, Tuple
 
+import babel.dates
+import inflection
 from flask import url_for
 
 from notebooker import constants
@@ -106,9 +110,10 @@ def get_all_result_keys(
     return all_keys
 
 
-def get_all_available_results_json(serializer: MongoResultSerializer, limit: int) -> List[constants.NotebookResultBase]:
+def get_all_available_results_json(serializer: MongoResultSerializer, limit: int, report_name: str = None) -> List[constants.NotebookResultBase]:
     json_output = []
-    for result in serializer.get_all_results(limit=limit, load_payload=False):
+    mongo_filter = {"report_name": report_name} if report_name is not None else {}
+    for result in serializer.get_all_results(mongo_filter=mongo_filter, limit=limit, load_payload=False):
         output = result.saveable_output()
         output["result_url"] = url_for(
             "serve_results_bp.task_results", job_id=output["job_id"], report_name=output["report_name"]
@@ -126,6 +131,16 @@ def get_all_available_results_json(serializer: MongoResultSerializer, limit: int
     return json_output
 
 
+def get_count_and_latest_time_per_report(serializer: MongoResultSerializer):
+    """
+    Build the per-report summary for the home page, ordered by most recent run first.
+
+    Each entry from the serializer is extended in place with ``report_name`` (the raw name) and
+    ``time_diff`` (the time since ``latest_run``, formatted by babel), then stored under the
+    titleised report name.
+
+    :param serializer: The serializer whose ``get_count_and_latest_time_per_report`` supplies the raw summary.
+    :returns: A dict mapping titleised report names to their metadata dicts.
+    """
+    per_report = serializer.get_count_and_latest_time_per_report()
+    newest_first = sorted(per_report, key=lambda name: per_report[name]["latest_run"], reverse=True)
+    summary = {}
+    for name in newest_first:
+        entry = per_report[name]
+        entry["report_name"] = name
+        entry["time_diff"] = babel.dates.format_timedelta(datetime.datetime.utcnow() - entry["latest_run"])
+        # NOTE(review): two report names which titleise identically would overwrite each other here - confirm.
+        summary[inflection.titleize(name)] = entry
+    return summary
+
+
 def get_latest_successful_job_results_all_params(
     report_name: str,
     serializer: MongoResultSerializer,

diff --git a/notebooker/web/app.py b/notebooker/web/app.py
@@ -121,7 +121,10 @@ def setup_app(flask_app: Flask, web_config: WebappConfig):
     logging.basicConfig(level=logging.getLevelName(web_config.LOGGING_LEVEL))
     flask_app.config.from_object(web_config)
     flask_app.config.update(
-        TEMPLATES_AUTO_RELOAD=web_config.DEBUG, EXPLAIN_TEMPLATE_LOADING=True, DEBUG=web_config.DEBUG
+        TEMPLATES_AUTO_RELOAD=web_config.DEBUG,
+        EXPLAIN_TEMPLATE_LOADING=True,
+        DEBUG=web_config.DEBUG,
+        TESTING=web_config.DEBUG,
     )
     flask_app = setup_scheduler(flask_app, web_config)
     return flask_app

diff --git a/notebooker/web/report_hunter.py b/notebooker/web/report_hunter.py
@@ -39,6 +39,7 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
                 JobStatus.SUBMITTED: now - datetime.timedelta(minutes=SUBMISSION_TIMEOUT),
                 JobStatus.PENDING: now - datetime.timedelta(minutes=RUNNING_TIMEOUT),
             }
+            cutoff.update({k.value: v for (k, v) in cutoff.items()})  # Add value to dict for backwards compat
             for result in all_pending:
                 this_cutoff = cutoff.get(result.status)
                 if result.job_start_time <= this_cutoff:

diff --git a/notebooker/web/routes/core.py b/notebooker/web/routes/core.py
@@ -1,7 +1,8 @@
 from flask import Blueprint, jsonify, request
 
 import notebooker.version
-from notebooker.utils.results import get_all_available_results_json
+from notebooker.constants import DEFAULT_RESULT_LIMIT
+from notebooker.utils.results import get_all_available_results_json, get_count_and_latest_time_per_report
 from notebooker.web.utils import get_serializer, get_all_possible_templates, all_templates_flattened
 
 core_bp = Blueprint("core_bp", __name__)
@@ -24,13 +25,24 @@ def user_profile():
 def all_available_results():
     """
     Core function for the homepage/index page which returns all available results.
-    Defaults to the top 50 results.
+    Defaults to the top DEFAULT_RESULT_LIMIT results.
 
     :returns: A JSON containing a list of results. The actual payload data is substituted with URLs that would \
     kick off a download, if requested.
     """
-    limit = int(request.args.get("limit", 50))
-    return jsonify(get_all_available_results_json(get_serializer(), limit))
+    limit = int(request.args.get("limit") or DEFAULT_RESULT_LIMIT)
+    report_name = request.args.get("report_name")
+    return jsonify(get_all_available_results_json(get_serializer(), limit, report_name=report_name))
+
+
+@core_bp.route("/core/get_all_templates_with_results")
+def all_available_templates_with_results():
+    """
+    Core function for the index.html view which summarises the templates which have results available.
+
+    :returns: A JSON object with one entry per template, as built by get_count_and_latest_time_per_report.
+    """
+    serializer = get_serializer()
+    templates_with_results = get_count_and_latest_time_per_report(serializer)
+    return jsonify(templates_with_results)
 
 
 @core_bp.route("/core/all_possible_templates")

diff --git a/notebooker/web/routes/index.py b/notebooker/web/routes/index.py
@@ -1,7 +1,8 @@
 import traceback
 
+import inflection
 from flask import Blueprint, current_app, request, render_template, url_for, jsonify
-from notebooker.constants import JobStatus
+from notebooker.constants import JobStatus, DEFAULT_RESULT_LIMIT
 from notebooker.utils.results import get_all_result_keys
 from notebooker.web.utils import get_serializer, get_all_possible_templates
 
@@ -10,21 +11,41 @@
 
 @index_bp.route("/", methods=["GET"])
 def index():
+    """
+    Render the home page, which shows a card for each report which has at least one result in the database.
+    """
+    auth_user = request.headers.get("X-Auth-Username")
+    templates = get_all_possible_templates()
+    with current_app.app_context():
+        return render_template(
+            "index.html",
+            all_reports=templates,
+            donevalue=JobStatus.DONE,  # needed so we can check if a result is available
+            username=auth_user,
+        )
+
+
+
+
+@index_bp.route("/result_listing/<path:report_name>", methods=["GET"])
+def result_listing(report_name):
     """
     The results page for one report: a blank table which is async populated by /core/all_available_results.
     Async populating the table from a different URL means that we can lock down the "core" blueprint to
     only users with correct privileges.
     """
     username = request.headers.get("X-Auth-Username")
+    # A missing or empty ?limit= falls back to DEFAULT_RESULT_LIMIT; a non-numeric value makes int() raise.
+    result_limit = int(request.args.get("limit") or DEFAULT_RESULT_LIMIT)
     all_reports = get_all_possible_templates()
     with current_app.app_context():
         result = render_template(
-            "index.html",
-            all_jobs_url=url_for("core_bp.all_available_results"),
+            "result_listing.html",
             all_reports=all_reports,
-            n_results_available=get_serializer().n_all_results(),
             donevalue=JobStatus.DONE,  # needed so we can check if a result is available
             username=username,
+            report_name=report_name,
+            result_limit=result_limit,
+            n_results_available=get_serializer().n_all_results_for_report_name(report_name),
+            titleised_report_name=inflection.titleize(report_name)
         )
         return result
 
@@ -41,7 +62,7 @@ def delete_report(job_id):
     """
     try:
         get_serializer().delete_result(job_id)
-        get_all_result_keys(get_serializer(), limit=50, force_reload=True)
+        get_all_result_keys(get_serializer(), limit=DEFAULT_RESULT_LIMIT, force_reload=True)
         result = {"status": "ok"}
     except Exception:
         error_info = traceback.format_exc()

diff --git a/notebooker/web/routes/run_report.py b/notebooker/web/routes/run_report.py
@@ -103,14 +103,28 @@ def run_report_http(report_name):
     report_name = convert_report_name_url_to_path(report_name)
     json_params = request.args.get("json_params")
     initial_python_parameters = json_to_python(json_params) or ""
-    nb = get_report_as_nb(report_name)
+    try:
+        nb = get_report_as_nb(report_name)
+    except FileNotFoundError:
+        logger.exception("Report was not found.")
+        return render_template(
+            "run_report.html",
+            report_found=False,
+            parameters_as_html="REPORT NOT FOUND",
+            has_prefix=False,
+            has_suffix=False,
+            report_name=report_name,
+            all_reports=get_all_possible_templates(),
+            initialPythonParameters={},
+        )
     metadata_idx = _get_parameters_cell_idx(nb)
     has_prefix = has_suffix = False
     if metadata_idx is not None:
         has_prefix, has_suffix = (bool(nb["cells"][:metadata_idx]), bool(nb["cells"][metadata_idx + 1 :]))
     return render_template(
         "run_report.html",
         parameters_as_html=get_report_parameters_html(report_name),
+        report_found=True,
         has_prefix=has_prefix,
         has_suffix=has_suffix,
         report_name=report_name,