def _non_negative_int(text: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 0.

    Negative counts are rejected because they are later used as slice bounds
    and as ``>=`` thresholds, where a negative value silently produces
    misleading output instead of an error.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _non_negative_float(text: str) -> float:
    """argparse ``type=`` callable that accepts only floats >= 0 (NaN is rejected)."""
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {text!r}") from None
    # "not value >= 0" (rather than "value < 0") also rejects NaN.
    if not value >= 0.0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {text}")
    return value


def parse_args() -> argparse.Namespace:
    """Parse the command line of the catalog diff tool.

    Reads ``sys.argv``. ``--old`` and ``--new`` are required; every numeric
    option must be non-negative, otherwise argparse reports a usage error and
    exits with status 2.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Compare two generated hypertrack_training.db catalogs."
    )
    parser.add_argument("--old", required=True, help="Path to old database file")
    parser.add_argument("--new", required=True, help="Path to new database file")
    parser.add_argument("--json-out", help="Write full machine-readable diff report to JSON")
    parser.add_argument(
        "--examples",
        type=_non_negative_int,
        default=10,
        help="How many example IDs/rows to print in console output (default: 10)",
    )
    parser.add_argument(
        "--removed-severe-threshold",
        type=_non_negative_int,
        default=25,
        help="Removed ID count at or above this threshold is severe (default: 25)",
    )
    parser.add_argument(
        "--row-drop-warn-percent",
        type=_non_negative_float,
        default=5.0,
        help="Warn when total row count drop is at least this percent (default: 5.0)",
    )
    parser.add_argument(
        "--row-drop-severe-percent",
        type=_non_negative_float,
        default=20.0,
        help="Severe when total row count drop is at least this percent (default: 20.0)",
    )
    parser.add_argument(
        "--category-regression-threshold",
        type=_non_negative_int,
        default=10,
        help="Warn when category regressions reach this count (default: 10)",
    )
    parser.add_argument(
        "--muscle-regression-threshold",
        type=_non_negative_int,
        default=10,
        help="Warn when muscle regressions reach this count (default: 10)",
    )
    parser.add_argument(
        "--de-fallback-shift-threshold",
        type=_non_negative_int,
        default=10,
        help="Warn when DE-name losses with EN still present reach this count (default: 10)",
    )
    parser.add_argument(
        "--fail-on-breaking",
        action="store_true",
        help=(
            "Exit with non-zero status on dangerous changes "
            "(removed IDs or severe/suspicious regressions)."
        ),
    )
    parser.add_argument(
        "--fail-on-removed-threshold",
        type=_non_negative_int,
        default=0,
        help=(
            "With --fail-on-breaking, fail if removed ID count is above this value "
            "(default: 0)."
        ),
    )
    return parser.parse_args()


def normalize_value(value: Any) -> Any:
    """Return *value* in the canonical form used for comparisons.

    ``None`` becomes ``""``, strings are stripped of surrounding whitespace,
    and every other value is returned unchanged.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value.strip()
    return value


def is_blank(value: Any) -> bool:
    """Return True when *value* normalizes to the empty string."""
    return normalize_value(value) == ""
def _is_empty_json_list(value: Any) -> bool:
    """Return True when *value* is a string holding a JSON empty list (``"[]"``)."""
    if not isinstance(value, str):
        return False
    try:
        return json.loads(value) == []
    except ValueError:
        # Not JSON at all (json.JSONDecodeError is a ValueError subclass).
        return False


def compare_catalogs(
    old_catalog: Dict[str, Any], new_catalog: Dict[str, Any], args: argparse.Namespace
) -> Dict[str, Any]:
    """Build the diff report between two loaded catalogs.

    Args:
        old_catalog: Catalog dict providing the keys ``path``, ``version``,
            ``compare_fields``, ``exercise_count`` and ``exercises``
            (exercise id -> field dict).
        new_catalog: Same structure for the newer database.
        args: Parsed options; reads ``examples``, ``removed_severe_threshold``,
            ``row_drop_warn_percent``, ``row_drop_severe_percent``,
            ``category_regression_threshold``, ``muscle_regression_threshold``
            and ``de_fallback_shift_threshold``.

    Returns:
        A JSON-serialisable report with ID-level differences, per-field
        changes for shared IDs, regression counters, warning flags and a
        bounded set of examples.
    """
    # Field -> regression counter incremented when the field goes from
    # non-empty to empty on an exercise present in both catalogs.
    blank_regression_keys = {
        "name_de": "name_de_became_blank",
        "name_en": "name_en_became_blank",
        "description_de": "description_de_became_blank",
        "description_en": "description_en_became_blank",
        "category_name": "category_became_blank",
        "muscles_primary": "muscles_primary_became_blank",
        "muscles_secondary": "muscles_secondary_became_blank",
    }
    # The generator stores muscle lists as JSON text, so an emptied list is the
    # string "[]" rather than a blank string and must be recognised explicitly.
    json_list_fields = ("muscles_primary", "muscles_secondary")

    def field_is_empty(field: str, value: Any) -> bool:
        if is_blank(value):
            return True
        return field in json_list_fields and _is_empty_json_list(value)

    old_exercises = old_catalog["exercises"]
    new_exercises = new_catalog["exercises"]
    old_ids = set(old_exercises)
    new_ids = set(new_exercises)

    removed_ids = sorted(old_ids - new_ids)
    added_ids = sorted(new_ids - old_ids)
    shared_ids = sorted(old_ids & new_ids)

    compare_fields = sorted(
        set(old_catalog["compare_fields"]) | set(new_catalog["compare_fields"])
    )
    changed_fields_by_id: Dict[str, Dict[str, Dict[str, Any]]] = {}
    changed_field_counts = {field: 0 for field in compare_fields}

    regressions = {key: 0 for key in blank_regression_keys.values()}
    regressions["de_name_lost_en_still_present"] = 0

    for exercise_id in shared_ids:
        old_row = old_exercises[exercise_id]
        new_row = new_exercises[exercise_id]
        field_changes: Dict[str, Dict[str, Any]] = {}

        for field in compare_fields:
            old_value = normalize_value(old_row.get(field))
            new_value = normalize_value(new_row.get(field))
            if old_value == new_value:
                continue

            field_changes[field] = {"old": old_value, "new": new_value}
            changed_field_counts[field] += 1

            regression_key = blank_regression_keys.get(field)
            if (
                regression_key is not None
                and not field_is_empty(field, old_value)
                and field_is_empty(field, new_value)
            ):
                regressions[regression_key] += 1

        if (
            not is_blank(old_row.get("name_de"))
            and is_blank(new_row.get("name_de"))
            and not is_blank(new_row.get("name_en"))
        ):
            regressions["de_name_lost_en_still_present"] += 1

        if field_changes:
            changed_fields_by_id[exercise_id] = field_changes

    old_count = old_catalog["exercise_count"]
    new_count = new_catalog["exercise_count"]
    count_delta = new_count - old_count
    row_drop_percent = 0.0
    if old_count > 0 and new_count < old_count:
        row_drop_percent = ((old_count - new_count) / old_count) * 100.0

    warnings: List[Dict[str, Any]] = []
    removed_count = len(removed_ids)

    if removed_count > 0:
        warnings.append(
            {
                "code": "REMOVED_IDS",
                "severity": "warning",
                "value": removed_count,
                "message": f"Exercises removed: {removed_count}",
            }
        )

    # Require at least one removal so a threshold of 0 cannot flag a clean diff.
    if removed_count > 0 and removed_count >= args.removed_severe_threshold:
        warnings.append(
            {
                "code": "REMOVED_IDS_SEVERE",
                "severity": "severe",
                "value": removed_count,
                "message": (
                    f"Exercises removed exceeds severe threshold "
                    f"({removed_count} >= {args.removed_severe_threshold})"
                ),
            }
        )

    if regressions["name_de_became_blank"] > 0 or regressions["name_en_became_blank"] > 0:
        warnings.append(
            {
                "code": "NAME_REGRESSION",
                "severity": "warning",
                "value": {
                    "name_de_became_blank": regressions["name_de_became_blank"],
                    "name_en_became_blank": regressions["name_en_became_blank"],
                },
                "message": "Previously non-empty exercise names became blank.",
            }
        )

    category_loss = regressions["category_became_blank"]
    if category_loss > 0:
        # Escalates to severe at twice the configured threshold.
        severity = (
            "severe"
            if category_loss >= max(1, args.category_regression_threshold * 2)
            else "warning"
        )
        warnings.append(
            {
                "code": "CATEGORY_REGRESSION",
                "severity": severity,
                "value": category_loss,
                "message": f"Categories became blank for {category_loss} exercises.",
            }
        )

    muscle_loss = (
        regressions["muscles_primary_became_blank"]
        + regressions["muscles_secondary_became_blank"]
    )
    if muscle_loss > 0:
        severity = (
            "severe" if muscle_loss >= max(1, args.muscle_regression_threshold * 2) else "warning"
        )
        warnings.append(
            {
                "code": "MUSCLE_REGRESSION",
                "severity": severity,
                "value": {
                    "muscles_primary_became_blank": regressions["muscles_primary_became_blank"],
                    "muscles_secondary_became_blank": regressions["muscles_secondary_became_blank"],
                },
                "message": "Muscle lists became blank unexpectedly for shared IDs.",
            }
        )

    de_shift = regressions["de_name_lost_en_still_present"]
    if de_shift > 0 and de_shift >= args.de_fallback_shift_threshold:
        warnings.append(
            {
                "code": "DE_FALLBACK_SHIFT",
                "severity": "warning",
                "value": de_shift,
                "message": (
                    "Large fallback shift detected: many DE names disappeared while EN remains."
                ),
            }
        )

    # Only report a drop when rows were actually lost (a 0% threshold would
    # otherwise flag catalogs that stayed the same size or grew).
    if row_drop_percent > 0 and row_drop_percent >= args.row_drop_warn_percent:
        severity = "severe" if row_drop_percent >= args.row_drop_severe_percent else "warning"
        warnings.append(
            {
                "code": "ROW_COUNT_DROP",
                "severity": severity,
                "value": row_drop_percent,
                "message": (
                    f"Total exercise row count dropped by {row_drop_percent:.2f}% "
                    f"({old_count} -> {new_count})."
                ),
            }
        )

    changed_exercise_count = len(changed_fields_by_id)
    changed_field_counts = {
        field: count for field, count in changed_field_counts.items() if count > 0
    }

    # A negative limit would slice from the end of each list; treat it as 0.
    example_limit = max(0, args.examples)

    report = {
        "old": {
            "path": old_catalog["path"],
            "version": old_catalog["version"],
            "exercise_count": old_count,
        },
        "new": {
            "path": new_catalog["path"],
            "version": new_catalog["version"],
            "exercise_count": new_count,
        },
        "delta": {"exercise_count": count_delta},
        "removed_ids": removed_ids,
        "added_ids": added_ids,
        "changed_fields_by_id": changed_fields_by_id,
        "summary": {
            "shared_id_count": len(shared_ids),
            "removed_count": removed_count,
            "added_count": len(added_ids),
            "changed_exercise_count": changed_exercise_count,
            "changed_field_counts": changed_field_counts,
            "row_drop_percent": row_drop_percent,
            "regressions": regressions,
        },
        "warning_flags": warnings,
        "examples": {
            "removed_ids": removed_ids[:example_limit],
            "added_ids": added_ids[:example_limit],
            "changed_ids": sorted(changed_fields_by_id)[:example_limit],
            "changed_rows": build_changed_row_examples(changed_fields_by_id, example_limit),
        },
    }
    return report


def build_changed_row_examples(
    changed_fields_by_id: Dict[str, Dict[str, Dict[str, Any]]], limit: int
) -> List[Dict[str, Any]]:
    """Flatten per-exercise field changes into at most *limit* example rows.

    Exercise IDs are visited in sorted (string) order. Each row has the keys
    ``id``, ``field``, ``old`` and ``new``. A limit of zero or less yields an
    empty list.
    """
    rows: List[Dict[str, Any]] = []
    if limit <= 0:
        return rows
    for exercise_id in sorted(changed_fields_by_id)[:limit]:
        for field, values in changed_fields_by_id[exercise_id].items():
            rows.append(
                {
                    "id": exercise_id,
                    "field": field,
                    "old": values["old"],
                    "new": values["new"],
                }
            )
            if len(rows) >= limit:
                return rows
    return rows
'(missing)'}") + print(f" New version: {new['version'] or '(missing)'}") + print(f" Old row count: {old['exercise_count']}") + print(f" New row count: {new['exercise_count']}") + delta = report["delta"]["exercise_count"] + print(f" Total delta: {delta:+d}") + print("") + + print("ID-level catalog diff:") + print(f" Removed IDs: {summary['removed_count']}") + print(f" Added IDs: {summary['added_count']}") + if report["examples"]["removed_ids"]: + print(f" Removed examples ({min(examples, summary['removed_count'])}):") + for exercise_id in report["examples"]["removed_ids"]: + print(f" - {exercise_id}") + if report["examples"]["added_ids"]: + print(f" Added examples ({min(examples, summary['added_count'])}):") + for exercise_id in report["examples"]["added_ids"]: + print(f" - {exercise_id}") + print("") + + print("Field-level changes (shared IDs):") + print(f" Shared IDs: {summary['shared_id_count']}") + print(f" Exercises with any field changes: {summary['changed_exercise_count']}") + changed_field_counts = summary["changed_field_counts"] + if changed_field_counts: + for field in sorted(changed_field_counts.keys()): + print(f" - {field}: {changed_field_counts[field]}") + else: + print(" No field changes detected on shared IDs.") + print("") + + print("Suspicious regressions:") + regressions = summary["regressions"] + print(f" name_de became blank: {regressions['name_de_became_blank']}") + print(f" name_en became blank: {regressions['name_en_became_blank']}") + print(f" description_de became blank: {regressions['description_de_became_blank']}") + print(f" description_en became blank: {regressions['description_en_became_blank']}") + print(f" category became blank: {regressions['category_became_blank']}") + print(f" muscles_primary became blank: {regressions['muscles_primary_became_blank']}") + print(f" muscles_secondary became blank: {regressions['muscles_secondary_became_blank']}") + print( + f" de_name lost while en still present: 
def should_fail(report: Dict[str, Any], args: argparse.Namespace) -> Tuple[bool, List[str]]:
    """Decide whether the diff report counts as breaking.

    Args:
        report: Diff report; reads ``summary.removed_count``,
            ``summary.regressions`` and ``warning_flags``.
        args: Parsed options; reads ``fail_on_removed_threshold``.

    Returns:
        ``(fail, reasons)`` where ``fail`` is True when at least one reason
        was collected and ``reasons`` lists them in evaluation order.
    """
    reasons: List[str] = []
    removed_count = report["summary"]["removed_count"]
    regressions = report["summary"]["regressions"]
    warnings = report["warning_flags"]

    if removed_count > args.fail_on_removed_threshold:
        reasons.append(
            f"removed_count={removed_count} > fail_on_removed_threshold={args.fail_on_removed_threshold}"
        )

    if regressions["name_de_became_blank"] > 0 or regressions["name_en_became_blank"] > 0:
        reasons.append("name regression detected (non-empty name became blank)")

    if any(warning["severity"] == "severe" for warning in warnings):
        reasons.append("severe warning present")

    return len(reasons) > 0, reasons


def main() -> int:
    """Run the catalog diff tool and return its process exit status.

    Returns:
        0 on success, 1 when ``--fail-on-breaking`` is set and a breaking
        change was detected, 2 when the catalogs could not be loaded or
        compared, or the JSON report could not be written.
    """
    args = parse_args()
    try:
        old_catalog = load_catalog(args.old)
        new_catalog = load_catalog(args.new)
        report = compare_catalogs(old_catalog, new_catalog, args)
    except Exception as exc:  # top-level boundary: report and exit with status 2
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2

    print_console_report(report, args.examples)

    if args.json_out:
        try:
            out_dir = os.path.dirname(args.json_out)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(args.json_out, "w", encoding="utf-8") as f:
                json.dump(report, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as exc:
            # An uncaught exception would exit with status 1, which is
            # reserved for FAIL-ON-BREAKING; report it as a tool error instead.
            print(
                f"ERROR: could not write JSON report to {args.json_out}: {exc}",
                file=sys.stderr,
            )
            return 2
        print(f"JSON report written to: {args.json_out}")

    if args.fail_on_breaking:
        fail, reasons = should_fail(report, args)
        if fail:
            print("")
            print("FAIL-ON-BREAKING triggered:")
            for reason in reasons:
                print(f"  - {reason}")
            return 1

    return 0
# Compiled once at import time. DOTALL lets a tag that contains a line break
# (for example an attribute on the next line) be matched and removed as well.
_HTML_TAG_RE = re.compile(r"<.*?>", re.DOTALL)


def clean_html(raw_html: Any) -> str:
    """Strip HTML tags from *raw_html* and trim surrounding whitespace.

    Non-string input yields ``""``. Only ``<...>`` spans are removed; HTML
    entities are left untouched.
    """
    if not isinstance(raw_html, str):
        return ""
    return _HTML_TAG_RE.sub("", raw_html).strip()


def normalize_text(value: Any) -> str:
    """Return *value* stripped of surrounding whitespace, or ``""`` for non-strings."""
    if not isinstance(value, str):
        return ""
    return value.strip()


def get_id(x: Any) -> Any:
    """Return ``x["id"]`` (``None`` if absent) when *x* is a dict, otherwise *x* itself."""
    if isinstance(x, dict):
        return x.get("id")
    return x


def now_iso_utc() -> str:
    """Return the current UTC time as an ISO 8601 string without microseconds."""
    return datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0).isoformat()


def fetch_endpoint(url: str) -> Dict[str, Any]:
    """GET *url* and return its ``results`` list plus fetch metadata.

    Returns:
        Dict with the keys ``results``, ``status_code``, ``fetched_at`` and
        ``source_date_header`` (the response ``Date`` header or ``""``).

    Raises:
        requests.RequestException: On transport errors or a non-success
            status (via ``raise_for_status``).
        ValueError: When the body is not a JSON object or ``results`` is not
            a list.
    """
    # NOTE(review): only this single response is read. If the API paginates
    # (e.g. returns a "next" link), later pages are ignored - confirm that the
    # requested limit always covers the full data set.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()
    if not isinstance(payload, dict):
        raise ValueError(f"Unexpected payload shape from {url}: expected JSON object")
    results = payload.get("results", [])
    if not isinstance(results, list):
        raise ValueError(f"Unexpected payload shape from {url}: 'results' is not a list")
    return {
        "results": results,
        "status_code": response.status_code,
        "fetched_at": now_iso_utc(),
        "source_date_header": response.headers.get("Date", ""),
    }
reason_counts[reason] += 1 + if len(rejected_examples) < max_examples: + rejected_examples.append(example) + + +def process_and_create_db( + db_out: str = "hypertrack_training.db", + report_json_out: Optional[str] = None, + report_max_examples: int = 25, +) -> int: + print("Starte: Lade Daten von wger.de ...") try: - # API Abfragen - categories_res = requests.get("https://wger.de/api/v2/exercisecategory/") - muscles_res = requests.get("https://wger.de/api/v2/muscle/") - # Limit erhöht, um sicher alle zu haben - exercises_info_res = requests.get("https://wger.de/api/v2/exerciseinfo/?limit=9999") - - categories_data = categories_res.json().get('results', []) - muscles_data = muscles_res.json().get('results', []) - exercises_info_data = exercises_info_res.json().get('results', []) + categories_payload = fetch_endpoint(SOURCE_ENDPOINTS["categories"]) + muscles_payload = fetch_endpoint(SOURCE_ENDPOINTS["muscles"]) + exercises_payload = fetch_endpoint(SOURCE_ENDPOINTS["exerciseinfo"]) except requests.RequestException as e: - print(f"❌ Fehler beim Download: {e}") - return + print(f"Fehler beim Download: {e}") + return 2 + except Exception as e: + print(f"Kritischer Fehler beim Laden der Quellen: {e}") + return 2 + + categories_data = categories_payload["results"] + muscles_data = muscles_payload["results"] + exercises_info_data = exercises_payload["results"] + + print( + f"Geladen: {len(exercises_info_data)} Übungen, " + f"{len(categories_data)} Kategorien, {len(muscles_data)} Muskeln." 
+ ) - print(f"📦 Geladen: {len(exercises_info_data)} Übungen, {len(categories_data)} Kategorien, {len(muscles_data)} Muskeln.") + category_map = { + cat["id"]: cat.get("name") + for cat in categories_data + if isinstance(cat, dict) and "id" in cat + } + muscle_map = { + m["id"]: m.get("name_en") or m.get("name") + for m in muscles_data + if isinstance(m, dict) and "id" in m + } - # Maps erstellen - category_map = {cat['id']: cat.get('name') for cat in categories_data} - muscle_map = {m['id']: m.get('name_en') or m.get('name') for m in muscles_data} + processed_exercises: Dict[str, Dict[str, Any]] = {} + reason_counts: Counter = Counter({key: 0 for key in REJECTION_REASON_KEYS}) + rejected_examples: List[Dict[str, Any]] = [] - processed_exercises = {} + duplicate_payload_count = 0 + duplicate_conflict_count = 0 + malformed_translation_count = 0 - for exercise_info in exercises_info_data: - exercise_id = str(exercise_info.get('id')) # ID als String für Drift + for raw_index, exercise_info in enumerate(exercises_info_data): + if not isinstance(exercise_info, dict): + add_rejection( + rejected_examples, + reason_counts, + "malformed_payload", + report_max_examples, + { + "id": None, + "reason": "malformed_payload", + "details": "exercise payload is not an object", + "raw_index": raw_index, + }, + ) + continue + + raw_id = exercise_info.get("id") + if raw_id is None: + add_rejection( + rejected_examples, + reason_counts, + "missing_required_source_fields", + report_max_examples, + { + "id": None, + "reason": "missing_required_source_fields", + "details": "missing source field: id", + "raw_index": raw_index, + }, + ) + continue + + exercise_id = normalize_text(str(raw_id)) if not exercise_id: + add_rejection( + rejected_examples, + reason_counts, + "missing_required_source_fields", + report_max_examples, + { + "id": str(raw_id), + "reason": "missing_required_source_fields", + "details": "blank source field: id", + "raw_index": raw_index, + }, + ) continue - if exercise_id 
not in processed_exercises: - # Muskeln als Liste sammeln (für JSON) - prim_muscles = sorted({muscle_map.get(get_id(m)) for m in exercise_info.get('muscles', []) if muscle_map.get(get_id(m))}) - sec_muscles = sorted({muscle_map.get(get_id(m)) for m in exercise_info.get('muscles_secondary', []) if muscle_map.get(get_id(m))}) + translations = exercise_info.get("translations", []) + if translations is None: + translations = [] + if not isinstance(translations, list): + add_rejection( + rejected_examples, + reason_counts, + "malformed_payload", + report_max_examples, + { + "id": exercise_id, + "reason": "malformed_payload", + "details": "translations is not a list", + "raw_index": raw_index, + }, + ) + continue + raw_muscles = exercise_info.get("muscles", []) + raw_muscles_secondary = exercise_info.get("muscles_secondary", []) + if raw_muscles is None: + raw_muscles = [] + if raw_muscles_secondary is None: + raw_muscles_secondary = [] + if not isinstance(raw_muscles, list) or not isinstance(raw_muscles_secondary, list): + add_rejection( + rejected_examples, + reason_counts, + "malformed_payload", + report_max_examples, + { + "id": exercise_id, + "reason": "malformed_payload", + "details": "muscles or muscles_secondary is not a list", + "raw_index": raw_index, + }, + ) + continue + + prim_muscles = sorted( + { + muscle_map.get(get_id(m)) + for m in raw_muscles + if muscle_map.get(get_id(m)) + } + ) + sec_muscles = sorted( + { + muscle_map.get(get_id(m)) + for m in raw_muscles_secondary + if muscle_map.get(get_id(m)) + } + ) + category_name = category_map.get(get_id(exercise_info.get("category")), CATEGORY_OTHER) + + is_duplicate_payload = exercise_id in processed_exercises + if is_duplicate_payload: + duplicate_payload_count += 1 + existing = processed_exercises[exercise_id] + existing_category = existing.get("category_name", CATEGORY_OTHER) + existing_prim = existing.get("muscles_primary", "[]") + existing_sec = existing.get("muscles_secondary", "[]") + if ( + 
existing_category != category_name + or existing_prim != json.dumps(prim_muscles) + or existing_sec != json.dumps(sec_muscles) + ): + duplicate_conflict_count += 1 + else: processed_exercises[exercise_id] = { - 'id': exercise_id, - 'category_name': category_map.get(get_id(exercise_info.get('category')), 'Andere'), - # WICHTIG: Als JSON-String speichern, damit die App es parsen kann - 'muscles_primary': json.dumps(prim_muscles), - 'muscles_secondary': json.dumps(sec_muscles), - 'name_de': '', 'description_de': '', - 'name_en': '', 'description_en': '', - # Neue Felder für deine App-Logik - 'is_custom': 0, - 'created_by': 'system', - 'source': 'base', - 'image_path': '' + "id": exercise_id, + "category_name": category_name, + "muscles_primary": json.dumps(prim_muscles), + "muscles_secondary": json.dumps(sec_muscles), + "name_de": "", + "description_de": "", + "name_en": "", + "description_en": "", + "is_custom": 0, + "created_by": "system", + "source": "base", + "image_path": "", } - # Übersetzungen verarbeiten - for t in exercise_info.get('translations', []): - lang = t.get('language') - name = (t.get('name') or '').strip() - desc = clean_html(t.get('description') or '') - - if lang == 1: # Deutsch - if name: processed_exercises[exercise_id]['name_de'] = name - if desc: processed_exercises[exercise_id]['description_de'] = desc - elif lang == 2: # Englisch - if name: processed_exercises[exercise_id]['name_en'] = name - if desc: processed_exercises[exercise_id]['description_en'] = desc - - # In DataFrame umwandeln - final_list = list(processed_exercises.values()) - df = pd.DataFrame(final_list) - - # Fallbacks für Sprachen - df['name_de'] = df.apply(lambda row: row['name_en'] if not row['name_de'] else row['name_de'], axis=1) - df['name_en'] = df.apply(lambda row: row['name_de'] if not row['name_en'] else row['name_en'], axis=1) - df['description_de'] = df.apply(lambda row: row['description_en'] if not row['description_de'] else row['description_de'], axis=1) - 
df['description_en'] = df.apply(lambda row: row['description_de'] if not row['description_en'] else row['description_en'], axis=1) - - # Leere entfernen - df.dropna(subset=['name_de', 'name_en'], how='all', inplace=True) - - print(f"✨ {len(df)} Übungen fertig verarbeitet.") - - # DB erstellen - db_name = 'hypertrack_training.db' - if os.path.exists(db_name): - os.remove(db_name) # Alte löschen für sauberen Neustart - - conn = sqlite3.connect(db_name) + target = processed_exercises[exercise_id] + + raw_name_de = normalize_text(target.get("name_de")) + raw_name_en = normalize_text(target.get("name_en")) + saw_any_non_empty_title_any_lang = bool(raw_name_de or raw_name_en) + + for t in translations: + if not isinstance(t, dict): + malformed_translation_count += 1 + continue + + lang = t.get("language") + name = normalize_text(t.get("name")) + desc = clean_html(t.get("description")) + + if name: + saw_any_non_empty_title_any_lang = True + + if lang == 1: + if name: + target["name_de"] = name + if desc: + target["description_de"] = desc + elif lang == 2: + if name: + target["name_en"] = name + if desc: + target["description_en"] = desc + + previous_any_title = bool(target.get("_debug_has_any_title_any_lang", False)) + target["_debug_has_any_title_any_lang"] = ( + previous_any_title or saw_any_non_empty_title_any_lang + ) + + final_rows: List[Dict[str, Any]] = [] + + fallback_stats = { + "used_en_for_de": 0, + "used_de_for_en": 0, + "both_present": 0, + "de_only": 0, + "en_only": 0, + "neither_present": 0, + "original_de_present": 0, + "original_en_present": 0, + } + + for exercise_id in sorted(processed_exercises.keys(), key=lambda x: int(x) if x.isdigit() else x): + row = processed_exercises[exercise_id] + + orig_name_de = normalize_text(row.get("name_de")) + orig_name_en = normalize_text(row.get("name_en")) + + if orig_name_de: + fallback_stats["original_de_present"] += 1 + if orig_name_en: + fallback_stats["original_en_present"] += 1 + + final_name_de = orig_name_de 
if orig_name_de else orig_name_en + final_name_en = orig_name_en if orig_name_en else orig_name_de + + if not orig_name_de and final_name_de: + fallback_stats["used_en_for_de"] += 1 + if not orig_name_en and final_name_en: + fallback_stats["used_de_for_en"] += 1 + + row["name_de"] = final_name_de + row["name_en"] = final_name_en + + orig_description_de = normalize_text(row.get("description_de")) + orig_description_en = normalize_text(row.get("description_en")) + row["description_de"] = orig_description_de if orig_description_de else orig_description_en + row["description_en"] = orig_description_en if orig_description_en else orig_description_de + + has_de = bool(row["name_de"]) + has_en = bool(row["name_en"]) + + if has_de and has_en: + fallback_stats["both_present"] += 1 + elif has_de: + fallback_stats["de_only"] += 1 + elif has_en: + fallback_stats["en_only"] += 1 + else: + fallback_stats["neither_present"] += 1 + reason = "missing_usable_title" + if row.get("_debug_has_any_title_any_lang"): + reason = "missing_usable_localized_title_after_fallback" + + add_rejection( + rejected_examples, + reason_counts, + reason, + report_max_examples, + { + "id": exercise_id, + "reason": reason, + "name_de_raw": orig_name_de, + "name_en_raw": orig_name_en, + "category_name": row.get("category_name", ""), + }, + ) + continue + + row.pop("_debug_has_any_title_any_lang", None) + final_rows.append(row) + + generated_at = now_iso_utc() + db_version = datetime.datetime.now().strftime("%Y%m%d%H%M") + + db_dir = os.path.dirname(db_out) + if db_dir: + os.makedirs(db_dir, exist_ok=True) + + if os.path.exists(db_out): + os.remove(db_out) + + conn = sqlite3.connect(db_out) cursor = conn.cursor() - # Tabelle exakt wie in Drift definieren - cursor.execute(''' + cursor.execute( + """ CREATE TABLE exercises ( id TEXT PRIMARY KEY, - name_de TEXT, + name_de TEXT, name_en TEXT, - description_de TEXT, + description_de TEXT, description_en TEXT, category_name TEXT, - muscles_primary TEXT, + 
muscles_primary TEXT, muscles_secondary TEXT, image_path TEXT, is_custom INTEGER DEFAULT 0, created_by TEXT DEFAULT 'system', source TEXT DEFAULT 'base' - )''') + )""" + ) - # Metadaten für Versionierung - cursor.execute('CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT)') - version = datetime.datetime.now().strftime("%Y%m%d%H%M") - cursor.execute("INSERT INTO metadata VALUES ('version', ?)", (version,)) + cursor.execute("CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT)") + cursor.execute("INSERT INTO metadata VALUES ('version', ?)", (db_version,)) + + insert_columns = [ + "id", + "name_de", + "name_en", + "description_de", + "description_en", + "category_name", + "muscles_primary", + "muscles_secondary", + "image_path", + "is_custom", + "created_by", + "source", + ] + insert_values = [tuple(row[col] for col in insert_columns) for row in final_rows] + cursor.executemany( + """ + INSERT INTO exercises ( + id, name_de, name_en, description_de, description_en, + category_name, muscles_primary, muscles_secondary, + image_path, is_custom, created_by, source + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + insert_values, + ) - # Daten schreiben - df.to_sql('exercises', conn, if_exists='append', index=False) - conn.commit() conn.close() - print(f"\n✅ ERFOLG: '{db_name}' erstellt (Version: {version}).") - print("👉 Kopiere diese Datei jetzt nach 'assets/db/'!") + raw_exercise_count = len(exercises_info_data) + imported_count = len(final_rows) + rejected_count = sum(reason_counts.values()) + + import_rate = (imported_count / raw_exercise_count) if raw_exercise_count else 0.0 + rejection_rate = (rejected_count / raw_exercise_count) if raw_exercise_count else 0.0 + + report = { + "build": { + "generated_at": generated_at, + "db_version": db_version, + "db_output_path": db_out, + "source_endpoints": [ + { + "name": "categories", + "url": SOURCE_ENDPOINTS["categories"], + "status_code": categories_payload["status_code"], + "fetched_at": categories_payload["fetched_at"], + "source_date_header": categories_payload["source_date_header"], + }, + { + "name": "muscles", + "url": SOURCE_ENDPOINTS["muscles"], + "status_code": muscles_payload["status_code"], + "fetched_at": muscles_payload["fetched_at"], + "source_date_header": muscles_payload["source_date_header"], + }, + { + "name": "exerciseinfo", + "url": SOURCE_ENDPOINTS["exerciseinfo"], + "status_code": exercises_payload["status_code"], + "fetched_at": exercises_payload["fetched_at"], + "source_date_header": exercises_payload["source_date_header"], + }, + ], + "source_timestamp": exercises_payload["source_date_header"] or "", + }, + "summary": { + "raw_exercise_count": raw_exercise_count, + "imported_count": imported_count, + "rejected_count": rejected_count, + "import_rate": round(import_rate, 6), + "rejection_rate": round(rejection_rate, 6), + }, + "import_metadata": { + "categories_loaded": len(categories_data), + "muscles_loaded": len(muscles_data), + "duplicate_payload_count": duplicate_payload_count, + "duplicate_conflict_count": duplicate_conflict_count, + "malformed_translation_count": 
malformed_translation_count, + }, + "language_fallbacks": fallback_stats, + "rejection_reasons": {reason: int(reason_counts[reason]) for reason in REJECTION_REASON_KEYS}, + "rejected_examples": rejected_examples, + } + + print("") + print(f"ERFOLG: '{db_out}' erstellt (Version: {db_version}).") + print("Build summary:") + print(f" raw exercises: {raw_exercise_count}") + print(f" imported: {imported_count}") + print(f" rejected: {rejected_count}") + print(f" import rate: {import_rate:.2%}") + print(f" rejection rate: {rejection_rate:.2%}") + print("Rejection reasons:") + for reason in REJECTION_REASON_KEYS: + print(f" - {reason}: {reason_counts[reason]}") + + if report_json_out: + report_dir = os.path.dirname(report_json_out) + if report_dir: + os.makedirs(report_dir, exist_ok=True) + with open(report_json_out, "w", encoding="utf-8") as f: + json.dump(report, f, ensure_ascii=False, indent=2) + print(f"Report JSON geschrieben: {report_json_out}") + + print("Kopiere die DB-Datei jetzt nach 'assets/db/' falls gewünscht.") + + return 0 + + +def main() -> int: + args = parse_args() + return process_and_create_db( + db_out=args.db_out, + report_json_out=args.report_json_out, + report_max_examples=max(1, args.report_max_examples), + ) + -if __name__ == '__main__': - process_and_create_db() \ No newline at end of file +if __name__ == "__main__": + raise SystemExit(main()) From 3a548907fe23212c358d067a2f0abcf95f1ce6fb Mon Sep 17 00:00:00 2001 From: Richard Georg Schotte Date: Sun, 12 Apr 2026 23:07:36 +0200 Subject: [PATCH 3/5] ci(wger): add catalog refresh workflow --- .github/workflows/wger-catalog-refresh.yml | 152 +++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 .github/workflows/wger-catalog-refresh.yml diff --git a/.github/workflows/wger-catalog-refresh.yml b/.github/workflows/wger-catalog-refresh.yml new file mode 100644 index 0000000..230fa48 --- /dev/null +++ b/.github/workflows/wger-catalog-refresh.yml @@ -0,0 +1,152 @@ +name: Wger Catalog 
Refresh + +'on': + workflow_dispatch: + inputs: + fail_on_breaking: + description: "Fail workflow if diff detects breaking changes" + required: false + default: true + type: boolean + schedule: + - cron: "0 5 * * 1" + +permissions: + contents: read + +jobs: + refresh-catalog: + runs-on: ubuntu-latest + env: + GENERATED_DB_PATH: artifacts/hypertrack_training.db + BUILD_REPORT_PATH: artifacts/wger_build_report.json + DIFF_REPORT_PATH: artifacts/wger_diff_report.json + REFERENCE_DB_PATH: assets/db/hypertrack_training.db + FAIL_ON_BREAKING: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fail_on_breaking || 'true' }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests + + - name: Validate script syntax + run: | + python -m py_compile skript/create_wger_exercise_db.py + python -m py_compile skript/wger_catalog_diff.py + + - name: Generate exercise catalog and build report + run: | + mkdir -p artifacts + python skript/create_wger_exercise_db.py \ + --db-out "$GENERATED_DB_PATH" \ + --report-json-out "$BUILD_REPORT_PATH" + + - name: Diff against committed reference DB (if available) + id: diff + continue-on-error: true + run: | + if [ -f "$REFERENCE_DB_PATH" ]; then + echo "reference_db_found=true" >> "$GITHUB_OUTPUT" + DIFF_CMD="python skript/wger_catalog_diff.py --old \"$REFERENCE_DB_PATH\" --new \"$GENERATED_DB_PATH\" --json-out \"$DIFF_REPORT_PATH\"" + if [ "$FAIL_ON_BREAKING" = "true" ]; then + DIFF_CMD="$DIFF_CMD --fail-on-breaking" + fi + echo "Running diff command: $DIFF_CMD" + eval "$DIFF_CMD" + else + echo "reference_db_found=false" >> "$GITHUB_OUTPUT" + echo "Reference DB not found at $REFERENCE_DB_PATH. Skipping diff." 
| tee artifacts/wger_diff_skipped.txt + fi + + - name: Upload generated DB artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: wger-generated-db + path: ${{ env.GENERATED_DB_PATH }} + if-no-files-found: error + + - name: Upload build report artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: wger-build-report + path: ${{ env.BUILD_REPORT_PATH }} + if-no-files-found: error + + - name: Upload diff report artifact + if: always() && steps.diff.outputs.reference_db_found == 'true' + uses: actions/upload-artifact@v4 + with: + name: wger-diff-report + path: ${{ env.DIFF_REPORT_PATH }} + if-no-files-found: error + + - name: Upload diff-skip info artifact + if: always() && steps.diff.outputs.reference_db_found != 'true' + uses: actions/upload-artifact@v4 + with: + name: wger-diff-report + path: artifacts/wger_diff_skipped.txt + if-no-files-found: error + + - name: Publish run summary + if: always() + run: | + python - <<'PY' + import json + import os + + build_path = os.environ["BUILD_REPORT_PATH"] + diff_path = os.environ["DIFF_REPORT_PATH"] + reference_found = os.environ.get("REFERENCE_FOUND", "false") + diff_outcome = os.environ.get("DIFF_OUTCOME", "unknown") + + lines = ["## Wger Catalog Refresh Summary", ""] + + if os.path.exists(build_path): + with open(build_path, "r", encoding="utf-8") as f: + build = json.load(f) + bmeta = build.get("build", {}) + summary = build.get("summary", {}) + lines.append(f"- DB version: `{bmeta.get('db_version', 'n/a')}`") + lines.append(f"- Generated at: `{bmeta.get('generated_at', 'n/a')}`") + lines.append(f"- Raw exercises: `{summary.get('raw_exercise_count', 'n/a')}`") + lines.append(f"- Imported: `{summary.get('imported_count', 'n/a')}`") + lines.append(f"- Rejected: `{summary.get('rejected_count', 'n/a')}`") + else: + lines.append("- Build report not found.") + + if reference_found == "true" and os.path.exists(diff_path): + with open(diff_path, "r", encoding="utf-8") as f: + diff = 
json.load(f) + ds = diff.get("summary", {}) + lines.append(f"- Removed IDs: `{ds.get('removed_count', 'n/a')}`") + lines.append(f"- Added IDs: `{ds.get('added_count', 'n/a')}`") + lines.append(f"- Safety validation outcome: `{diff_outcome}`") + elif reference_found == "true": + lines.append("- Diff step ran but no diff JSON was produced.") + else: + lines.append("- Diff skipped: reference DB not found.") + + with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") + PY + env: + REFERENCE_FOUND: ${{ steps.diff.outputs.reference_db_found }} + DIFF_OUTCOME: ${{ steps.diff.outcome }} + + - name: Enforce safety gate result + if: always() && steps.diff.outputs.reference_db_found == 'true' && env.FAIL_ON_BREAKING == 'true' && steps.diff.outcome == 'failure' + run: | + echo "Diff safety validation failed under --fail-on-breaking." + exit 1 From ecaa3ede7a5c4d6a5b292e253e06ad1e39612703 Mon Sep 17 00:00:00 2001 From: Richard Georg Schotte Date: Sun, 12 Apr 2026 23:57:11 +0200 Subject: [PATCH 4/5] 1 --- .github/workflows/wger-catalog-refresh.yml | 194 +++++- README.md | 1 + assets/db/wger_catalog_manifest.json | 17 + documentation/data_models_and_storage.md | 25 + documentation/overview.md | 2 + documentation/wger_catalog_refresh_system.md | 171 +++++ lib/config/app_data_sources.dart | 72 ++ lib/data/basis_data_manager.dart | 96 ++- lib/services/db_service.dart | 4 +- .../exercise_catalog_refresh_service.dart | 617 ++++++++++++++++++ ...exercise_catalog_refresh_service_test.dart | 134 ++++ 11 files changed, 1287 insertions(+), 46 deletions(-) create mode 100644 assets/db/wger_catalog_manifest.json create mode 100644 documentation/wger_catalog_refresh_system.md create mode 100644 lib/config/app_data_sources.dart create mode 100644 lib/services/exercise_catalog_refresh_service.dart create mode 100644 test/services/exercise_catalog_refresh_service_test.dart diff --git a/.github/workflows/wger-catalog-refresh.yml 
b/.github/workflows/wger-catalog-refresh.yml index 230fa48..0e3c931 100644 --- a/.github/workflows/wger-catalog-refresh.yml +++ b/.github/workflows/wger-catalog-refresh.yml @@ -8,11 +8,16 @@ name: Wger Catalog Refresh required: false default: true type: boolean + publish_release_assets: + description: "Publish DB/manifest/build-report to the catalog release channel" + required: false + default: true + type: boolean schedule: - cron: "0 5 * * 1" permissions: - contents: read + contents: write jobs: refresh-catalog: @@ -21,8 +26,16 @@ jobs: GENERATED_DB_PATH: artifacts/hypertrack_training.db BUILD_REPORT_PATH: artifacts/wger_build_report.json DIFF_REPORT_PATH: artifacts/wger_diff_report.json + MANIFEST_PATH: artifacts/wger_catalog_manifest.json + RELEASE_NOTES_PATH: artifacts/wger_release_notes.md REFERENCE_DB_PATH: assets/db/hypertrack_training.db + RELEASE_TAG: wger-catalog-stable + RELEASE_NAME: Wger Catalog Data (stable channel) + RELEASE_CHANNEL: stable + RELEASE_DOWNLOAD_BASE: https://github.com/${{ github.repository }}/releases/download/wger-catalog-stable/ + RELEASE_PAGE_URL: https://github.com/${{ github.repository }}/releases/tag/wger-catalog-stable FAIL_ON_BREAKING: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fail_on_breaking || 'true' }} + PUBLISH_RELEASE_ASSETS: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_release_assets || 'true' }} steps: - name: Checkout repository @@ -65,8 +78,121 @@ jobs: else echo "reference_db_found=false" >> "$GITHUB_OUTPUT" echo "Reference DB not found at $REFERENCE_DB_PATH. Skipping diff." 
| tee artifacts/wger_diff_skipped.txt + python - <<'PY' + import json + import os + + out = os.environ["DIFF_REPORT_PATH"] + payload = { + "skipped": True, + "reason": "reference_db_missing", + "reference_db_path": os.environ.get("REFERENCE_DB_PATH", ""), + } + with open(out, "w", encoding="utf-8") as f: + json.dump(payload, f, ensure_ascii=False, indent=2) + PY fi + - name: Build catalog manifest artifact + run: | + python - <<'PY' + import json + import os + + build_path = os.environ["BUILD_REPORT_PATH"] + diff_path = os.environ["DIFF_REPORT_PATH"] + manifest_path = os.environ["MANIFEST_PATH"] + release_base = os.environ["RELEASE_DOWNLOAD_BASE"] + + with open(build_path, "r", encoding="utf-8") as f: + build_report = json.load(f) + + diff_report = {} + if os.path.exists(diff_path): + with open(diff_path, "r", encoding="utf-8") as f: + diff_report = json.load(f) + + build = build_report.get("build", {}) + summary = build_report.get("summary", {}) + + db_file = os.path.basename(os.environ["GENERATED_DB_PATH"]) + build_report_file = os.path.basename(build_path) + diff_report_file = os.path.basename(diff_path) + + imported_count = int(summary.get("imported_count", 0) or 0) + manifest = { + "source_id": "wger_catalog", + "channel": os.environ.get("RELEASE_CHANNEL", "stable"), + "release_tag": os.environ.get("RELEASE_TAG", ""), + "release_page_url": os.environ.get("RELEASE_PAGE_URL", ""), + "asset_base_url": release_base, + "version": build.get("db_version", ""), + "generated_at": build.get("generated_at"), + "db_file": db_file, + "db_url": f"{release_base}{db_file}", + "build_report_file": build_report_file, + "build_report_url": f"{release_base}{build_report_file}", + "diff_report_file": diff_report_file, + "diff_report_url": f"{release_base}{diff_report_file}", + "expected_exercise_count": imported_count, + "min_exercise_count": imported_count if imported_count > 0 else 50, + "safety": { + "diff_skipped": bool(diff_report.get("skipped", False)), + 
"diff_removed_count": diff_report.get("summary", {}).get("removed_count"), + "diff_added_count": diff_report.get("summary", {}).get("added_count"), + }, + } + + with open(manifest_path, "w", encoding="utf-8") as f: + json.dump(manifest, f, ensure_ascii=False, indent=2) + PY + + - name: Build release notes + run: | + python - <<'PY' + import json + import os + + build_path = os.environ["BUILD_REPORT_PATH"] + diff_path = os.environ["DIFF_REPORT_PATH"] + out_path = os.environ["RELEASE_NOTES_PATH"] + + with open(build_path, "r", encoding="utf-8") as f: + build = json.load(f) + + diff = {} + if os.path.exists(diff_path): + with open(diff_path, "r", encoding="utf-8") as f: + diff = json.load(f) + + bmeta = build.get("build", {}) + bsum = build.get("summary", {}) + dsum = diff.get("summary", {}) if isinstance(diff, dict) else {} + + lines = [ + "# Wger Catalog Data Refresh", + "", + f"- Version: `{bmeta.get('db_version', 'n/a')}`", + f"- Generated at: `{bmeta.get('generated_at', 'n/a')}`", + f"- Imported exercises: `{bsum.get('imported_count', 'n/a')}`", + f"- Rejected exercises: `{bsum.get('rejected_count', 'n/a')}`", + ] + + if diff.get("skipped"): + lines.append("- Diff: skipped (reference DB missing)") + elif dsum: + lines.append(f"- Diff removed IDs: `{dsum.get('removed_count', 'n/a')}`") + lines.append(f"- Diff added IDs: `{dsum.get('added_count', 'n/a')}`") + else: + lines.append("- Diff summary unavailable") + + lines.append("") + lines.append("This is a data-artifact release channel used by app-side catalog refresh.") + + with open(out_path, "w", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") + PY + - name: Upload generated DB artifact if: always() uses: actions/upload-artifact@v4 @@ -83,22 +209,49 @@ jobs: path: ${{ env.BUILD_REPORT_PATH }} if-no-files-found: error + - name: Upload catalog manifest artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: wger-catalog-manifest + path: ${{ env.MANIFEST_PATH }} + if-no-files-found: 
error + - name: Upload diff report artifact - if: always() && steps.diff.outputs.reference_db_found == 'true' + if: always() uses: actions/upload-artifact@v4 with: name: wger-diff-report path: ${{ env.DIFF_REPORT_PATH }} if-no-files-found: error - - name: Upload diff-skip info artifact - if: always() && steps.diff.outputs.reference_db_found != 'true' + - name: Upload release notes artifact + if: always() uses: actions/upload-artifact@v4 with: - name: wger-diff-report - path: artifacts/wger_diff_skipped.txt + name: wger-release-notes + path: ${{ env.RELEASE_NOTES_PATH }} if-no-files-found: error + - name: Publish release assets (catalog channel) + id: publish_release + if: always() && env.PUBLISH_RELEASE_ASSETS == 'true' && (steps.diff.outputs.reference_db_found != 'true' || steps.diff.outcome == 'success' || env.FAIL_ON_BREAKING != 'true') + uses: ncipollo/release-action@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + tag: ${{ env.RELEASE_TAG }} + name: ${{ env.RELEASE_NAME }} + prerelease: true + allowUpdates: true + replacesArtifacts: true + makeLatest: 'false' + artifacts: | + ${{ env.GENERATED_DB_PATH }} + ${{ env.BUILD_REPORT_PATH }} + ${{ env.DIFF_REPORT_PATH }} + ${{ env.MANIFEST_PATH }} + bodyFile: ${{ env.RELEASE_NOTES_PATH }} + - name: Publish run summary if: always() run: | @@ -108,8 +261,8 @@ jobs: build_path = os.environ["BUILD_REPORT_PATH"] diff_path = os.environ["DIFF_REPORT_PATH"] - reference_found = os.environ.get("REFERENCE_FOUND", "false") - diff_outcome = os.environ.get("DIFF_OUTCOME", "unknown") + release_page_url = os.environ.get("RELEASE_PAGE_URL", "") + publish_outcome = os.environ.get("PUBLISH_OUTCOME", "skipped") lines = ["## Wger Catalog Refresh Summary", ""] @@ -120,30 +273,27 @@ jobs: summary = build.get("summary", {}) lines.append(f"- DB version: `{bmeta.get('db_version', 'n/a')}`") lines.append(f"- Generated at: `{bmeta.get('generated_at', 'n/a')}`") - lines.append(f"- Raw exercises: `{summary.get('raw_exercise_count', 'n/a')}`") 
lines.append(f"- Imported: `{summary.get('imported_count', 'n/a')}`") lines.append(f"- Rejected: `{summary.get('rejected_count', 'n/a')}`") - else: - lines.append("- Build report not found.") - if reference_found == "true" and os.path.exists(diff_path): + if os.path.exists(diff_path): with open(diff_path, "r", encoding="utf-8") as f: diff = json.load(f) - ds = diff.get("summary", {}) - lines.append(f"- Removed IDs: `{ds.get('removed_count', 'n/a')}`") - lines.append(f"- Added IDs: `{ds.get('added_count', 'n/a')}`") - lines.append(f"- Safety validation outcome: `{diff_outcome}`") - elif reference_found == "true": - lines.append("- Diff step ran but no diff JSON was produced.") - else: - lines.append("- Diff skipped: reference DB not found.") + if diff.get("skipped"): + lines.append("- Diff: skipped (reference DB missing)") + else: + dsum = diff.get("summary", {}) + lines.append(f"- Removed IDs: `{dsum.get('removed_count', 'n/a')}`") + lines.append(f"- Added IDs: `{dsum.get('added_count', 'n/a')}`") + + lines.append(f"- Release publication: `{publish_outcome}`") + lines.append(f"- Catalog release page: {release_page_url}") with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as f: f.write("\n".join(lines) + "\n") PY env: - REFERENCE_FOUND: ${{ steps.diff.outputs.reference_db_found }} - DIFF_OUTCOME: ${{ steps.diff.outcome }} + PUBLISH_OUTCOME: ${{ steps.publish_release.outcome }} - name: Enforce safety gate result if: always() && steps.diff.outputs.reference_db_found == 'true' && env.FAIL_ON_BREAKING == 'true' && steps.diff.outcome == 'failure' diff --git a/README.md b/README.md index a83d696..db44aca 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ This README is intentionally implementation-focused and reflects the **current w - [UI & Widgets](documentation/ui_and_widgets.md) - [Health Steps Module (Current Implementation)](documentation/health_steps_alpha.md) - [One-way Health Export (Current 
Implementation)](documentation/health_export_one_way.md) +- [Wger Catalog Refresh & Distribution](documentation/wger_catalog_refresh_system.md) - [Shared Analytics Definitions (Legacy Reference)](documentation/analytics_definitions.md) ## What Hypertrack currently supports diff --git a/assets/db/wger_catalog_manifest.json b/assets/db/wger_catalog_manifest.json new file mode 100644 index 0000000..d35263a --- /dev/null +++ b/assets/db/wger_catalog_manifest.json @@ -0,0 +1,17 @@ +{ + "source_id": "wger_catalog", + "channel": "stable", + "release_tag": "wger-catalog-stable", + "release_page_url": "https://github.com/rfivesix/hypertrack/releases/tag/wger-catalog-stable", + "asset_base_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/", + "version": "202512260209", + "generated_at": "2025-12-26T02:09:00Z", + "db_file": "hypertrack_training.db", + "db_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/hypertrack_training.db", + "build_report_file": "wger_build_report.json", + "build_report_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/wger_build_report.json", + "diff_report_file": "wger_diff_report.json", + "diff_report_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/wger_diff_report.json", + "expected_exercise_count": 789, + "min_exercise_count": 789 +} diff --git a/documentation/data_models_and_storage.md b/documentation/data_models_and_storage.md index 14cae7e..17e4a80 100644 --- a/documentation/data_models_and_storage.md +++ b/documentation/data_models_and_storage.md @@ -13,6 +13,31 @@ Main access paths currently in use: - `lib/data/product_database_helper.dart` - Sleep DAOs in `lib/features/sleep/data/persistence/dao/*` +## Exercise catalog source and refresh + +Bundled exercise seed data ships as: + +- `assets/db/hypertrack_training.db` + +Startup import path: + +- `lib/screens/app_initializer_screen.dart` -> 
`BasisDataManager.checkForBasisDataUpdate(...)` + +Remote refresh service: + +- `lib/services/exercise_catalog_refresh_service.dart` + +Remote source configuration is centralized in: + +- `lib/config/app_data_sources.dart` + +The app checks the release-distributed catalog manifest and can adopt a newer +catalog DB after structural validation. On any remote error, startup falls back +to the bundled asset source. + +Tracking state for remote refresh checks is kept in `SharedPreferences` keys +under the `exercise_catalog_*` namespace. + ## Core app entities (non-sleep) The traditional app model classes remain under `lib/models/*` (nutrition, workouts, measurements, supplements, chart/timeline helpers, backup serialization). diff --git a/documentation/overview.md b/documentation/overview.md index 2eb330e..c9227f8 100644 --- a/documentation/overview.md +++ b/documentation/overview.md @@ -7,6 +7,7 @@ This document describes the app as implemented in the **current working copy**. Hypertrack currently implements: - Workout tracking and analytics +- Exercise catalog refresh via release-distributed wger data artifacts - Nutrition/fluid logging - Adaptive nutrition recommendation generation (weekly due-week model with explicit manual apply) - Measurements @@ -122,5 +123,6 @@ Implemented controls include: - [Statistics module](statistics_module.md) - [Sleep current state](sleep/sleep_current_state.md) - [Health export one-way](health_export_one_way.md) +- [Wger catalog refresh & distribution](wger_catalog_refresh_system.md) - [Architecture](architecture.md) - [Data models and storage](data_models_and_storage.md) diff --git a/documentation/wger_catalog_refresh_system.md b/documentation/wger_catalog_refresh_system.md new file mode 100644 index 0000000..a561dff --- /dev/null +++ b/documentation/wger_catalog_refresh_system.md @@ -0,0 +1,171 @@ +# Wger Catalog Refresh & Distribution (Release-Asset Channel) + +This document describes the current end-to-end exercise catalog refresh 
system: + +- catalog generation from wger data +- safety validation +- GitHub Actions automation +- GitHub Release asset distribution +- app-side remote refresh/adoption + +## Scope and intent + +This flow distributes **data artifacts** (exercise catalog DB + metadata), not app binaries. + +- It is intentionally separate from app version release publishing. +- It is designed for safe, repeatable catalog refreshes. + +## Components + +### 1) Generator + build report + +Script: `skript/create_wger_exercise_db.py` + +Produces: + +- `hypertrack_training.db` +- build/rejection report JSON (optional via `--report-json-out`) + +The build report includes: + +- source/build timestamps +- imported/rejected counts +- fallback stats +- rejection reason breakdown + +### 2) Catalog diff safety validator + +Script: `skript/wger_catalog_diff.py` + +Compares old vs new catalog DB and emits: + +- metadata/version deltas +- removed/added IDs +- field-level changes +- warning flags +- optional CI-safe nonzero exit via `--fail-on-breaking` + +### 3) GitHub Actions refresh workflow + +Workflow: `.github/workflows/wger-catalog-refresh.yml` + +Triggers: + +- manual (`workflow_dispatch`) +- scheduled weekly refresh + +Build outputs: + +- `hypertrack_training.db` +- `wger_build_report.json` +- `wger_diff_report.json` +- `wger_catalog_manifest.json` + +Safety gate: + +- diff step can run with `--fail-on-breaking` +- workflow fails on dangerous changes when enabled +- artifacts are still uploaded for inspection + +Distribution: + +- workflow publishes generated files to a dedicated rolling GitHub Release tag: + - tag: `wger-catalog-stable` + - release assets: DB + manifest + reports + +## Distribution channel design + +Runtime channel is GitHub Release assets, not raw repository files. 
+ +Stable release download base: + +- `https://github.com/<owner>/<repo>/releases/download/wger-catalog-stable/` + +Key assets: + +- `wger_catalog_manifest.json` (canonical discovery doc) +- `hypertrack_training.db` (catalog payload) +- `wger_build_report.json` (diagnostics) +- `wger_diff_report.json` (safety diagnostics) + +## Manifest schema (runtime contract) + +The manifest is the canonical app discovery document. + +Important fields: + +- `source_id` +- `channel` +- `release_tag` +- `release_page_url` +- `asset_base_url` +- `version` +- `generated_at` +- `db_file` +- `db_url` +- `build_report_file` +- `build_report_url` +- `diff_report_file` +- `diff_report_url` +- `expected_exercise_count` +- `min_exercise_count` + +The app parser supports: + +- absolute URLs (`db_url`, `build_report_url`) +- release-style file keys resolved against `asset_base_url` (`db_file`, `build_report_file`) + +## App-side integration + +Central source config: + +- `lib/config/app_data_sources.dart` + +Remote refresh service: + +- `lib/services/exercise_catalog_refresh_service.dart` + +Responsibilities: + +- fetch manifest +- decide if remote version is newer +- download DB +- validate DB file/tables/columns/version/row-count threshold +- cache validated DB + manifest snapshot +- track last-check/last-error/version state in `SharedPreferences` + +Startup integration: + +- `lib/screens/app_initializer_screen.dart` -> `BasisDataManager.checkForBasisDataUpdate(...)` +- `lib/data/basis_data_manager.dart` attempts remote exercise candidate first +- on any remote failure/invalid payload, import falls back to bundled asset DB safely + +## Data safety behavior + +- Import path is currently non-destructive (`insertOrReplace`) for base exercises. +- Existing routine/history links are preserved by avoiding hard delete sweeps of exercise IDs. +- Remote validation is structural/sanity-level, not cryptographic signature verification. + +## Operational testing checklist (before broader rollout) + +1. 
Run workflow manually with `publish_release_assets=true`. +2. Verify release `wger-catalog-stable` assets were replaced. +3. Verify manifest version/URLs match uploaded assets. +4. Install app build using this code. +5. Trigger app startup with network enabled and confirm remote catalog adoption. +6. Confirm startup remains stable with network disabled or manifest fetch failure. +7. Verify routines/history still resolve exercises and analytics remain functional. + +## Changing source/channel configuration + +Update only the central config in: + +- `lib/config/app_data_sources.dart` + +Typical changes: + +- move to a different repo/org +- rename release tag/channel +- move to another hosting base URL + +Feature logic should not require URL/path edits outside this file. diff --git a/lib/config/app_data_sources.dart b/lib/config/app_data_sources.dart new file mode 100644 index 0000000..641cc00 --- /dev/null +++ b/lib/config/app_data_sources.dart @@ -0,0 +1,72 @@ +/// Central configuration for bundled and remote data sources. +/// +/// Keep URLs, asset paths, and naming conventions here so feature logic +/// doesn't hardcode environment-specific locations. +class AppDataSources { + const AppDataSources._(); + + // Bundled assets + static const String trainingAssetDbPath = 'assets/db/hypertrack_training.db'; + static const String baseFoodsAssetDbPath = + 'assets/db/hypertrack_base_foods.db'; + static const String offFoodsAssetDbPath = 'assets/db/hypertrack_prep_de.db'; + static const String foodCategoriesAssetDbPath = + 'assets/db/hypertrack_base_foods.db'; + + // Remote training-catalog source (wger-based build output channel). 
+ static const exerciseCatalog = ExerciseCatalogRemoteSourceConfig( + enabled: true, + sourceId: 'wger_catalog', + channel: 'stable', + baseUrl: + 'https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/', + manifestPath: 'wger_catalog_manifest.json', + defaultDbPath: 'hypertrack_training.db', + defaultBuildReportPath: 'wger_build_report.json', + localCacheDirectoryName: 'catalog_refresh', + localCacheDbFileName: 'hypertrack_training_remote.db', + localManifestFileName: 'wger_catalog_manifest_cached.json', + manifestTimeoutSeconds: 6, + downloadTimeoutSeconds: 30, + minCheckIntervalHours: 12, + minimumExerciseRows: 50, + ); +} + +class ExerciseCatalogRemoteSourceConfig { + final bool enabled; + final String sourceId; + final String channel; + final String baseUrl; + final String manifestPath; + final String defaultDbPath; + final String defaultBuildReportPath; + final String localCacheDirectoryName; + final String localCacheDbFileName; + final String localManifestFileName; + final int manifestTimeoutSeconds; + final int downloadTimeoutSeconds; + final int minCheckIntervalHours; + final int minimumExerciseRows; + + const ExerciseCatalogRemoteSourceConfig({ + required this.enabled, + required this.sourceId, + required this.channel, + required this.baseUrl, + required this.manifestPath, + required this.defaultDbPath, + required this.defaultBuildReportPath, + required this.localCacheDirectoryName, + required this.localCacheDbFileName, + required this.localManifestFileName, + required this.manifestTimeoutSeconds, + required this.downloadTimeoutSeconds, + required this.minCheckIntervalHours, + required this.minimumExerciseRows, + }); + + Duration get manifestTimeout => Duration(seconds: manifestTimeoutSeconds); + Duration get downloadTimeout => Duration(seconds: downloadTimeoutSeconds); + Duration get minCheckInterval => Duration(hours: minCheckIntervalHours); +} diff --git a/lib/data/basis_data_manager.dart b/lib/data/basis_data_manager.dart index 
4b919e9..1877c9c 100644 --- a/lib/data/basis_data_manager.dart +++ b/lib/data/basis_data_manager.dart @@ -2,6 +2,7 @@ import 'dart:io'; +import 'package:flutter/foundation.dart'; import 'package:flutter/services.dart' show rootBundle; import 'database_helper.dart'; import 'drift_database.dart'; @@ -11,6 +12,9 @@ import 'package:shared_preferences/shared_preferences.dart'; import 'package:sqflite/sqflite.dart' as sqflite; import 'package:drift/drift.dart' as drift; +import '../config/app_data_sources.dart'; +import '../services/exercise_catalog_refresh_service.dart'; + // Typ-Definition für den Callback typedef ProgressCallback = void Function( String task, String detail, double progress); @@ -58,10 +62,12 @@ class BasisDataManager { String key, String table, Function(Map) mapper, { + String? sourceFilePath, String? driftTable, }) async { - await _updateDatabaseFromAsset( + await _updateDatabaseFromSource( assetPath: asset, + sourceFilePath: sourceFilePath, prefKey: key, prefs: prefs, tableName: table, @@ -73,19 +79,46 @@ class BasisDataManager { ); } - // 1. Übungen + String? remoteTrainingDbPath; + final installedTrainingVersion = + prefs.getString(_keyVersionTraining) ?? '0'; + try { + onProgress?.call( + "Prüfe Übungen...", + "Suche nach Remote-Katalog-Updates...", + 0.0, + ); + final remoteCandidate = + await ExerciseCatalogRefreshService.instance.prepareUpdateCandidate( + installedVersion: installedTrainingVersion, + force: force, + ); + if (remoteCandidate != null) { + remoteTrainingDbPath = remoteCandidate.localDbPath; + onProgress?.call( + "Update Übungen", + "Remote-Katalog ${remoteCandidate.version} gefunden.", + 0.02, + ); + } + } catch (e) { + debugPrint('Remote exercise catalog check skipped safely: $e'); + } + + // 1. 
Übungen (Remote-Candidate wenn verfügbar, sonst Asset) await process( 'Übungen', - 'assets/db/hypertrack_training.db', + AppDataSources.trainingAssetDbPath, _keyVersionTraining, 'exercises', _mapExerciseRow, + sourceFilePath: remoteTrainingDbPath, ); // 2a. Base Foods await process( 'Basis-Produkte', - 'assets/db/hypertrack_base_foods.db', + AppDataSources.baseFoodsAssetDbPath, _keyVersionFood, 'products', (row) => _mapProductRow(row, sourceLabel: 'base'), @@ -94,7 +127,7 @@ class BasisDataManager { // 2b. Kategorien await process( 'Kategorien', - 'assets/db/hypertrack_base_foods.db', + AppDataSources.foodCategoriesAssetDbPath, _keyVersionCats, 'categories', _mapCategoryRow, @@ -104,15 +137,16 @@ class BasisDataManager { // 3. OFF Datenbank (Das große File) await process( 'Produktdatenbank', - 'assets/db/hypertrack_prep_de.db', + AppDataSources.offFoodsAssetDbPath, _keyVersionOff, 'products', (row) => _mapProductRow(row, sourceLabel: 'off'), ); } - Future _updateDatabaseFromAsset({ + Future _updateDatabaseFromSource({ required String assetPath, + String? 
sourceFilePath, required String prefKey, required SharedPreferences prefs, required String tableName, @@ -129,23 +163,37 @@ class BasisDataManager { // Initiale Meldung (0%) onProgress?.call("Prüfe $taskLabel...", "Initialisiere...", 0.0); - final tempDir = await getTemporaryDirectory(); - final tempPath = p.join(tempDir.path, p.basename(assetPath)); - - try { - final byteData = await rootBundle.load(assetPath); - tempFile = File(tempPath); - await tempFile.writeAsBytes( - byteData.buffer.asUint8List( - byteData.offsetInBytes, - byteData.lengthInBytes, - ), - ); - } catch (e) { - return; + if (sourceFilePath != null && + sourceFilePath.isNotEmpty && + await File(sourceFilePath).exists()) { + try { + assetDb = await sqflite.openDatabase(sourceFilePath, readOnly: true); + } catch (e) { + debugPrint( + 'Falling back to bundled asset for $taskLabel (remote source failed): $e', + ); + } } - assetDb = await sqflite.openDatabase(tempPath, readOnly: true); + if (assetDb == null) { + final tempDir = await getTemporaryDirectory(); + final tempPath = p.join(tempDir.path, p.basename(assetPath)); + + try { + final byteData = await rootBundle.load(assetPath); + tempFile = File(tempPath); + await tempFile.writeAsBytes( + byteData.buffer.asUint8List( + byteData.offsetInBytes, + byteData.lengthInBytes, + ), + ); + } catch (e) { + return; + } + + assetDb = await sqflite.openDatabase(tempPath, readOnly: true); + } var checkTable = tableName; if (tableName == 'exercises') { @@ -336,7 +384,9 @@ class BasisDataManager { mode: drift.InsertMode.insertOrReplace, ); } - } catch (e) {} + } catch (e) { + debugPrint('Skipping malformed import row for $taskLabel: $e'); + } } }); diff --git a/lib/services/db_service.dart b/lib/services/db_service.dart index 7b28d80..0a964c3 100644 --- a/lib/services/db_service.dart +++ b/lib/services/db_service.dart @@ -3,6 +3,8 @@ import 'package:flutter/services.dart' show rootBundle; import 'package:path/path.dart' as p; import 'package:sqflite/sqflite.dart'; 
+import '../config/app_data_sources.dart'; + /// Service responsible for managing the local SQLite database. /// /// Handles database initialization, asset copying for pre-populated data, @@ -28,7 +30,7 @@ class DbService { // Falls Datei noch nicht existiert: aus Assets kopieren if (!await File(dbPath).exists()) { - final bytes = await rootBundle.load('assets/db/hypertrack_training.db'); + final bytes = await rootBundle.load(AppDataSources.trainingAssetDbPath); await File(dbPath).writeAsBytes( bytes.buffer.asUint8List(bytes.offsetInBytes, bytes.lengthInBytes), flush: true, diff --git a/lib/services/exercise_catalog_refresh_service.dart b/lib/services/exercise_catalog_refresh_service.dart new file mode 100644 index 0000000..171add9 --- /dev/null +++ b/lib/services/exercise_catalog_refresh_service.dart @@ -0,0 +1,617 @@ +import 'dart:convert'; +import 'dart:io'; + +import 'package:flutter/foundation.dart'; +import 'package:http/http.dart' as http; +import 'package:path/path.dart' as p; +import 'package:path_provider/path_provider.dart'; +import 'package:shared_preferences/shared_preferences.dart'; +import 'package:sqflite/sqflite.dart' as sqflite; + +import '../config/app_data_sources.dart'; + +class ExerciseCatalogManifest { + final String version; + final Uri dbUri; + final Uri? buildReportUri; + final String sourceId; + final String channel; + final DateTime? generatedAt; + final int? 
minimumExerciseRows; + + const ExerciseCatalogManifest({ + required this.version, + required this.dbUri, + required this.buildReportUri, + required this.sourceId, + required this.channel, + required this.generatedAt, + required this.minimumExerciseRows, + }); +} + +class ExerciseCatalogUpdateCandidate { + final String version; + final String localDbPath; + final Uri manifestUri; + final Uri dbUri; + final bool fromCache; + + const ExerciseCatalogUpdateCandidate({ + required this.version, + required this.localDbPath, + required this.manifestUri, + required this.dbUri, + required this.fromCache, + }); +} + +class ExerciseCatalogRefreshSnapshot { + final String installedVersion; + final String? cachedVersion; + final String? lastKnownRemoteVersion; + final DateTime? lastCheckedAt; + final String? lastError; + + const ExerciseCatalogRefreshSnapshot({ + required this.installedVersion, + required this.cachedVersion, + required this.lastKnownRemoteVersion, + required this.lastCheckedAt, + required this.lastError, + }); +} + +typedef NowProvider = DateTime Function(); +typedef SupportDirectoryProvider = Future Function(); +typedef TempDirectoryProvider = Future Function(); +typedef PrefsProvider = Future Function(); + +/// Handles remote exercise-catalog update discovery, download, and validation. +/// +/// The service keeps network/source details in central config and degrades +/// gracefully by returning `null` on any remote failure. +class ExerciseCatalogRefreshService { + ExerciseCatalogRefreshService._({ + http.Client? httpClient, + ExerciseCatalogRemoteSourceConfig? config, + NowProvider? nowProvider, + SupportDirectoryProvider? supportDirectoryProvider, + TempDirectoryProvider? tempDirectoryProvider, + PrefsProvider? prefsProvider, + }) : _httpClient = httpClient ?? http.Client(), + _config = config ?? AppDataSources.exerciseCatalog, + _nowProvider = nowProvider ?? DateTime.now, + _supportDirectoryProvider = + supportDirectoryProvider ?? 
getApplicationSupportDirectory, + _tempDirectoryProvider = tempDirectoryProvider ?? getTemporaryDirectory, + _prefsProvider = prefsProvider ?? SharedPreferences.getInstance; + + static final ExerciseCatalogRefreshService instance = + ExerciseCatalogRefreshService._(); + + @visibleForTesting + factory ExerciseCatalogRefreshService.forTesting({ + http.Client? httpClient, + ExerciseCatalogRemoteSourceConfig? config, + NowProvider? nowProvider, + SupportDirectoryProvider? supportDirectoryProvider, + TempDirectoryProvider? tempDirectoryProvider, + PrefsProvider? prefsProvider, + }) { + return ExerciseCatalogRefreshService._( + httpClient: httpClient, + config: config, + nowProvider: nowProvider, + supportDirectoryProvider: supportDirectoryProvider, + tempDirectoryProvider: tempDirectoryProvider, + prefsProvider: prefsProvider, + ); + } + + final http.Client _httpClient; + final ExerciseCatalogRemoteSourceConfig _config; + final NowProvider _nowProvider; + final SupportDirectoryProvider _supportDirectoryProvider; + final TempDirectoryProvider _tempDirectoryProvider; + final PrefsProvider _prefsProvider; + + static const String _keyLastRemoteVersion = + 'exercise_catalog_last_remote_version'; + static const String _keyLastCheckedAtMs = 'exercise_catalog_last_checked_at'; + static const String _keyCachedCatalogVersion = + 'exercise_catalog_cached_version'; + static const String _keyLastError = 'exercise_catalog_last_error'; + + static const Set _requiredTables = {'exercises', 'metadata'}; + static const Set _requiredExerciseColumns = { + 'id', + 'name_de', + 'name_en', + 'description_de', + 'description_en', + 'category_name', + 'muscles_primary', + 'muscles_secondary', + }; + + Future prepareUpdateCandidate({ + required String installedVersion, + bool force = false, + }) async { + if (!_config.enabled) { + return null; + } + + final prefs = await _prefsProvider(); + final cachePath = await _cachedDbPath(); + final manifestUri = _resolveUrlOrPath( + _config.baseUrl, + 
_config.manifestPath, + ); + + // If a valid cached catalog exists and is newer than installed, use it. + final cachedVersion = prefs.getString(_keyCachedCatalogVersion); + if (cachedVersion != null && + isRemoteVersionNewer( + remoteVersion: cachedVersion, + installedVersion: installedVersion, + )) { + final cachedValidation = await _validateCatalogDb( + dbPath: cachePath, + expectedVersion: cachedVersion, + minimumRows: _config.minimumExerciseRows, + ); + if (cachedValidation.isValid) { + return ExerciseCatalogUpdateCandidate( + version: cachedVersion, + localDbPath: cachePath, + manifestUri: manifestUri, + dbUri: Uri.file(cachePath), + fromCache: true, + ); + } + } + + final now = _nowProvider(); + final lastCheckedMs = prefs.getInt(_keyLastCheckedAtMs); + if (!force && + !shouldCheckRemoteNow( + now: now, + lastCheckedEpochMs: lastCheckedMs, + minCheckInterval: _config.minCheckInterval, + )) { + return null; + } + await prefs.setInt(_keyLastCheckedAtMs, now.millisecondsSinceEpoch); + + try { + final manifest = await _fetchManifest(manifestUri); + if (manifest == null) { + await prefs.setString( + _keyLastError, + 'Manifest fetch failed or invalid payload.', + ); + return null; + } + + await prefs.setString(_keyLastRemoteVersion, manifest.version); + + final shouldDownload = force || + isRemoteVersionNewer( + remoteVersion: manifest.version, + installedVersion: installedVersion, + ); + if (!shouldDownload) { + await prefs.remove(_keyLastError); + return null; + } + + final tempDir = await _tempDirectoryProvider(); + final tempDbPath = p.join( + tempDir.path, + 'hypertrack_training_remote_${now.millisecondsSinceEpoch}.db', + ); + + final downloaded = await _downloadFile( + manifest.dbUri, + tempDbPath, + timeout: _config.downloadTimeout, + ); + if (!downloaded) { + await prefs.setString( + _keyLastError, + 'Download failed for ${manifest.dbUri}', + ); + return null; + } + + final validated = await _validateCatalogDb( + dbPath: tempDbPath, + expectedVersion: 
manifest.version, + minimumRows: + manifest.minimumExerciseRows ?? _config.minimumExerciseRows, + ); + if (!validated.isValid) { + await prefs.setString( + _keyLastError, + validated.error ?? 'Downloaded DB validation failed.', + ); + await _deleteIfExists(tempDbPath); + return null; + } + + await File(cachePath).parent.create(recursive: true); + await _deleteIfExists(cachePath); + await File(tempDbPath).copy(cachePath); + await _deleteIfExists(tempDbPath); + + await prefs.setString(_keyCachedCatalogVersion, manifest.version); + await prefs.remove(_keyLastError); + + await _cacheManifestJson( + manifestUri: manifestUri, + manifest: manifest, + ); + + return ExerciseCatalogUpdateCandidate( + version: manifest.version, + localDbPath: cachePath, + manifestUri: manifestUri, + dbUri: manifest.dbUri, + fromCache: false, + ); + } catch (e) { + await prefs.setString(_keyLastError, e.toString()); + debugPrint('Exercise catalog refresh skipped (safe fallback): $e'); + return null; + } + } + + Future readSnapshot({ + required String installedVersion, + }) async { + final prefs = await _prefsProvider(); + final lastCheckedMs = prefs.getInt(_keyLastCheckedAtMs); + return ExerciseCatalogRefreshSnapshot( + installedVersion: installedVersion, + cachedVersion: prefs.getString(_keyCachedCatalogVersion), + lastKnownRemoteVersion: prefs.getString(_keyLastRemoteVersion), + lastCheckedAt: lastCheckedMs == null + ? 
null + : DateTime.fromMillisecondsSinceEpoch(lastCheckedMs), + lastError: prefs.getString(_keyLastError), + ); + } + + Future _cachedDbPath() async { + final supportDir = await _supportDirectoryProvider(); + final cacheDir = Directory( + p.join(supportDir.path, _config.localCacheDirectoryName), + ); + if (!await cacheDir.exists()) { + await cacheDir.create(recursive: true); + } + return p.join(cacheDir.path, _config.localCacheDbFileName); + } + + Future _cacheManifestJson({ + required Uri manifestUri, + required ExerciseCatalogManifest manifest, + }) async { + final supportDir = await _supportDirectoryProvider(); + final cacheDir = Directory( + p.join(supportDir.path, _config.localCacheDirectoryName), + ); + if (!await cacheDir.exists()) { + await cacheDir.create(recursive: true); + } + final manifestFile = File( + p.join(cacheDir.path, _config.localManifestFileName), + ); + final map = { + 'source_id': manifest.sourceId, + 'channel': manifest.channel, + 'version': manifest.version, + 'generated_at': manifest.generatedAt?.toIso8601String(), + 'db_url': manifest.dbUri.toString(), + 'build_report_url': manifest.buildReportUri?.toString(), + 'manifest_url': manifestUri.toString(), + 'minimum_exercise_rows': manifest.minimumExerciseRows, + 'cached_at': _nowProvider().toIso8601String(), + }; + await manifestFile.writeAsString( + jsonEncode(map), + flush: true, + ); + } + + Future _fetchManifest(Uri manifestUri) async { + final response = await _httpClient.get(manifestUri, headers: const { + 'Accept': 'application/json' + }).timeout(_config.manifestTimeout); + + if (response.statusCode < 200 || response.statusCode >= 300) { + return null; + } + + final decoded = jsonDecode(utf8.decode(response.bodyBytes)); + if (decoded is! Map) { + return null; + } + return parseManifest(decoded, _config); + } + + static ExerciseCatalogManifest? parseManifest( + Map json, + ExerciseCatalogRemoteSourceConfig config, + ) { + final build = json['build'] is Map + ? 
json['build'] as Map + : const {}; + + final version = _firstNonBlankString([ + json['version'], + json['db_version'], + build['db_version'], + ]); + if (version == null) { + return null; + } + + final sourceId = + _firstNonBlankString([json['source_id'], build['source_id']]) ?? + config.sourceId; + final channel = _firstNonBlankString([json['channel'], build['channel']]) ?? + config.channel; + final effectiveBaseUrl = _firstNonBlankString([ + json['asset_base_url'], + json['download_base_url'], + json['base_url'], + ]) ?? + config.baseUrl; + + final dbUri = _resolveFromManifest( + baseUrl: effectiveBaseUrl, + urlValue: _firstNonBlankString([ + json['db_url'], + json['database_url'], + ]), + pathValue: _firstNonBlankString([ + json['db_path'], + json['database_path'], + json['db_file'], + ]), + fallbackPath: config.defaultDbPath, + ); + if (dbUri == null) { + return null; + } + + final buildReportUri = _resolveFromManifest( + baseUrl: effectiveBaseUrl, + urlValue: _firstNonBlankString([ + json['build_report_url'], + json['report_url'], + ]), + pathValue: _firstNonBlankString([ + json['build_report_file'], + json['build_report_path'], + json['report_path'], + ]), + fallbackPath: config.defaultBuildReportPath, + ); + + final generatedAtRaw = + _firstNonBlankString([json['generated_at'], build['generated_at']]); + final generatedAt = + generatedAtRaw != null ? DateTime.tryParse(generatedAtRaw) : null; + + final minimumRows = _parseInt(json['minimum_exercise_rows']) ?? + _parseInt(json['min_exercise_count']) ?? + _parseInt(json['expected_exercise_count']) ?? 
+ _parseInt(json['min_rows']); + + return ExerciseCatalogManifest( + version: version, + dbUri: dbUri, + buildReportUri: buildReportUri, + sourceId: sourceId, + channel: channel, + generatedAt: generatedAt, + minimumExerciseRows: minimumRows, + ); + } + + static bool isRemoteVersionNewer({ + required String remoteVersion, + required String installedVersion, + }) { + final normalizedRemote = remoteVersion.trim(); + final normalizedInstalled = installedVersion.trim(); + if (normalizedRemote.isEmpty) return false; + if (normalizedInstalled.isEmpty) return true; + return normalizedRemote.compareTo(normalizedInstalled) > 0; + } + + static bool shouldCheckRemoteNow({ + required DateTime now, + required int? lastCheckedEpochMs, + required Duration minCheckInterval, + }) { + if (lastCheckedEpochMs == null) return true; + final lastChecked = DateTime.fromMillisecondsSinceEpoch(lastCheckedEpochMs); + return now.difference(lastChecked) >= minCheckInterval; + } + + Future _downloadFile( + Uri uri, + String destinationPath, { + required Duration timeout, + }) async { + final response = await _httpClient.get(uri).timeout(timeout); + if (response.statusCode < 200 || response.statusCode >= 300) { + return false; + } + final file = File(destinationPath); + await file.parent.create(recursive: true); + await file.writeAsBytes(response.bodyBytes, flush: true); + return true; + } + + Future<_CatalogDbValidationResult> _validateCatalogDb({ + required String dbPath, + required String expectedVersion, + required int minimumRows, + }) async { + if (!await File(dbPath).exists()) { + return const _CatalogDbValidationResult( + isValid: false, + error: 'Catalog DB file is missing.', + ); + } + + sqflite.Database? 
db; + try { + db = await sqflite.openDatabase(dbPath, readOnly: true); + final tableRows = await db.rawQuery( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'", + ); + final tables = tableRows + .map((row) => row['name']?.toString()) + .whereType() + .toSet(); + if (!_requiredTables.every(tables.contains)) { + return const _CatalogDbValidationResult( + isValid: false, + error: 'Catalog DB missing required tables.', + ); + } + + final pragmaRows = await db.rawQuery('PRAGMA table_info(exercises)'); + final columns = pragmaRows + .map((row) => row['name']?.toString()) + .whereType() + .toSet(); + if (!_requiredExerciseColumns.every(columns.contains)) { + return const _CatalogDbValidationResult( + isValid: false, + error: 'Catalog DB missing required exercise columns.', + ); + } + + final versionRows = await db.query( + 'metadata', + where: 'key = ?', + whereArgs: ['version'], + ); + final version = versionRows.isNotEmpty + ? (versionRows.first['value']?.toString().trim() ?? '') + : ''; + if (version.isEmpty) { + return const _CatalogDbValidationResult( + isValid: false, + error: 'Catalog DB metadata.version is missing.', + ); + } + if (expectedVersion.isNotEmpty && version != expectedVersion) { + return _CatalogDbValidationResult( + isValid: false, + error: + 'Catalog DB version mismatch. expected=$expectedVersion actual=$version', + ); + } + + final countRows = + await db.rawQuery('SELECT COUNT(*) as c FROM exercises'); + final rowCount = sqflite.Sqflite.firstIntValue(countRows) ?? 0; + if (rowCount < minimumRows) { + return _CatalogDbValidationResult( + isValid: false, + error: + 'Catalog DB row count too low. 
count=$rowCount minimum=$minimumRows', + ); + } + + return _CatalogDbValidationResult( + isValid: true, + version: version, + rowCount: rowCount, + ); + } catch (e) { + return _CatalogDbValidationResult( + isValid: false, + error: 'Catalog DB validation failed: $e', + ); + } finally { + await db?.close(); + } + } + + Future _deleteIfExists(String filePath) async { + final file = File(filePath); + if (await file.exists()) { + await file.delete(); + } + } + + static Uri _resolveUrlOrPath(String baseUrl, String value) { + final parsed = Uri.tryParse(value); + if (parsed != null && parsed.hasScheme) { + return parsed; + } + final base = Uri.parse(baseUrl); + return base.resolve(value); + } + + static Uri? _resolveFromManifest({ + required String baseUrl, + required String? urlValue, + required String? pathValue, + required String fallbackPath, + }) { + final preferred = urlValue?.trim(); + if (preferred != null && preferred.isNotEmpty) { + final uri = Uri.tryParse(preferred); + if (uri != null && uri.hasScheme) return uri; + return _resolveUrlOrPath(baseUrl, preferred); + } + + final path = (pathValue?.trim().isNotEmpty ?? false) + ? pathValue!.trim() + : fallbackPath; + if (path.isEmpty) return null; + return _resolveUrlOrPath(baseUrl, path); + } + + static String? _firstNonBlankString(List values) { + for (final value in values) { + final text = value?.toString().trim() ?? ''; + if (text.isNotEmpty) { + return text; + } + } + return null; + } + + static int? _parseInt(dynamic value) { + if (value == null) return null; + if (value is int) return value; + if (value is num) return value.toInt(); + return int.tryParse(value.toString()); + } +} + +class _CatalogDbValidationResult { + final bool isValid; + final String? version; + final int? rowCount; + final String? 
error; + + const _CatalogDbValidationResult({ + required this.isValid, + this.version, + this.rowCount, + this.error, + }); +} diff --git a/test/services/exercise_catalog_refresh_service_test.dart b/test/services/exercise_catalog_refresh_service_test.dart new file mode 100644 index 0000000..f7bcea8 --- /dev/null +++ b/test/services/exercise_catalog_refresh_service_test.dart @@ -0,0 +1,134 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:hypertrack/config/app_data_sources.dart'; +import 'package:hypertrack/services/exercise_catalog_refresh_service.dart'; + +void main() { + const config = ExerciseCatalogRemoteSourceConfig( + enabled: true, + sourceId: 'wger_catalog', + channel: 'stable', + baseUrl: 'https://example.com/root/', + manifestPath: 'manifest.json', + defaultDbPath: 'db/hypertrack_training.db', + defaultBuildReportPath: 'reports/wger_build_report.json', + localCacheDirectoryName: 'catalog_refresh', + localCacheDbFileName: 'hypertrack_training_remote.db', + localManifestFileName: 'wger_manifest_cached.json', + manifestTimeoutSeconds: 5, + downloadTimeoutSeconds: 15, + minCheckIntervalHours: 6, + minimumExerciseRows: 50, + ); + + group('ExerciseCatalogRefreshService.parseManifest', () { + test('parses relative db/report paths against base url', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'version': '202601010001', + 'db_path': 'artifacts/hypertrack_training.db', + 'build_report_path': 'artifacts/wger_build_report.json', + 'min_exercise_count': 123, + }, config); + + expect(manifest, isNotNull); + expect( + manifest!.dbUri.toString(), + 'https://example.com/root/artifacts/hypertrack_training.db', + ); + expect( + manifest.buildReportUri!.toString(), + 'https://example.com/root/artifacts/wger_build_report.json', + ); + expect(manifest.minimumExerciseRows, 123); + expect(manifest.version, '202601010001'); + }); + + test('uses absolute urls from manifest directly', () { + final manifest = 
ExerciseCatalogRefreshService.parseManifest({ + 'version': '202602020002', + 'db_url': 'https://cdn.example.net/catalog/training.db', + }, config); + + expect(manifest, isNotNull); + expect( + manifest!.dbUri.toString(), + 'https://cdn.example.net/catalog/training.db', + ); + expect(manifest.sourceId, 'wger_catalog'); + expect(manifest.channel, 'stable'); + }); + + test('supports release-style manifest with asset_base_url and *_file keys', + () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'version': '202603030003', + 'asset_base_url': 'https://github.com/org/repo/releases/download/tag/', + 'db_file': 'hypertrack_training.db', + 'build_report_file': 'wger_build_report.json', + 'expected_exercise_count': 777, + }, config); + + expect(manifest, isNotNull); + expect( + manifest!.dbUri.toString(), + 'https://github.com/org/repo/releases/download/tag/hypertrack_training.db', + ); + expect( + manifest.buildReportUri!.toString(), + 'https://github.com/org/repo/releases/download/tag/wger_build_report.json', + ); + expect(manifest.minimumExerciseRows, 777); + }); + }); + + group('ExerciseCatalogRefreshService version/check logic', () { + test('detects newer remote version', () { + expect( + ExerciseCatalogRefreshService.isRemoteVersionNewer( + remoteVersion: '202701010001', + installedVersion: '202601010001', + ), + isTrue, + ); + expect( + ExerciseCatalogRefreshService.isRemoteVersionNewer( + remoteVersion: '202601010001', + installedVersion: '202601010001', + ), + isFalse, + ); + }); + + test('respects minimum remote-check interval', () { + final now = DateTime(2026, 4, 12, 12, 0, 0); + final oneHourAgo = now.subtract(const Duration(hours: 1)); + final sevenHoursAgo = now.subtract(const Duration(hours: 7)); + + expect( + ExerciseCatalogRefreshService.shouldCheckRemoteNow( + now: now, + lastCheckedEpochMs: null, + minCheckInterval: const Duration(hours: 6), + ), + isTrue, + ); + + expect( + ExerciseCatalogRefreshService.shouldCheckRemoteNow( + now: 
now, + lastCheckedEpochMs: oneHourAgo.millisecondsSinceEpoch, + minCheckInterval: const Duration(hours: 6), + ), + isFalse, + ); + + expect( + ExerciseCatalogRefreshService.shouldCheckRemoteNow( + now: now, + lastCheckedEpochMs: sevenHoursAgo.millisecondsSinceEpoch, + minCheckInterval: const Duration(hours: 6), + ), + isTrue, + ); + }); + }); +} From 375ac079a94641d20ce0fab669dda7cead6398f8 Mon Sep 17 00:00:00 2001 From: Richard Georg Schotte Date: Mon, 13 Apr 2026 00:29:57 +0200 Subject: [PATCH 5/5] 0.8.4 v1 --- .github/workflows/wger-catalog-refresh.yml | 19 +++- assets/db/wger_catalog_manifest.json | 3 +- documentation/wger_catalog_refresh_system.md | 25 +++++ .../exercise_catalog_refresh_service.dart | 84 +++++++++++++++- ...exercise_catalog_refresh_service_test.dart | 95 ++++++++++++++++++- 5 files changed, 218 insertions(+), 8 deletions(-) diff --git a/.github/workflows/wger-catalog-refresh.yml b/.github/workflows/wger-catalog-refresh.yml index 0e3c931..c34c771 100644 --- a/.github/workflows/wger-catalog-refresh.yml +++ b/.github/workflows/wger-catalog-refresh.yml @@ -96,7 +96,9 @@ jobs: - name: Build catalog manifest artifact run: | python - <<'PY' + import hashlib import json + import math import os build_path = os.environ["BUILD_REPORT_PATH"] @@ -119,7 +121,19 @@ jobs: build_report_file = os.path.basename(build_path) diff_report_file = os.path.basename(diff_path) + def sha256_file(path: str) -> str: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + db_sha256 = sha256_file(os.environ["GENERATED_DB_PATH"]) + build_report_sha256 = sha256_file(build_path) + diff_report_sha256 = sha256_file(diff_path) if os.path.exists(diff_path) else None + imported_count = int(summary.get("imported_count", 0) or 0) + min_exercise_count = max(50, math.floor(imported_count * 0.85)) manifest = { "source_id": "wger_catalog", "channel": os.environ.get("RELEASE_CHANNEL", 
"stable"), @@ -130,12 +144,15 @@ jobs: "generated_at": build.get("generated_at"), "db_file": db_file, "db_url": f"{release_base}{db_file}", + "db_sha256": db_sha256, "build_report_file": build_report_file, "build_report_url": f"{release_base}{build_report_file}", + "build_report_sha256": build_report_sha256, "diff_report_file": diff_report_file, "diff_report_url": f"{release_base}{diff_report_file}", + "diff_report_sha256": diff_report_sha256, "expected_exercise_count": imported_count, - "min_exercise_count": imported_count if imported_count > 0 else 50, + "min_exercise_count": int(min_exercise_count), "safety": { "diff_skipped": bool(diff_report.get("skipped", False)), "diff_removed_count": diff_report.get("summary", {}).get("removed_count"), diff --git a/assets/db/wger_catalog_manifest.json b/assets/db/wger_catalog_manifest.json index d35263a..60cca90 100644 --- a/assets/db/wger_catalog_manifest.json +++ b/assets/db/wger_catalog_manifest.json @@ -8,10 +8,11 @@ "generated_at": "2025-12-26T02:09:00Z", "db_file": "hypertrack_training.db", "db_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/hypertrack_training.db", + "db_sha256": "77967b5658cb841cdfbef830088167289f3e30000bc64b101fad5de1281fd12a", "build_report_file": "wger_build_report.json", "build_report_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/wger_build_report.json", "diff_report_file": "wger_diff_report.json", "diff_report_url": "https://github.com/rfivesix/hypertrack/releases/download/wger-catalog-stable/wger_diff_report.json", "expected_exercise_count": 789, - "min_exercise_count": 789 + "min_exercise_count": 670 } diff --git a/documentation/wger_catalog_refresh_system.md b/documentation/wger_catalog_refresh_system.md index a561dff..56f6d9c 100644 --- a/documentation/wger_catalog_refresh_system.md +++ b/documentation/wger_catalog_refresh_system.md @@ -103,6 +103,7 @@ Important fields: - `generated_at` - `db_file` - `db_url` +- 
`db_sha256` - `build_report_file` - `build_report_url` - `diff_report_file` @@ -110,6 +111,28 @@ Important fields: - `expected_exercise_count` - `min_exercise_count` +Validation semantics: + +- `expected_exercise_count` is informational metadata (observability/debugging only). +- `min_exercise_count` is the actual hard lower validation floor used by the app. +- The workflow computes `min_exercise_count` conservatively as: + - `max(50, floor(imported_count * 0.85))` + - This intentionally allows small legitimate source fluctuations while still + blocking obviously broken payloads. +- `db_sha256` is required for payload integrity: + - the app computes SHA-256 of the downloaded DB and rejects mismatches before + structural DB validation. + +Manifest validation hardening (app side): + +- `source_id` and `channel` must match configured expected values. +- `version` must be present and non-blank. +- DB location must be resolvable. +- remote URLs must use `https`. +- `min_exercise_count` must be `> 0` if present. +- if both counts are present: `expected_exercise_count >= min_exercise_count`. +- invalid/malformed manifests are rejected with safe fallback. + The app parser supports: - absolute URLs (`db_url`, `build_report_url`) @@ -145,6 +168,8 @@ Startup integration: - Import path is currently non-destructive (`insertOrReplace`) for base exercises. - Existing routine/history links are preserved by avoiding hard delete sweeps of exercise IDs. - Remote validation is structural/sanity-level, not cryptographic signature verification. +- Payload integrity currently uses SHA-256 checksums from the manifest. +- Digital signature verification is intentionally not implemented yet. 
## Operational testing checklist (before broader rollout) diff --git a/lib/services/exercise_catalog_refresh_service.dart b/lib/services/exercise_catalog_refresh_service.dart index 171add9..86d6af4 100644 --- a/lib/services/exercise_catalog_refresh_service.dart +++ b/lib/services/exercise_catalog_refresh_service.dart @@ -1,6 +1,7 @@ import 'dart:convert'; import 'dart:io'; +import 'package:crypto/crypto.dart'; import 'package:flutter/foundation.dart'; import 'package:http/http.dart' as http; import 'package:path/path.dart' as p; @@ -17,7 +18,9 @@ class ExerciseCatalogManifest { final String sourceId; final String channel; final DateTime? generatedAt; + final int? expectedExerciseRows; final int? minimumExerciseRows; + final String dbSha256; const ExerciseCatalogManifest({ required this.version, @@ -26,7 +29,9 @@ class ExerciseCatalogManifest { required this.sourceId, required this.channel, required this.generatedAt, + required this.expectedExerciseRows, required this.minimumExerciseRows, + required this.dbSha256, }); } @@ -226,6 +231,16 @@ class ExerciseCatalogRefreshService { return null; } + final actualDbSha256 = await _computeFileSha256(tempDbPath); + if (!_sha256Equals(actualDbSha256, manifest.dbSha256)) { + await prefs.setString( + _keyLastError, + 'Downloaded DB checksum mismatch. 
expected=${manifest.dbSha256} actual=$actualDbSha256', + ); + await _deleteIfExists(tempDbPath); + return null; + } + final validated = await _validateCatalogDb( dbPath: tempDbPath, expectedVersion: manifest.version, @@ -315,8 +330,10 @@ class ExerciseCatalogRefreshService { 'version': manifest.version, 'generated_at': manifest.generatedAt?.toIso8601String(), 'db_url': manifest.dbUri.toString(), + 'db_sha256': manifest.dbSha256, 'build_report_url': manifest.buildReportUri?.toString(), 'manifest_url': manifestUri.toString(), + 'expected_exercise_count': manifest.expectedExerciseRows, 'minimum_exercise_rows': manifest.minimumExerciseRows, 'cached_at': _nowProvider().toIso8601String(), }; @@ -364,12 +381,49 @@ class ExerciseCatalogRefreshService { config.sourceId; final channel = _firstNonBlankString([json['channel'], build['channel']]) ?? config.channel; + if (sourceId != config.sourceId || channel != config.channel) { + return null; + } + + final dbSha256 = _firstNonBlankString([json['db_sha256']]); + if (dbSha256 == null || !_isValidSha256(dbSha256)) { + return null; + } + + final expectedRows = _parseInt(json['expected_exercise_count']); + final minimumRows = _parseInt(json['minimum_exercise_rows']) ?? + _parseInt(json['min_exercise_count']) ?? + _parseInt(json['min_rows']); + if (minimumRows != null && minimumRows <= 0) { + return null; + } + if (expectedRows != null && expectedRows <= 0) { + return null; + } + if (expectedRows != null && + minimumRows != null && + expectedRows < minimumRows) { + return null; + } + + final usesFileKeys = _firstNonBlankString([ + json['db_file'], + json['db_path'], + json['database_path'], + json['build_report_file'], + json['build_report_path'], + json['report_path'], + ]) != + null; final effectiveBaseUrl = _firstNonBlankString([ json['asset_base_url'], json['download_base_url'], json['base_url'], ]) ?? 
config.baseUrl; + if (usesFileKeys && effectiveBaseUrl.trim().isEmpty) { + return null; + } final dbUri = _resolveFromManifest( baseUrl: effectiveBaseUrl, @@ -387,6 +441,9 @@ class ExerciseCatalogRefreshService { if (dbUri == null) { return null; } + if (!_isSecureRemoteUri(dbUri)) { + return null; + } final buildReportUri = _resolveFromManifest( baseUrl: effectiveBaseUrl, @@ -401,17 +458,15 @@ class ExerciseCatalogRefreshService { ]), fallbackPath: config.defaultBuildReportPath, ); + if (buildReportUri != null && !_isSecureRemoteUri(buildReportUri)) { + return null; + } final generatedAtRaw = _firstNonBlankString([json['generated_at'], build['generated_at']]); final generatedAt = generatedAtRaw != null ? DateTime.tryParse(generatedAtRaw) : null; - final minimumRows = _parseInt(json['minimum_exercise_rows']) ?? - _parseInt(json['min_exercise_count']) ?? - _parseInt(json['expected_exercise_count']) ?? - _parseInt(json['min_rows']); - return ExerciseCatalogManifest( version: version, dbUri: dbUri, @@ -419,7 +474,9 @@ class ExerciseCatalogRefreshService { sourceId: sourceId, channel: channel, generatedAt: generatedAt, + expectedExerciseRows: expectedRows, minimumExerciseRows: minimumRows, + dbSha256: dbSha256.toLowerCase(), ); } @@ -555,6 +612,15 @@ class ExerciseCatalogRefreshService { } } + Future _computeFileSha256(String filePath) async { + final bytes = await File(filePath).readAsBytes(); + return sha256.convert(bytes).toString(); + } + + static bool _sha256Equals(String a, String b) { + return a.trim().toLowerCase() == b.trim().toLowerCase(); + } + static Uri _resolveUrlOrPath(String baseUrl, String value) { final parsed = Uri.tryParse(value); if (parsed != null && parsed.hasScheme) { @@ -600,6 +666,14 @@ class ExerciseCatalogRefreshService { if (value is num) return value.toInt(); return int.tryParse(value.toString()); } + + static bool _isSecureRemoteUri(Uri uri) { + return uri.hasScheme && uri.scheme == 'https' && uri.host.isNotEmpty; + } + + static bool 
_isValidSha256(String value) { + return RegExp(r'^[A-Fa-f0-9]{64}$').hasMatch(value.trim()); + } } class _CatalogDbValidationResult { diff --git a/test/services/exercise_catalog_refresh_service_test.dart b/test/services/exercise_catalog_refresh_service_test.dart index f7bcea8..8e93c29 100644 --- a/test/services/exercise_catalog_refresh_service_test.dart +++ b/test/services/exercise_catalog_refresh_service_test.dart @@ -23,9 +23,13 @@ void main() { group('ExerciseCatalogRefreshService.parseManifest', () { test('parses relative db/report paths against base url', () { final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'stable', 'version': '202601010001', 'db_path': 'artifacts/hypertrack_training.db', 'build_report_path': 'artifacts/wger_build_report.json', + 'db_sha256': + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'min_exercise_count': 123, }, config); @@ -40,12 +44,20 @@ void main() { ); expect(manifest.minimumExerciseRows, 123); expect(manifest.version, '202601010001'); + expect( + manifest.dbSha256, + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', + ); }); test('uses absolute urls from manifest directly', () { final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'stable', 'version': '202602020002', 'db_url': 'https://cdn.example.net/catalog/training.db', + 'db_sha256': + 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb', }, config); expect(manifest, isNotNull); @@ -60,10 +72,14 @@ void main() { test('supports release-style manifest with asset_base_url and *_file keys', () { final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'stable', 'version': '202603030003', 'asset_base_url': 'https://github.com/org/repo/releases/download/tag/', 'db_file': 'hypertrack_training.db', 'build_report_file': 'wger_build_report.json', + 'db_sha256': + 
'cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc', 'expected_exercise_count': 777, }, config); @@ -76,7 +92,84 @@ void main() { manifest.buildReportUri!.toString(), 'https://github.com/org/repo/releases/download/tag/wger_build_report.json', ); - expect(manifest.minimumExerciseRows, 777); + expect( + manifest.minimumExerciseRows, + isNull, + reason: + 'expected_exercise_count is informational and must not act as hard validation floor', + ); + }); + + test('uses min_exercise_count as the hard validation floor field', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'stable', + 'version': '202603030004', + 'asset_base_url': 'https://github.com/org/repo/releases/download/tag/', + 'db_file': 'hypertrack_training.db', + 'db_sha256': + 'dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd', + 'expected_exercise_count': 777, + 'min_exercise_count': 640, + }, config); + + expect(manifest, isNotNull); + expect(manifest!.minimumExerciseRows, 640); + }); + + test('rejects manifest with unexpected source_id', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'other_source', + 'channel': 'stable', + 'version': '202603030005', + 'db_url': 'https://cdn.example.net/catalog/training.db', + 'db_sha256': + 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee', + }, config); + + expect(manifest, isNull); + }); + + test('rejects manifest with unexpected channel', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'beta', + 'version': '202603030006', + 'db_url': 'https://cdn.example.net/catalog/training.db', + 'db_sha256': + 'ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff', + }, config); + + expect(manifest, isNull); + }); + + test('rejects non-https remote urls', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 
'channel': 'stable', + 'version': '202603030007', + 'db_url': 'http://cdn.example.net/catalog/training.db', + 'db_sha256': + '1111111111111111111111111111111111111111111111111111111111111111', + }, config); + + expect(manifest, isNull); + }); + + test('rejects contradictory expected/min exercise counts', () { + final manifest = ExerciseCatalogRefreshService.parseManifest({ + 'source_id': 'wger_catalog', + 'channel': 'stable', + 'version': '202603030008', + 'asset_base_url': 'https://github.com/org/repo/releases/download/tag/', + 'db_file': 'hypertrack_training.db', + 'db_sha256': + '2222222222222222222222222222222222222222222222222222222222222222', + 'expected_exercise_count': 500, + 'min_exercise_count': 650, + }, config); + + expect(manifest, isNull); }); });