knostic · joshbouncesecurity · May 4, 2026 · May 4, 2026
@@ -208,7 +208,7 @@ def gather_context_sources(repo_path: Path) -> dict[str, str]:
         filepath = repo_path / filename
         if filepath.exists():
             try:
-                content = filepath.read_text(errors="ignore")
+                content = filepath.read_text(encoding="utf-8", errors="ignore")
                 # Limit size to avoid token overflow
                 if len(content) > 10000:
                     content = content[:10000] + "\n\n[... truncated ...]"
@@ -289,7 +289,7 @@ def detect_entry_points(repo_path: Path) -> str:
             continue
 
         try:
-            content = py_file.read_text(errors="ignore")
+            content = py_file.read_text(encoding="utf-8", errors="ignore")
             rel_path = py_file.relative_to(repo_path)
 
             for category, patterns in ENTRY_POINT_PATTERNS.items():
@@ -308,7 +308,7 @@ def detect_entry_points(repo_path: Path) -> str:
             continue
 
         try:
-            content = js_file.read_text(errors="ignore")
+            content = js_file.read_text(encoding="utf-8", errors="ignore")
             rel_path = js_file.relative_to(repo_path)
 
             if re.search(r"express\(\)|require\(['\"]express['\"]\)", content):
@@ -340,7 +340,7 @@ def check_manual_override(repo_path: Path) -> ApplicationContext | None:
             continue
 
         try:
-            content = filepath.read_text()
+            content = filepath.read_text(encoding="utf-8")
 
             if filename.endswith('.json'):
                 # Direct JSON format
@@ -545,7 +545,7 @@ def save_context(context: ApplicationContext, output_path: Path) -> None:
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
 
-    with open(output_path, 'w') as f:
+    with open(output_path, 'w', encoding="utf-8") as f:
         json.dump(asdict(context), f, indent=2)
 
     print(f"Context saved to {output_path}", file=sys.stderr)
@@ -560,7 +560,7 @@ def load_context(input_path: Path) -> ApplicationContext:
     Returns:
         ApplicationContext loaded from file.
     """
-    with open(input_path) as f:
+    with open(input_path, encoding="utf-8") as f:
         data = json.load(f)
 
     # Mark as manual to skip validation (already validated when saved)

@@ -330,7 +330,7 @@ def run_analysis(
 
     # Load dataset
     print(f"[Analyze] Loading dataset: {dataset_path}", file=sys.stderr)
-    with open(dataset_path) as f:
+    with open(dataset_path, encoding="utf-8") as f:
         dataset = json.load(f)
 
     units = dataset.get("units", [])
@@ -513,7 +513,7 @@ def _summary_callback(finding, usage=None):
         "code_by_route": code_by_route,
     }
 
-    with open(results_path, "w") as f:
+    with open(results_path, "w", encoding="utf-8") as f:
         json.dump(experiment_result, f, indent=2)
 
     print(f"\n[Analyze] Results written to {results_path}", file=sys.stderr)

@@ -79,7 +79,7 @@ def load(self) -> dict[str, dict]:
                 continue
             filepath = os.path.join(self.dir, filename)
             try:
-                with open(filepath, "r") as f:
+                with open(filepath, "r", encoding="utf-8") as f:
                     data = json.load(f)
                 unit_id = data.get("id")
                 if unit_id:
@@ -130,7 +130,7 @@ def save(self, unit_id: str, data: dict):
         filename = self._safe_filename(unit_id) + ".json"
         filepath = os.path.join(self.dir, filename)
         data["id"] = unit_id  # ensure id is always present
-        with open(filepath, "w") as f:
+        with open(filepath, "w", encoding="utf-8") as f:
             json.dump(data, f, indent=2)
 
     def write_summary(
@@ -168,7 +168,7 @@ def write_summary(
         }
         if usage is not None:
             data["usage"] = usage
-        with open(filepath, "w") as f:
+        with open(filepath, "w", encoding="utf-8") as f:
             json.dump(data, f, indent=2)
 
     @staticmethod
@@ -182,7 +182,7 @@ def read_summary(checkpoint_dir: str) -> dict | None:
         if not os.path.isfile(filepath):
             return None
         try:
-            with open(filepath, "r") as f:
+            with open(filepath, "r", encoding="utf-8") as f:
                 return json.load(f)
         except (json.JSONDecodeError, OSError):
             return None
@@ -241,7 +241,7 @@ def status(checkpoint_dir: str) -> dict:
                 continue
             filepath = os.path.join(checkpoint_dir, filename)
             try:
-                with open(filepath, "r") as f:
+                with open(filepath, "r", encoding="utf-8") as f:
                     data = json.load(f)
             except (json.JSONDecodeError, OSError):
                 errors += 1

@@ -51,7 +51,7 @@ def run_tests(
     os.makedirs(output_dir, exist_ok=True)
 
     # Check how many findings to test
-    with open(pipeline_output_path) as f:
+    with open(pipeline_output_path, encoding="utf-8") as f:
         pipeline_data = json.load(f)
 
     findings = pipeline_data.get("findings", [])
@@ -65,7 +65,7 @@ def run_tests(
 
     if not testable:
         results_path = os.path.join(output_dir, "dynamic_test_results.json")
-        with open(results_path, "w") as f:
+        with open(results_path, "w", encoding="utf-8") as f:
             json.dump({"findings_tested": 0, "results": []}, f, indent=2)
 
         return DynamicTestStepResult(

@@ -69,7 +69,7 @@ def enhance_dataset(
 
     # Load dataset
     print(f"[Enhance] Loading dataset: {dataset_path}", file=sys.stderr)
-    with open(dataset_path) as f:
+    with open(dataset_path, encoding="utf-8") as f:
         dataset = json.load(f)
 
     units = dataset.get("units", [])
@@ -138,7 +138,7 @@ def _on_restored(count: int):
 
     # Write enhanced dataset
     os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
-    with open(output_path, "w") as f:
+    with open(output_path, "w", encoding="utf-8") as f:
         json.dump(enhanced, f, indent=2)
 
     print(f"[Enhance] Enhanced dataset: {output_path}", file=sys.stderr)

@@ -161,7 +161,7 @@ def _maybe_apply_diff_filter(
         )
         return
 
-    with open(result.dataset_path, "r") as f:
+    with open(result.dataset_path, "r", encoding="utf-8") as f:
         dataset = json.load(f)
 
     # Dataset may be a dict with "units" or a raw list.
@@ -172,13 +172,13 @@ def _maybe_apply_diff_filter(
 
     stats = apply_diff_filter(units, manifest)
 
-    with open(result.dataset_path, "w") as f:
+    with open(result.dataset_path, "w", encoding="utf-8") as f:
         json.dump(dataset, f, indent=2)
 
     # Expose stats on the ParseResult via a side-channel file; the parse
     # step_context reads this when assembling parse.report.json.
     diff_report_path = os.path.join(output_dir, "diff_filter.report.json")
-    with open(diff_report_path, "w") as f:
+    with open(diff_report_path, "w", encoding="utf-8") as f:
         json.dump(stats.to_dict(), f, indent=2)
 
     print(
@@ -245,7 +245,7 @@ def _load_module(name, filename):
 
     print(f"\n[Reachability Filter] Filtering to {processing_level} units...", file=sys.stderr)
 
-    with open(call_graph_path, "r") as f:
+    with open(call_graph_path, "r", encoding="utf-8") as f:
         call_graph_data = json.load(f)
 
     functions = call_graph_data.get("functions", {})
@@ -352,10 +352,10 @@ def _parse_python(repo_path: str, output_dir: str, processing_level: str, skip_t
         dataset = _apply_reachability_filter(dataset, output_dir, processing_level)
 
     # Write outputs
-    with open(dataset_path, "w") as f:
+    with open(dataset_path, "w", encoding="utf-8") as f:
         json.dump(dataset, f, indent=2)
 
-    with open(analyzer_output_path, "w") as f:
+    with open(analyzer_output_path, "w", encoding="utf-8") as f:
         json.dump(analyzer_output, f, indent=2)
 
     units_count = len(dataset.get("units", []))
@@ -413,7 +413,7 @@ def _parse_javascript(repo_path: str, output_dir: str, processing_level: str, sk
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 
@@ -470,7 +470,7 @@ def _parse_go(repo_path: str, output_dir: str, processing_level: str, skip_tests
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 
@@ -530,7 +530,7 @@ def _parse_c(repo_path: str, output_dir: str, processing_level: str, skip_tests:
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 
@@ -590,7 +590,7 @@ def _parse_ruby(repo_path: str, output_dir: str, processing_level: str, skip_tes
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 
@@ -650,7 +650,7 @@ def _parse_php(repo_path: str, output_dir: str, processing_level: str, skip_test
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 
@@ -710,7 +710,7 @@ def _parse_zig(repo_path: str, output_dir: str, processing_level: str, skip_test
     # Count units
     units_count = 0
     if os.path.exists(dataset_path):
-        with open(dataset_path) as f:
+        with open(dataset_path, encoding="utf-8") as f:
             data = json.load(f)
         units_count = len(data.get("units", []))
 

@@ -34,7 +34,7 @@ def _load_diff_metadata(scan_dir: str) -> dict | None:
     if not os.path.exists(manifest_path):
         return None
     try:
-        with open(manifest_path) as f:
+        with open(manifest_path, encoding="utf-8") as f:
             manifest = json.load(f)
     except (json.JSONDecodeError, OSError):
         return None
@@ -50,7 +50,7 @@ def _load_diff_metadata(scan_dir: str) -> dict | None:
     filter_report = os.path.join(scan_dir, "diff_filter.report.json")
     if os.path.exists(filter_report):
         try:
-            with open(filter_report) as f:
+            with open(filter_report, encoding="utf-8") as f:
                 stats = json.load(f)
             out["units_in_diff"] = stats.get("selected")
             out["units_total_parsed"] = stats.get("total")
@@ -129,7 +129,7 @@ def _dedup_caller_callee(
         return confirmed
 
     try:
-        with open(call_graph_path) as f:
+        with open(call_graph_path, encoding="utf-8") as f:
             cg_data = json.load(f)
     except (json.JSONDecodeError, OSError):
         return confirmed
@@ -212,7 +212,7 @@ def build_pipeline_output(
     """
     print(f"[Report] Building pipeline_output.json...", file=sys.stderr)
 
-    with open(results_path) as f:
+    with open(results_path, encoding="utf-8") as f:
         experiment = json.load(f)
 
     all_results = experiment.get("results", [])
@@ -371,7 +371,7 @@ def build_pipeline_output(
         print(_banner, file=sys.stderr)
 
     os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
-    with open(output_path, "w") as f:
+    with open(output_path, "w", encoding="utf-8") as f:
         json.dump(pipeline_output, f, indent=2, ensure_ascii=False)
 
     print(f"  pipeline_output.json: {len(findings_data)} findings", file=sys.stderr)
@@ -469,7 +469,7 @@ def generate_summary_report(
 
     print("[Report] Generating summary report (LLM)...", file=sys.stderr)
 
-    with open(results_path) as f:
+    with open(results_path, encoding="utf-8") as f:
         pipeline_data = json.load(f)
 
     # Merge dynamic test results if available
@@ -483,7 +483,7 @@ def generate_summary_report(
     report_text, usage = _generate_summary(pipeline_data)
 
     os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
-    with open(output_path, "w") as f:
+    with open(output_path, "w", encoding="utf-8") as f:
         f.write(report_text)
 
     print(f"  Summary report: {output_path}", file=sys.stderr)
@@ -517,7 +517,7 @@ def generate_disclosure_docs(
 
     print("[Report] Generating disclosure documents (LLM)...", file=sys.stderr)
 
-    with open(results_path) as f:
+    with open(results_path, encoding="utf-8") as f:
         pipeline_data = json.load(f)
 
     # Merge dynamic test results if available
@@ -552,7 +552,7 @@ def _one(args):
             safe_name = finding["short_name"].replace(" ", "_").upper()
             filename = f"DISCLOSURE_{i:02d}_{safe_name}.md"
             filepath = os.path.join(output_dir, filename)
-            with open(filepath, "w") as f:
+            with open(filepath, "w", encoding="utf-8") as f:
                 f.write(disclosure_text)
             return finding["short_name"], filepath, usage
 

@@ -149,7 +149,7 @@ def _step_label(name: str) -> str:
         _diff_report = os.path.join(output_dir, "diff_filter.report.json")
         if os.path.exists(_diff_report):
             try:
-                with open(_diff_report) as _f:
+                with open(_diff_report, encoding="utf-8") as _f:
                     ctx.summary["diff_stats"] = json.load(_f)
             except (json.JSONDecodeError, OSError):
                 pass
@@ -542,7 +542,7 @@ def _load_step_report(output_dir: str, step: str) -> dict:
     """Load a step report JSON from disk. Returns empty dict on failure."""
     path = os.path.join(output_dir, f"{step}.report.json")
     try:
-        with open(path) as f:
+        with open(path, encoding="utf-8") as f:
             return json.load(f)
     except Exception:
         return {"step": step, "status": "unknown"}
@@ -551,7 +551,7 @@ def _load_step_report(output_dir: str, step: str) -> dict:
 def _read_app_type(app_context_path: str) -> str | None:
     """Read application_type from an app context JSON file."""
     try:
-        with open(app_context_path) as f:
+        with open(app_context_path, encoding="utf-8") as f:
             data = json.load(f)
         return data.get("application_type")
     except Exception:

@@ -268,6 +268,6 @@ def write(self, output_dir: str) -> str:
         """Write ``{step}.report.json`` to *output_dir*. Returns the path."""
         os.makedirs(output_dir, exist_ok=True)
         path = os.path.join(output_dir, f"{self.step}.report.json")
-        with open(path, "w") as f:
+        with open(path, "w", encoding="utf-8") as f:
             json.dump(self.to_dict(), f, indent=2)
         return path
@@ -80,7 +80,7 @@ def run_verification(
 
     # Load Stage 1 results
     print(f"[Verify] Loading results: {results_path}", file=sys.stderr)
-    with open(results_path) as f:
+    with open(results_path, encoding="utf-8") as f:
         experiment = json.load(f)
 
     all_results = experiment.get("results", [])
@@ -268,7 +268,7 @@ def _write_verified_results(
 
     output["metrics"] = {"total": len(merged_results), **counts}
 
-    with open(path, "w") as f:
+    with open(path, "w", encoding="utf-8") as f:
         json.dump(output, f, indent=2, ensure_ascii=False)