Skip to content

Commit 44e69f6

Browse files
authored
Merge pull request #61 from stackql/claude/centralize-error-detection-011CUwtsDaHZG1TEs9N7i9RR
Centralize error detection logic in pystackql
2 parents ac55a60 + 52d089e commit 44e69f6

File tree

10 files changed

+1007
-23
lines changed

10 files changed

+1007
-23
lines changed

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
# Changelog
22

3+
## v3.8.2 (2025-11-09)
4+
5+
### New Features
6+
7+
- **Centralized Error Detection**: Added centralized error detection system with configurable patterns
8+
- New `errors.yaml` configuration file with error patterns
9+
- Supports three pattern types: fuzzy matches, exact matches, and regex matches
10+
- Automatically detects errors in stdout and moves them to the error field
11+
- Eliminates need for external applications to parse error messages
12+
- Includes patterns for HTTP 4xx/5xx errors, DNS failures, connection errors, and timeouts
13+
- Added `ErrorDetector` class for pattern-based error detection
14+
15+
- **Markdown-KV Output Format**: Added `markdownkv` output format optimized for LLM understanding
16+
- Based on research showing 60.7% LLM accuracy with Markdown-KV vs 44.3% with CSV
17+
- Ideal for RAG pipelines and AI-based systems processing tabular data
18+
- Hierarchical structure with markdown headers and code blocks
19+
- Supported in both local and server modes
20+
- Reference: [Which Table Format Do LLMs Understand Best?](https://www.empiricalagents.com/blog/which-table-format-do-llms-understand-best)
21+
22+
### Dependencies
23+
24+
- Added `PyYAML>=5.4.0` for error pattern configuration
25+
26+
### Testing
27+
28+
- Added comprehensive test suite for error detection (`tests/test_error_detection.py`)
29+
- Added test suite for Markdown-KV format (`tests/test_markdownkv_format.py`)
30+
- Tests for regex pattern matching, DNS errors, connection errors, and timeouts
31+
- Tests for LLM-friendly data formatting
32+
333
## v3.8.1 (2025-06-25)
434

535
### Updates

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include pystackql/errors.yaml

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pystackql"
7-
version = "3.8.1"
7+
version = "3.8.2"
88
description = "A Python interface for StackQL"
99
readme = "README.rst"
1010
authors = [
@@ -31,6 +31,7 @@ dependencies = [
3131
"nest-asyncio>=1.5.5",
3232
"termcolor>=1.1.0",
3333
"tqdm>=4.61.0",
34+
"PyYAML>=5.4.0",
3435
]
3536

3637
[tool.setuptools.packages.find]

pystackql/core/error_detector.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# pystackql/core/error_detector.py
2+
3+
"""
4+
Error detection module for PyStackQL.
5+
6+
This module provides centralized error detection logic that checks messages
7+
against predefined error patterns loaded from errors.yaml.
8+
"""
9+
10+
import os
11+
import re
12+
import yaml
13+
14+
15+
class ErrorDetector:
    """Detect error messages in query output using configurable patterns.

    Patterns are loaded from ``errors.yaml`` in the pystackql package
    directory (the parent of the directory containing this module) and come
    in three kinds, checked in this order:

    * fuzzy: case-insensitive substring match
    * exact: case-sensitive prefix match (full equality is a special case)
    * regex: case-insensitive regular-expression search
    """

    def __init__(self):
        """Initialize the ErrorDetector by loading error patterns from errors.yaml."""
        self.fuzzy_patterns = []  # lower-cased substrings
        self.exact_patterns = []  # case-sensitive prefixes
        self.regex_patterns = []  # (source pattern, compiled regex) tuples
        self._load_error_patterns()

    def _load_error_patterns(self):
        """Load error patterns from the errors.yaml file.

        The file is looked up one directory above this module, i.e. in the
        pystackql package directory. A missing file is silently ignored; any
        other failure prints a warning and leaves the patterns loaded so far
        in place, so a bad configuration never breaks existing functionality.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        package_dir = os.path.dirname(current_dir)
        errors_file = os.path.join(package_dir, 'errors.yaml')

        try:
            if os.path.exists(errors_file):
                # YAML is UTF-8 by convention; do not depend on the locale.
                with open(errors_file, 'r', encoding='utf-8') as f:
                    error_config = yaml.safe_load(f)
                self._apply_error_config(error_config)
        except Exception as e:
            # Deliberate best-effort: continue with whatever was loaded.
            print(f"Warning: Could not load error patterns from {errors_file}: {e}")

    def _apply_error_config(self, error_config):
        """Populate the pattern lists from a parsed errors.yaml document.

        Args:
            error_config: The object returned by the YAML parser. Anything
                other than a mapping with a mapping under ``'errors'`` is
                ignored.

        A pattern list is only replaced when its section key is present.
        """
        if not isinstance(error_config, dict):
            return
        errors = error_config.get('errors')
        if not isinstance(errors, dict):
            return

        if 'fuzzy_matches' in errors:
            # Lower-case once here so matching only lower-cases the message.
            self.fuzzy_patterns = [
                pattern.lower()
                for pattern in self._string_patterns(errors['fuzzy_matches'], 'fuzzy_matches')
            ]

        if 'exact_matches' in errors:
            self.exact_patterns = self._string_patterns(
                errors['exact_matches'], 'exact_matches'
            )

        if 'regex_matches' in errors:
            self.regex_patterns = []
            for pattern in self._string_patterns(errors['regex_matches'], 'regex_matches'):
                try:
                    # Compile once, with IGNORECASE for case-insensitive matching.
                    compiled = re.compile(pattern, re.IGNORECASE)
                except re.error as regex_err:
                    print(f"Warning: Invalid regex pattern '{pattern}': {regex_err}")
                else:
                    self.regex_patterns.append((pattern, compiled))

    @staticmethod
    def _string_patterns(entries, section):
        """Return the non-empty string entries of one pattern section.

        Args:
            entries: The raw value of the section (expected: list of str).
            section (str): Section name, used only in warning messages.

        Returns:
            list: The usable string patterns, in their original order.
        """
        if isinstance(entries, str):
            # A bare scalar would otherwise be iterated character by
            # character, turning every letter into a pattern.
            entries = [entries]
        elif not isinstance(entries, (list, tuple)):
            # Covers an empty section (None) and other unexpected shapes.
            return []

        patterns = []
        for entry in entries:
            if not entry:
                continue  # skip empty strings / nulls
            if not isinstance(entry, str):
                # e.g. an unquoted number; skip it instead of failing the
                # whole load and losing every other pattern.
                print(f"Warning: Ignoring non-string pattern {entry!r} in '{section}'")
                continue
            patterns.append(entry)
        return patterns

    def _find_match(self, message):
        """Return ``(pattern, pattern_type)`` for the first matching pattern.

        Patterns are tried in the order fuzzy, exact, regex. Returns None
        when the message is empty, is not a string, or matches nothing.
        """
        if not message or not isinstance(message, str):
            return None

        message_lower = message.lower()
        for pattern in self.fuzzy_patterns:
            if pattern in message_lower:
                return pattern, "fuzzy"

        for pattern in self.exact_patterns:
            # startswith() is also true when the strings are equal.
            if message.startswith(pattern):
                return pattern, "exact"

        for pattern_str, compiled_pattern in self.regex_patterns:
            if compiled_pattern.search(message):
                return pattern_str, "regex"

        return None

    def is_error(self, message):
        """Check if a message contains any error patterns.

        Args:
            message (str): The message to check for error patterns

        Returns:
            bool: True if the message matches any error pattern, False
                otherwise (including for empty or non-string input)
        """
        return self._find_match(message) is not None

    def extract_error_info(self, message):
        """Extract error information from a message.

        Args:
            message (str): The error message

        Returns:
            dict or None: None when the message matches no pattern;
                otherwise a dictionary with the keys 'error' (the message),
                'detected_pattern' (the first pattern that matched) and
                'pattern_type' ("fuzzy", "exact" or "regex")
        """
        match = self._find_match(message)
        if match is None:
            return None

        detected_pattern, pattern_type = match
        return {
            "error": message,
            "detected_pattern": detected_pattern,
            "pattern_type": pattern_type
        }

0 commit comments

Comments
 (0)