Skip to content

Commit 37225c4

Browse files
grokifyclaude
andcommitted
feat(schema): add graph validation
Add schema validation for graph data:

- Validator type with configurable rules
- ValidateNode: ID, type, label, allowed types
- ValidateEdge: endpoints, type, confidence validation
- ValidateEdgeRefs: referential integrity checks
- ValidateGraph: full graph validation with error collection
- ValidationError type for structured error reporting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 8fa733a commit 37225c4

File tree

1 file changed

+166
-0
lines changed

1 file changed

+166
-0
lines changed

pkg/schema/validate.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Package schema provides validation for graph data.
2+
package schema
3+
4+
import (
	"fmt"
	"sort"
	"strings"

	"github.com/plexusone/graphfs/pkg/graph"
)
10+
11+
// Validator checks graph nodes and edges against a configurable set of
// schema rules. With the zero value no type allow-list is enforced and node
// labels are optional.
type Validator struct {
	// AllowedNodeTypes is the allow-list of node types. An empty list
	// disables the allow-list check, so any non-empty node type passes.
	AllowedNodeTypes []string

	// AllowedEdgeTypes is the allow-list of edge types. An empty list
	// disables the allow-list check, so any non-empty edge type passes.
	AllowedEdgeTypes []string

	// RequireNodeLabel, when true, makes a node with an empty label invalid.
	RequireNodeLabel bool
}
22+
23+
// NewValidator creates a validator with default settings.
24+
func NewValidator() *Validator {
25+
return &Validator{}
26+
}
27+
28+
// ValidationError describes a single rule violation found during validation.
type ValidationError struct {
	// Field names the attribute that failed validation, for example "id".
	Field string

	// Message is the human-readable explanation of the failure.
	Message string
}
33+
34+
func (e *ValidationError) Error() string {
35+
return fmt.Sprintf("%s: %s", e.Field, e.Message)
36+
}
37+
38+
// ValidateNode validates a single node.
39+
func (v *Validator) ValidateNode(n *graph.Node) error {
40+
if n.ID == "" {
41+
return &ValidationError{Field: "id", Message: "node ID is required"}
42+
}
43+
44+
// Check for invalid characters in ID (must be filesystem-safe)
45+
if strings.ContainsAny(n.ID, "/\\:*?\"<>|") {
46+
return &ValidationError{Field: "id", Message: "node ID contains invalid characters"}
47+
}
48+
49+
if n.Type == "" {
50+
return &ValidationError{Field: "type", Message: "node type is required"}
51+
}
52+
53+
if v.RequireNodeLabel && n.Label == "" {
54+
return &ValidationError{Field: "label", Message: "node label is required"}
55+
}
56+
57+
if len(v.AllowedNodeTypes) > 0 && !contains(v.AllowedNodeTypes, n.Type) {
58+
return &ValidationError{
59+
Field: "type",
60+
Message: fmt.Sprintf("node type %q is not allowed", n.Type),
61+
}
62+
}
63+
64+
return nil
65+
}
66+
67+
// ValidateEdge validates a single edge.
68+
func (v *Validator) ValidateEdge(e *graph.Edge) error {
69+
if e.From == "" {
70+
return &ValidationError{Field: "from", Message: "edge source is required"}
71+
}
72+
73+
if e.To == "" {
74+
return &ValidationError{Field: "to", Message: "edge target is required"}
75+
}
76+
77+
if e.Type == "" {
78+
return &ValidationError{Field: "type", Message: "edge type is required"}
79+
}
80+
81+
if e.Confidence == "" {
82+
return &ValidationError{Field: "confidence", Message: "edge confidence is required"}
83+
}
84+
85+
// Validate confidence value
86+
switch e.Confidence {
87+
case graph.ConfidenceExtracted, graph.ConfidenceInferred, graph.ConfidenceAmbiguous:
88+
// Valid
89+
default:
90+
return &ValidationError{
91+
Field: "confidence",
92+
Message: fmt.Sprintf("invalid confidence value %q", e.Confidence),
93+
}
94+
}
95+
96+
// Confidence score only meaningful for inferred edges
97+
if e.Confidence == graph.ConfidenceInferred {
98+
if e.ConfidenceScore < 0 || e.ConfidenceScore > 1 {
99+
return &ValidationError{
100+
Field: "confidence_score",
101+
Message: "confidence score must be between 0 and 1",
102+
}
103+
}
104+
}
105+
106+
if len(v.AllowedEdgeTypes) > 0 && !contains(v.AllowedEdgeTypes, e.Type) {
107+
return &ValidationError{
108+
Field: "type",
109+
Message: fmt.Sprintf("edge type %q is not allowed", e.Type),
110+
}
111+
}
112+
113+
return nil
114+
}
115+
116+
// ValidateEdgeRefs validates that edge references exist in the node set.
117+
func (v *Validator) ValidateEdgeRefs(e *graph.Edge, nodes map[string]*graph.Node) error {
118+
if _, ok := nodes[e.From]; !ok {
119+
return &ValidationError{
120+
Field: "from",
121+
Message: fmt.Sprintf("source node %q does not exist", e.From),
122+
}
123+
}
124+
125+
if _, ok := nodes[e.To]; !ok {
126+
return &ValidationError{
127+
Field: "to",
128+
Message: fmt.Sprintf("target node %q does not exist", e.To),
129+
}
130+
}
131+
132+
return nil
133+
}
134+
135+
// ValidateGraph validates an entire graph.
136+
func (v *Validator) ValidateGraph(g *graph.Graph) []error {
137+
var errs []error
138+
139+
// Validate all nodes
140+
for id, n := range g.Nodes {
141+
if err := v.ValidateNode(n); err != nil {
142+
errs = append(errs, fmt.Errorf("node %s: %w", id, err))
143+
}
144+
}
145+
146+
// Validate all edges
147+
for i, e := range g.Edges {
148+
if err := v.ValidateEdge(e); err != nil {
149+
errs = append(errs, fmt.Errorf("edge[%d]: %w", i, err))
150+
}
151+
if err := v.ValidateEdgeRefs(e, g.Nodes); err != nil {
152+
errs = append(errs, fmt.Errorf("edge[%d]: %w", i, err))
153+
}
154+
}
155+
156+
return errs
157+
}
158+
159+
// contains reports whether item occurs in slice, using exact (case-sensitive)
// string equality. A nil or empty slice contains nothing.
func contains(slice []string, item string) bool {
	found := false
	// Stop scanning as soon as a match is seen.
	for i := 0; i < len(slice) && !found; i++ {
		found = slice[i] == item
	}
	return found
}

0 commit comments

Comments
 (0)