-
Notifications
You must be signed in to change notification settings - Fork 108
Entity Validation
Brian Sam-Bodden edited this page Jan 11, 2026
·
1 revision
Based on analysis of RediSearch and RedisJSON source code, here are the main reasons why indexing fails and a comprehensive validation utility to detect these issues before saving.
-
Type Mismatches
- Numeric field receiving non-numeric string (e.g., "abc" in numeric field)
- Vector field with wrong dimension size
- Geometry field with invalid WKT format
- Date field with unparseable date string
-
Size Violations
- Vector blob size mismatch (expected vs actual bytes)
- Text field exceeding max length
- Tag field with too many values
- Document exceeding size limits
-
Format Errors
- Invalid UTF-8 encoding in text fields
- Malformed JSON structure
- Invalid geo coordinates (out of range)
- Non-normalized vector values
-
Null/Missing Handling
- Required fields with null values
- Missing fields that are indexed
- Empty arrays where not allowed
- Null in sortable fields
-
Duplicate Fields
- Same field indexed twice in document
- Conflicting field paths in nested JSON
package com.redis.om.spring.validation;
import com.redis.om.spring.annotations.*;
import com.redis.om.spring.metamodel.SearchFieldAccessor;
import org.springframework.stereotype.Component;
import lombok.extern.slf4j.Slf4j;
/**
* Validates entities before indexing to predict and prevent RediSearch failures
*/
@Component
@Slf4j
public class EntityIndexingValidator {
private final GsonBuilder gsonBuilder;
private final RediSearchIndexer indexer;
/**
 * Outcome of validating a single entity: the overall verdict, the collected
 * errors and warnings, the per-field results and the estimated serialized size.
 *
 * <p>Instances are created through the Lombok builder. The builder does not
 * force the list fields to be set, so the helper methods below treat an unset
 * (null) list as empty instead of throwing.
 */
@Data
@Builder
public static class ValidationResult {
    private final boolean valid;
    private final String entityId;
    private final Class<?> entityClass;
    private final List<ValidationError> errors;
    private final List<ValidationWarning> warnings;
    private final Map<String, FieldValidation> fieldValidations;
    private final long estimatedSize;

    /**
     * @return {@code true} when at least one error was recorded
     */
    public boolean hasErrors() {
        return errors != null && !errors.isEmpty();
    }

    /**
     * @return {@code true} when at least one warning was recorded
     */
    public boolean hasWarnings() {
        return warnings != null && !warnings.isEmpty();
    }

    /**
     * Renders a plain-text summary: a header line, a counts line, then one
     * line per error and per warning.
     *
     * @return the formatted report
     */
    public String generateReport() {
        // Fall back to placeholders so that a partially populated result can still be reported.
        String typeName = entityClass != null ? entityClass.getSimpleName() : "<unknown>";
        int errorCount = errors != null ? errors.size() : 0;
        int warningCount = warnings != null ? warnings.size() : 0;

        StringBuilder sb = new StringBuilder();
        sb.append(String.format("Validation Report for %s (ID: %s)\n",
            typeName, entityId));
        sb.append(String.format("Valid: %s | Errors: %d | Warnings: %d\n",
            valid, errorCount, warningCount));
        if (hasErrors()) {
            sb.append("\nERRORS:\n");
            errors.forEach(e -> sb.append(String.format(" - [%s] %s: %s\n",
                e.getSeverity(), e.getFieldName(), e.getMessage())));
        }
        if (hasWarnings()) {
            sb.append("\nWARNINGS:\n");
            warnings.forEach(w -> sb.append(String.format(" - %s: %s\n",
                w.getFieldName(), w.getMessage())));
        }
        return sb.toString();
    }
}
/**
 * A single problem found while validating an entity. Built through the
 * Lombok-generated builder; fields that a validator does not set stay null.
 */
@Data
@Builder
public static class ValidationError {
// How serious the problem is; the validators in this class use CRITICAL, HIGH and MEDIUM.
public enum Severity { CRITICAL, HIGH, MEDIUM, LOW }
// Name of the offending field; document-level checks use "_document".
private final String fieldName;
// Index path of the field; in this file only the duplicate-field check fills it in.
private final String fieldPath;
// Human-readable description of the problem.
private final String message;
private final Severity severity;
// Machine-readable category of the problem.
private final ErrorType type;
// Offending value (or its class / size), when the validator recorded one.
private final Object actualValue;
// Expected value or description of the expected type, when the validator recorded one.
private final Object expectedValue;
}
/**
 * Categories attached to every {@link ValidationError}. The comments describe
 * how the validators visible in this class use each constant.
 */
public enum ErrorType {
// Value has the wrong Java type for the indexed field (numeric, vector, geo, tag).
TYPE_MISMATCH,
// Estimated document size exceeds the configured maximum.
SIZE_VIOLATION,
// Malformed value: invalid WKT string or tag containing a separator character.
FORMAT_ERROR,
// Required field holds null.
NULL_NOT_ALLOWED,
// Two @Indexed fields resolve to the same path.
DUPLICATE_FIELD,
// NaN or infinite number in a numeric or vector field.
RANGE_VIOLATION,
// Text does not survive a UTF-8 round trip.
ENCODING_ERROR,
// No index specification for the entity, or a field could not be read reflectively.
MISSING_REQUIRED,
// Vector length or blob size differs from the declared dimension.
INVALID_VECTOR_DIMENSION,
// Longitude or latitude outside the accepted range.
INVALID_GEO_COORDINATES,
// Not produced by any validator visible in this class.
INVALID_DATE_FORMAT,
// String value of a numeric field cannot be parsed as a number.
NUMERIC_PARSE_ERROR,
// Text longer than the field's maximum length.
TEXT_TOO_LONG,
// More tags than the field's maximum.
TAG_LIMIT_EXCEEDED,
// Entity cannot be serialized to a JSON object, or contains circular references.
INVALID_JSON_STRUCTURE
}
/**
 * Validates an entity before saving to predict indexing failures.
 *
 * <p>Steps: look up the index specification for the entity's class, validate
 * every instance field declared directly on that class, check for duplicate
 * index paths, compare the estimated serialized size with the configured
 * maximum and, for {@code @Document} entities, validate the JSON structure.
 *
 * <p>Only fields returned by {@code getDeclaredFields()} are inspected, so
 * fields inherited from superclasses are not validated here.
 *
 * @param entity the entity to validate; must not be null
 * @param <T> entity type
 * @return the validation result; its error, warning and field-validation
 *         collections are always non-null
 * @throws NullPointerException if {@code entity} is null
 */
public <T> ValidationResult validate(T entity) {
    Class<?> entityClass = entity.getClass();
    List<ValidationError> errors = new ArrayList<>();
    List<ValidationWarning> warnings = new ArrayList<>();
    Map<String, FieldValidation> fieldValidations = new HashMap<>();

    // Get index metadata
    String indexName = indexer.getIndexName(entityClass);
    IndexSpec spec = indexer.getSpec(indexName);
    if (spec == null) {
        errors.add(ValidationError.builder()
            .fieldName("_document")
            .message("No index specification found for entity")
            .severity(ValidationError.Severity.CRITICAL)
            .type(ErrorType.MISSING_REQUIRED)
            .build());
        // Populate the same collections as the normal path so callers never see null.
        return ValidationResult.builder()
            .valid(false)
            .entityId(extractId(entity))
            .entityClass(entityClass)
            .errors(errors)
            .warnings(warnings)
            .fieldValidations(fieldValidations)
            .build();
    }

    // Validate each field
    for (Field field : entityClass.getDeclaredFields()) {
        // Static and compiler-generated fields are not part of the stored document.
        if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
            continue;
        }
        field.setAccessible(true);
        FieldValidation validation = validateField(entity, field, spec);
        fieldValidations.put(field.getName(), validation);
        // A FieldValidation built without explicit lists may expose null here.
        if (validation.getErrors() != null) {
            errors.addAll(validation.getErrors());
        }
        if (validation.getWarnings() != null) {
            warnings.addAll(validation.getWarnings());
        }
    }

    // Check for duplicate fields
    errors.addAll(checkDuplicateFields(entity, spec));

    // Estimate document size
    long estimatedSize = estimateEntitySize(entity);
    long maxDocumentSize = getMaxDocumentSize();
    if (estimatedSize > maxDocumentSize) {
        errors.add(ValidationError.builder()
            .fieldName("_document")
            .message(String.format("Document size %d exceeds maximum %d",
                estimatedSize, maxDocumentSize))
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.SIZE_VIOLATION)
            .actualValue(estimatedSize)
            .expectedValue(maxDocumentSize)
            .build());
    }

    // Check JSON structure if Document
    if (entityClass.isAnnotationPresent(Document.class)) {
        errors.addAll(validateJsonStructure(entity));
    }

    return ValidationResult.builder()
        .valid(errors.isEmpty())
        .entityId(extractId(entity))
        .entityClass(entityClass)
        .errors(errors)
        .warnings(warnings)
        .fieldValidations(fieldValidations)
        .estimatedSize(estimatedSize)
        .build();
}
/**
 * Validates a single field of an entity against the index specification.
 *
 * <p>Fields without a {@code FieldSpec} are reported as not indexed. Null
 * values produce an error for required fields and a warning for sortable
 * fields. Non-null values of {@code @Indexed} fields are dispatched to the
 * type-specific validator selected by the annotation's schema field type.
 *
 * <p>Every returned {@code FieldValidation} carries non-null error and
 * warning lists, so callers can merge them without null checks.
 *
 * @param entity the entity that owns the field
 * @param field the field to validate; must already be accessible
 * @param spec the index specification of the entity
 * @return the validation outcome for this field
 */
private FieldValidation validateField(Object entity, Field field, IndexSpec spec) {
    List<ValidationError> errors = new ArrayList<>();
    List<ValidationWarning> warnings = new ArrayList<>();
    Object value = null;
    try {
        value = field.get(entity);
        FieldSpec fieldSpec = spec.getField(field.getName());
        if (fieldSpec == null) {
            // Field not indexed, skip validation (lists stay empty, never null)
            return FieldValidation.builder()
                .fieldName(field.getName())
                .indexed(false)
                .errors(errors)
                .warnings(warnings)
                .build();
        }

        // Check for null values
        if (value == null) {
            if (fieldSpec.isRequired()) {
                errors.add(ValidationError.builder()
                    .fieldName(field.getName())
                    .message("Required field is null")
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.NULL_NOT_ALLOWED)
                    .build());
            } else if (fieldSpec.isSortable()) {
                warnings.add(ValidationWarning.builder()
                    .fieldName(field.getName())
                    .message("Sortable field is null")
                    .build());
            }
            return FieldValidation.builder()
                .fieldName(field.getName())
                .indexed(true)
                .value(null)
                .errors(errors)
                .warnings(warnings)
                .build();
        }

        // Type-specific validation
        if (field.isAnnotationPresent(Indexed.class)) {
            Indexed indexed = field.getAnnotation(Indexed.class);
            SchemaFieldType type = indexed.schemaFieldType();
            switch (type) {
                case NUMERIC:
                    errors.addAll(validateNumericField(field.getName(), value));
                    break;
                case TEXT:
                    errors.addAll(validateTextField(field.getName(), value, fieldSpec));
                    break;
                case TAG:
                    errors.addAll(validateTagField(field.getName(), value, fieldSpec));
                    break;
                case VECTOR:
                    errors.addAll(validateVectorField(field.getName(), value, indexed));
                    break;
                case GEO:
                    errors.addAll(validateGeoField(field.getName(), value));
                    break;
                default:
                    // Other schema types have no dedicated validator here.
                    break;
            }
        }
    } catch (IllegalAccessException e) {
        errors.add(ValidationError.builder()
            .fieldName(field.getName())
            .message("Could not access field: " + e.getMessage())
            .severity(ValidationError.Severity.MEDIUM)
            .type(ErrorType.MISSING_REQUIRED)
            .build());
    }
    return FieldValidation.builder()
        .fieldName(field.getName())
        .indexed(true)
        .value(value)
        .errors(errors)
        .warnings(warnings)
        .build();
}
/**
 * Validates the value of a NUMERIC field.
 *
 * <ul>
 *   <li>A {@code String} must parse with {@link Double#parseDouble(String)}.</li>
 *   <li>A {@code Collection} must contain only finite {@code Number}s.</li>
 *   <li>Any other value must be a finite {@code Number}.</li>
 * </ul>
 *
 * <p>NOTE(review): {@code Double.parseDouble} also accepts Java-specific forms
 * (surrounding whitespace, a trailing {@code f}/{@code d}, hexadecimal floats),
 * so it may be more lenient than the server-side parser — confirm before
 * relying on it for exact prediction.
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the non-null field value
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateNumericField(String fieldName, Object value) {
    List<ValidationError> errors = new ArrayList<>();
    if (value instanceof String) {
        try {
            Double.parseDouble((String) value);
        } catch (NumberFormatException e) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Cannot parse as numeric: " + value)
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.NUMERIC_PARSE_ERROR)
                .actualValue(value)
                .build());
        }
    } else if (value instanceof Collection) {
        // Validate array of numerics
        for (Object item : (Collection<?>) value) {
            if (!(item instanceof Number)) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message("Array contains non-numeric value: " + item)
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.TYPE_MISMATCH)
                    .actualValue(item)
                    .build());
            } else if (isNonFinite(item)) {
                errors.add(nonFiniteError(fieldName, item));
            }
        }
    } else if (!(value instanceof Number)) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Expected numeric type, got: " + value.getClass().getSimpleName())
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.TYPE_MISMATCH)
            .actualValue(value.getClass())
            .expectedValue("Number")
            .build());
    }

    // Check for infinity or NaN (both Double and Float)
    if (isNonFinite(value)) {
        errors.add(nonFiniteError(fieldName, value));
    }
    return errors;
}

/** Returns true when the candidate is a Double or Float holding NaN or an infinity. */
private static boolean isNonFinite(Object candidate) {
    if (candidate instanceof Double) {
        Double d = (Double) candidate;
        return d.isNaN() || d.isInfinite();
    }
    if (candidate instanceof Float) {
        Float f = (Float) candidate;
        return f.isNaN() || f.isInfinite();
    }
    return false;
}

/** Builds the RANGE_VIOLATION error reported for a NaN or infinite number. */
private static ValidationError nonFiniteError(String fieldName, Object number) {
    return ValidationError.builder()
        .fieldName(fieldName)
        .message("Invalid numeric value: " + number)
        .severity(ValidationError.Severity.HIGH)
        .type(ErrorType.RANGE_VIOLATION)
        .actualValue(number)
        .build();
}
/**
 * Validates the value of a TEXT field. A collection is checked element by
 * element; anything else is checked as a single text value.
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the field value
 * @param spec the field's index specification
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateTextField(String fieldName, Object value, FieldSpec spec) {
    // Scalar case first: delegate once and hand back a fresh list.
    if (!(value instanceof Collection)) {
        return new ArrayList<>(validateSingleText(fieldName, value, spec));
    }

    List<ValidationError> collected = new ArrayList<>();
    for (Object element : (Collection<?>) value) {
        collected.addAll(validateSingleText(fieldName, element, spec));
    }
    return collected;
}
/**
 * Validates one text value: it must survive a UTF-8 round trip and, when the
 * field specification declares a positive maximum length, must not exceed it.
 * Non-string values are checked through their {@code toString()} form; null
 * values are accepted without any check.
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the value to check; may be null
 * @param spec the field's index specification
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateSingleText(String fieldName, Object value, FieldSpec spec) {
    List<ValidationError> problems = new ArrayList<>();
    if (value == null) {
        return problems;
    }

    String content = (value instanceof String) ? (String) value : value.toString();
    if (content == null) {
        // toString() is allowed to return null; nothing to inspect then.
        return problems;
    }

    // Encoding check
    if (!isValidUtf8(content)) {
        problems.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Invalid UTF-8 encoding")
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.ENCODING_ERROR)
            .build());
    }

    // Length check, only when a positive limit is configured
    boolean limited = spec.getMaxTextLength() > 0;
    if (limited && content.length() > spec.getMaxTextLength()) {
        problems.add(ValidationError.builder()
            .fieldName(fieldName)
            .message(String.format("Text length %d exceeds maximum %d",
                content.length(), spec.getMaxTextLength()))
            .severity(ValidationError.Severity.MEDIUM)
            .type(ErrorType.TEXT_TOO_LONG)
            .actualValue(content.length())
            .expectedValue(spec.getMaxTextLength())
            .build());
    }
    return problems;
}
/**
 * Validates the value of a VECTOR field against the dimension and element
 * type declared on the {@code @Indexed} annotation.
 *
 * <ul>
 *   <li>{@code byte[]}: length must equal dimension x bytes-per-element. The
 *       check is skipped when the element type is not one this method knows
 *       the width of, rather than guessing a width.</li>
 *   <li>{@code float[]}: length must equal the dimension and every element
 *       must be finite.</li>
 *   <li>Any other non-null value is a type mismatch.</li>
 * </ul>
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the field value
 * @param indexed the field's {@code @Indexed} annotation
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateVectorField(String fieldName, Object value, Indexed indexed) {
    List<ValidationError> errors = new ArrayList<>();
    int expectedDimension = indexed.dimension();
    VectorType vectorType = indexed.type();

    if (value instanceof byte[]) {
        byte[] blob = (byte[]) value;
        int bytesPerElement = bytesPerElement(vectorType);
        if (bytesPerElement > 0) {
            int expectedBytes = expectedDimension * bytesPerElement;
            if (blob.length != expectedBytes) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Vector blob size %d != expected %d (dim=%d, type=%s)",
                        blob.length, expectedBytes, expectedDimension, vectorType))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_VECTOR_DIMENSION)
                    .actualValue(blob.length)
                    .expectedValue(expectedBytes)
                    .build());
            }
        }
    } else if (value instanceof float[]) {
        float[] vector = (float[]) value;
        if (vector.length != expectedDimension) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message(String.format("Vector dimension %d != expected %d",
                    vector.length, expectedDimension))
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.INVALID_VECTOR_DIMENSION)
                .actualValue(vector.length)
                .expectedValue(expectedDimension)
                .build());
        }
        // Check for NaN or Infinity
        for (int i = 0; i < vector.length; i++) {
            if (Float.isNaN(vector[i]) || Float.isInfinite(vector[i])) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Invalid vector value at index %d: %f", i, vector[i]))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.RANGE_VIOLATION)
                    .actualValue(vector[i])
                    .build());
            }
        }
    } else if (value != null) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Vector must be byte[] or float[], got: " + value.getClass().getSimpleName())
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.TYPE_MISMATCH)
            .actualValue(value.getClass())
            .expectedValue("byte[] or float[]")
            .build());
    }
    return errors;
}

/**
 * Width in bytes of one vector element, or -1 when the type is null or not
 * recognized. Matching is done on the type's string form so that this method
 * does not depend on which constants the project's VectorType declares.
 */
private static int bytesPerElement(VectorType vectorType) {
    switch (String.valueOf(vectorType)) {
        case "FLOAT64":
            return 8;
        case "FLOAT32":
            return 4;
        case "FLOAT16":
        case "BFLOAT16":
            return 2;
        case "INT8":
        case "UINT8":
            return 1;
        default:
            return -1;
    }
}
/**
 * Validates the value of a GEO field.
 *
 * <ul>
 *   <li>{@code Point}: longitude must be within [-180, 180] and latitude within
 *       [-90, 90]; NaN coordinates are rejected.</li>
 *   <li>{@code String}: either a {@code "longitude,latitude"} pair, which is
 *       range-checked like a point, or a WKT string accepted by
 *       {@code isValidWkt}.</li>
 *   <li>Any other non-null value is a type mismatch.</li>
 * </ul>
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the field value
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateGeoField(String fieldName, Object value) {
    List<ValidationError> errors = new ArrayList<>();
    if (value instanceof Point) {
        Point point = (Point) value;
        addCoordinateRangeErrors(errors, fieldName, point.getX(), point.getY());
    } else if (value instanceof String) {
        String text = (String) value;
        double[] lonLat = parseLonLat(text);
        if (lonLat != null) {
            // Plain "lon,lat" pair: same range rules as a Point.
            addCoordinateRangeErrors(errors, fieldName, lonLat[0], lonLat[1]);
        } else if (!isValidWkt(text)) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Invalid WKT format: " + text)
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.FORMAT_ERROR)
                .actualValue(text)
                .build());
        }
    } else if (value != null) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Geo field must be Point or WKT string, got: " + value.getClass().getSimpleName())
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.TYPE_MISMATCH)
            .actualValue(value.getClass())
            .expectedValue("Point or String(WKT)")
            .build());
    }
    return errors;
}

/**
 * Adds an INVALID_GEO_COORDINATES error for each coordinate outside its range.
 * The tests are written as negated "inside" checks so that NaN, which fails
 * every comparison, is reported as invalid.
 */
private static void addCoordinateRangeErrors(List<ValidationError> errors, String fieldName,
        double longitude, double latitude) {
    if (!(longitude >= -180 && longitude <= 180)) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message(String.format("Invalid longitude: %f (must be -180 to 180)", longitude))
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.INVALID_GEO_COORDINATES)
            .actualValue(longitude)
            .build());
    }
    if (!(latitude >= -90 && latitude <= 90)) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message(String.format("Invalid latitude: %f (must be -90 to 90)", latitude))
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.INVALID_GEO_COORDINATES)
            .actualValue(latitude)
            .build());
    }
}

/**
 * Parses a "longitude,latitude" pair. Returns {longitude, latitude}, or null
 * when the text is not two comma-separated decimal numbers.
 */
private static double[] parseLonLat(String text) {
    String[] parts = text.split(",", -1);
    if (parts.length != 2) {
        return null;
    }
    try {
        return new double[] {
            Double.parseDouble(parts[0].trim()),
            Double.parseDouble(parts[1].trim())
        };
    } catch (NumberFormatException notAPair) {
        // Not a coordinate pair; the caller falls back to the WKT check.
        return null;
    }
}
/**
 * Validates the value of a TAG field.
 *
 * <ul>
 *   <li>{@code Collection}: must not exceed the field's positive tag limit, and
 *       every element must be a {@code String} without {@code ','} or
 *       {@code ';'}. A null element is reported as a type mismatch.</li>
 *   <li>{@code String}: must not contain {@code ','} or {@code ';'}.</li>
 *   <li>Any other non-null value is a type mismatch.</li>
 * </ul>
 *
 * @param fieldName name of the field, used in the reported errors
 * @param value the field value
 * @param spec the field's index specification
 * @return the errors found; empty when the value is acceptable
 */
private List<ValidationError> validateTagField(String fieldName, Object value, FieldSpec spec) {
    List<ValidationError> errors = new ArrayList<>();
    if (value instanceof Collection) {
        Collection<?> tags = (Collection<?>) value;
        // Check tag count limit
        if (spec.getMaxTags() > 0 && tags.size() > spec.getMaxTags()) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message(String.format("Tag count %d exceeds maximum %d",
                    tags.size(), spec.getMaxTags()))
                .severity(ValidationError.Severity.MEDIUM)
                .type(ErrorType.TAG_LIMIT_EXCEEDED)
                .actualValue(tags.size())
                .expectedValue(spec.getMaxTags())
                .build());
        }
        // Validate each tag
        for (Object tag : tags) {
            if (tag instanceof String) {
                checkTagSeparators(fieldName, (String) tag, errors);
            } else {
                // tag may be null here, so never dereference it unguarded.
                Class<?> actualType = (tag == null) ? null : tag.getClass();
                String typeName = (actualType == null) ? "null" : actualType.getSimpleName();
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message("Tag must be string, got: " + typeName)
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.TYPE_MISMATCH)
                    .actualValue(actualType)
                    .expectedValue("String")
                    .build());
            }
        }
    } else if (value instanceof String) {
        // Single tag value
        checkTagSeparators(fieldName, (String) value, errors);
    } else if (value != null) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Tag field must be String or Collection<String>, got: " + value.getClass().getSimpleName())
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.TYPE_MISMATCH)
            .actualValue(value.getClass())
            .expectedValue("String or Collection<String>")
            .build());
    }
    return errors;
}

/** Adds a FORMAT_ERROR when the tag contains one of the separator characters ',' or ';'. */
private static void checkTagSeparators(String fieldName, String tag, List<ValidationError> errors) {
    if (tag.contains(",") || tag.contains(";")) {
        errors.add(ValidationError.builder()
            .fieldName(fieldName)
            .message("Tag contains invalid separator characters: " + tag)
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.FORMAT_ERROR)
            .actualValue(tag)
            .build());
    }
}
/**
 * Reports every {@code @Indexed} field, declared directly on the entity's
 * class, whose index path was already claimed by an earlier field.
 *
 * @param entity the entity whose class is inspected
 * @param spec the index specification (not consulted by this check)
 * @return one DUPLICATE_FIELD error per repeated path; empty when all paths are unique
 */
private List<ValidationError> checkDuplicateFields(Object entity, IndexSpec spec) {
    List<ValidationError> duplicates = new ArrayList<>();
    Set<String> seenPaths = new HashSet<>();
    for (Field candidate : entity.getClass().getDeclaredFields()) {
        if (!candidate.isAnnotationPresent(Indexed.class)) {
            continue;
        }
        String indexPath = getFieldPath(candidate);
        // Set.add returns false when the path was already present.
        boolean firstOccurrence = seenPaths.add(indexPath);
        if (!firstOccurrence) {
            duplicates.add(ValidationError.builder()
                .fieldName(candidate.getName())
                .fieldPath(indexPath)
                .message("Field path already indexed: " + indexPath)
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.DUPLICATE_FIELD)
                .build());
        }
    }
    return duplicates;
}
/**
 * Validates the JSON structure of a {@code @Document} entity.
 *
 * <p>The circular-reference check runs first: serializing a cyclic object
 * graph recurses until the stack overflows, so serialization is skipped once a
 * cycle has been found. Otherwise the entity is serialized with Gson and the
 * output is parsed back to confirm it is a JSON object.
 *
 * <p>A {@link StackOverflowError} raised while checking or serializing (for
 * example by a cycle the detector missed, or an extremely deep graph) is
 * reported as a validation error instead of propagating to the caller.
 *
 * @param entity the entity to check
 * @return the errors found; empty when the entity serializes to a JSON object
 */
private List<ValidationError> validateJsonStructure(Object entity) {
    List<ValidationError> errors = new ArrayList<>();
    try {
        // Check for circular references before attempting to serialize
        if (hasCircularReference(entity)) {
            errors.add(ValidationError.builder()
                .fieldName("_document")
                .message("Entity contains circular references")
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.INVALID_JSON_STRUCTURE)
                .build());
            return errors;
        }

        // Attempt to serialize to JSON
        Gson gson = gsonBuilder.create();
        String json = gson.toJson(entity);

        // Parse back to validate structure
        JsonElement element = JsonParser.parseString(json);
        if (!element.isJsonObject()) {
            errors.add(ValidationError.builder()
                .fieldName("_document")
                .message("Entity does not serialize to valid JSON object")
                .severity(ValidationError.Severity.CRITICAL)
                .type(ErrorType.INVALID_JSON_STRUCTURE)
                .build());
        }
    } catch (Exception | StackOverflowError e) {
        // StackOverflowError carries no message, so describe it explicitly.
        String reason = (e instanceof StackOverflowError)
            ? "stack overflow (cyclic or excessively deep object graph)"
            : e.getMessage();
        errors.add(ValidationError.builder()
            .fieldName("_document")
            .message("Failed to serialize to JSON: " + reason)
            .severity(ValidationError.Severity.CRITICAL)
            .type(ErrorType.INVALID_JSON_STRUCTURE)
            .build());
    }
    return errors;
}
// Helper methods
/**
 * Checks that the text is unchanged by an encode/decode round trip through
 * UTF-8. Returns {@code false} for null.
 */
private boolean isValidUtf8(String text) {
    if (text == null) {
        return false;
    }
    byte[] encoded = text.getBytes(StandardCharsets.UTF_8);
    String roundTripped = new String(encoded, StandardCharsets.UTF_8);
    return text.equals(roundTripped);
}
/**
 * Basic WKT check: the text must be non-null and start with one of the
 * recognized geometry keywords. The remainder of the text is not inspected.
 */
private boolean isValidWkt(String wkt) {
    if (wkt == null) {
        return false;
    }
    String[] geometryKeywords = {
        "POINT", "LINESTRING", "POLYGON",
        "MULTIPOINT", "MULTILINESTRING", "MULTIPOLYGON"
    };
    for (String keyword : geometryKeywords) {
        if (wkt.startsWith(keyword)) {
            return true;
        }
    }
    return false;
}
/**
 * Estimates the stored size of an entity as the UTF-8 byte length of its Gson
 * serialization.
 *
 * <p>Returns 0 when the entity cannot be serialized. This includes a
 * {@link StackOverflowError} caused by a cyclic or extremely deep object
 * graph, which is not an {@code Exception} and would otherwise escape. The
 * failure is logged at debug level rather than silently discarded.
 *
 * @param entity the entity to measure
 * @return the estimated size in bytes, or 0 when it cannot be determined
 */
private long estimateEntitySize(Object entity) {
    try {
        Gson gson = gsonBuilder.create();
        String json = gson.toJson(entity);
        return json.getBytes(StandardCharsets.UTF_8).length;
    } catch (Exception | StackOverflowError e) {
        log.debug("Could not estimate serialized entity size; reporting 0", e);
        return 0;
    }
}
/**
 * Returns the upper bound that validate() compares the estimated entity size against.
 *
 * NOTE(review): `properties` is not declared among the fields visible in this
 * class - confirm that it is injected alongside gsonBuilder and indexer.
 */
private long getMaxDocumentSize() {
// Redis JSON max document size (configurable)
return properties.getMaxDocumentSize(); // Default: 64MB (as stated by the original author; not verifiable from this file)
}
/**
 * Tells whether the object graph reachable from the entity contains a cycle,
 * i.e. an object that is reachable from itself.
 *
 * @param entity root of the graph; may be null
 * @return {@code true} when a cycle was found
 */
private boolean hasCircularReference(Object entity) {
    return hasCircularReference(entity, new IdentityHashMap<>());
}

/**
 * Depth-first search for a cycle. {@code visited} holds the objects on the
 * current path (compared by identity); an object is removed again when its
 * subtree has been explored, so an object that is merely shared by two
 * branches is not mistaken for a cycle.
 *
 * <p>Traversal rules:
 * <ul>
 *   <li>Strings, boxed numbers, booleans, characters, enums and classes are
 *       leaves.</li>
 *   <li>Object arrays, {@code Iterable}s and {@code Map} values are followed
 *       element by element.</li>
 *   <li>Other JDK types are not reflected into, since their internals may not
 *       be accessible.</li>
 *   <li>For all other objects, the instance fields of the class and its
 *       superclasses are followed. Static, transient and synthetic fields are
 *       skipped: static fields are not part of the instance, and following
 *       them makes constants such as {@code Boolean.TRUE} appear to reference
 *       themselves.</li>
 * </ul>
 */
private boolean hasCircularReference(Object obj, IdentityHashMap<Object, Boolean> visited) {
    if (obj == null || isLeafValue(obj)) {
        return false;
    }
    if (visited.containsKey(obj)) {
        return true;
    }
    visited.put(obj, Boolean.TRUE);
    try {
        return anyChildHasCircularReference(obj, visited);
    } finally {
        visited.remove(obj);
    }
}

/** Values that cannot take part in a cycle and are never descended into. */
private static boolean isLeafValue(Object obj) {
    return obj instanceof String
        || obj instanceof Number
        || obj instanceof Boolean
        || obj instanceof Character
        || obj instanceof Enum
        || obj instanceof Class;
}

/** Visits everything directly referenced by obj and reports whether any path loops back. */
private boolean anyChildHasCircularReference(Object obj, IdentityHashMap<Object, Boolean> visited) {
    Class<?> type = obj.getClass();
    if (type.isArray()) {
        if (type.getComponentType().isPrimitive()) {
            return false;
        }
        for (Object element : (Object[]) obj) {
            if (hasCircularReference(element, visited)) {
                return true;
            }
        }
        return false;
    }
    if (obj instanceof Iterable) {
        for (Object element : (Iterable<?>) obj) {
            if (hasCircularReference(element, visited)) {
                return true;
            }
        }
        return false;
    }
    if (obj instanceof Map) {
        for (Object element : ((Map<?, ?>) obj).values()) {
            if (hasCircularReference(element, visited)) {
                return true;
            }
        }
        return false;
    }

    for (Class<?> current = type;
            current != null && current != Object.class && !isJdkType(current);
            current = current.getSuperclass()) {
        for (Field field : current.getDeclaredFields()) {
            int modifiers = field.getModifiers();
            if (field.isSynthetic()
                    || field.getType().isPrimitive()
                    || java.lang.reflect.Modifier.isStatic(modifiers)
                    || java.lang.reflect.Modifier.isTransient(modifiers)) {
                continue;
            }
            Object child;
            try {
                field.setAccessible(true);
                child = field.get(obj);
            } catch (IllegalAccessException | RuntimeException inaccessible) {
                // Skip fields that cannot be opened reflectively.
                continue;
            }
            if (hasCircularReference(child, visited)) {
                return true;
            }
        }
    }
    return false;
}

/** True for classes from the java.* and javax.* namespaces. */
private static boolean isJdkType(Class<?> type) {
    String name = type.getName();
    return name.startsWith("java.") || name.startsWith("javax.");
}
}@Service
@Slf4j
public class IndexingDiagnosticService {
@Autowired
private EntityIndexingValidator validator;
@Autowired
private RedisModulesOperations<String> modulesOperations;
/**
 * Diagnoses why a specific entity failed to index.
 *
 * <p>Loads the stored JSON for the entity, deserializes it, runs the
 * validator, checks whether the document can be retrieved through the search
 * index, records the index-wide failure counter and finally derives
 * recommendations from the validation errors. Any exception is recorded on
 * the report as an issue instead of being propagated.
 *
 * <p>The failure counter from the index info is converted leniently (number
 * or numeric string) so that its representation cannot abort the diagnosis
 * before the recommendations are produced.
 *
 * <p>NOTE(review): {@code gsonBuilder} and {@code indexer} are not declared
 * among the fields visible in this class — confirm they are injected. Also
 * confirm whether the index lookup expects the bare entity id or the full
 * Redis key.
 *
 * @param entityId id of the entity to diagnose
 * @param entityClass class of the entity
 * @return the diagnosis report; never null
 */
public DiagnosisReport diagnoseEntity(String entityId, Class<?> entityClass) {
    DiagnosisReport report = new DiagnosisReport();
    report.setEntityId(entityId);
    report.setEntityClass(entityClass);
    try {
        // 1. Retrieve the entity from Redis
        String key = buildKey(entityClass, entityId);
        String json = modulesOperations.execute(jedis ->
            jedis.jsonGet(key, Path2.ROOT_PATH));
        if (json == null) {
            report.addIssue("Entity not found in Redis");
            return report;
        }

        // 2. Deserialize entity
        Gson gson = gsonBuilder.create();
        Object entity = gson.fromJson(json, entityClass);

        // 3. Run validation
        ValidationResult validation = validator.validate(entity);
        report.setValidationResult(validation);

        // 4. Check index status
        String indexName = indexer.getIndexName(entityClass);
        SearchOperations<String> searchOps = modulesOperations.opsForSearch(indexName);
        // Try to retrieve from index
        try {
            Document doc = searchOps.get(entityId);
            if (doc == null) {
                report.addIssue("Document not found in index");
            } else {
                report.setIndexed(true);
            }
        } catch (Exception e) {
            report.addIssue("Failed to retrieve from index: " + e.getMessage());
        }

        // 5. Get index info for failure count
        Map<String, Object> info = searchOps.getInfo();
        Long failures = toLongOrNull(info.get("hash_indexing_failures"));
        if (failures != null) {
            report.setTotalIndexFailures(failures);
        }

        // 6. Analyze specific failure reasons
        analyzeFailureReasons(report, entity, validation);
    } catch (Exception e) {
        report.addIssue("Diagnostic failed: " + e.getMessage());
        log.error("Failed to diagnose entity {}", entityId, e);
    }
    return report;
}

/** Converts a number or numeric string to Long; returns null for anything else. */
private static Long toLongOrNull(Object raw) {
    if (raw instanceof Number) {
        return ((Number) raw).longValue();
    }
    if (raw instanceof String) {
        try {
            return Long.parseLong(((String) raw).trim());
        } catch (NumberFormatException notNumeric) {
            return null;
        }
    }
    return null;
}
/**
 * Turns the validation errors into recommendations on the report.
 *
 * <p>Errors are grouped by type. Type mismatches and null-in-required-field
 * errors each yield one recommendation listing the affected fields; every
 * field is named once, however many errors it produced. Vector dimension
 * errors yield a fixed recommendation, and documents estimated above 16 MB
 * yield a size recommendation.
 *
 * @param report the report to add recommendations to
 * @param entity the diagnosed entity (not used by the current rules)
 * @param validation the validation result to analyze
 */
private void analyzeFailureReasons(DiagnosisReport report, Object entity, ValidationResult validation) {
    // Group errors by type
    Map<ErrorType, List<ValidationError>> errorsByType = validation.getErrors().stream()
        .collect(Collectors.groupingBy(ValidationError::getType));

    // Generate recommendations
    if (errorsByType.containsKey(ErrorType.TYPE_MISMATCH)) {
        report.addRecommendation("Fix data type mismatches in fields: " +
            joinDistinctFieldNames(errorsByType.get(ErrorType.TYPE_MISMATCH)));
    }
    if (errorsByType.containsKey(ErrorType.INVALID_VECTOR_DIMENSION)) {
        report.addRecommendation("Ensure vector dimensions match index specification");
    }
    if (errorsByType.containsKey(ErrorType.NULL_NOT_ALLOWED)) {
        report.addRecommendation("Provide values for required fields: " +
            joinDistinctFieldNames(errorsByType.get(ErrorType.NULL_NOT_ALLOWED)));
    }

    // Check for common patterns
    if (validation.getEstimatedSize() > 16_000_000) { // 16MB
        report.addRecommendation("Consider reducing document size (currently " +
            validation.getEstimatedSize() + " bytes)");
    }
}

/** Joins the distinct, non-null field names of the errors with ", " in first-seen order. */
private static String joinDistinctFieldNames(List<ValidationError> errors) {
    return errors.stream()
        .map(ValidationError::getFieldName)
        .filter(name -> name != null)
        .distinct()
        .collect(Collectors.joining(", "));
}
/**
 * Diagnoses a batch of entities and aggregates their validation errors.
 *
 * <p>Each id is diagnosed individually and its report added to the batch
 * report. Errors from reports that carry a validation result are tallied per
 * error type and per field name; the most frequent of each is stored on the
 * batch report (null when nothing was tallied).
 *
 * @param entityIds ids of the entities to diagnose
 * @param entityClass class shared by the entities
 * @return the batch report
 */
public BatchDiagnosisReport diagnoseFailures(List<String> entityIds, Class<?> entityClass) {
    BatchDiagnosisReport summary = new BatchDiagnosisReport();
    Map<ErrorType, Integer> typeTally = new HashMap<>();
    Map<String, Integer> fieldTally = new HashMap<>();

    for (String id : entityIds) {
        DiagnosisReport single = diagnoseEntity(id, entityClass);
        summary.addReport(id, single);

        // Only reports that got as far as validation contribute to the statistics.
        if (single.getValidationResult() == null) {
            continue;
        }
        single.getValidationResult().getErrors().forEach(problem -> {
            typeTally.merge(problem.getType(), 1, Integer::sum);
            fieldTally.merge(problem.getFieldName(), 1, Integer::sum);
        });
    }

    // Identify patterns
    summary.setMostCommonError(keyWithHighestCount(typeTally));
    summary.setMostProblematicField(keyWithHighestCount(fieldTally));
    return summary;
}

/** Returns the key with the largest count, or null when the tally is empty. */
private static <K> K keyWithHighestCount(Map<K, Integer> tally) {
    return tally.entrySet().stream()
        .max(Map.Entry.comparingByValue())
        .map(Map.Entry::getKey)
        .orElse(null);
}
}@Service
public class ProductService {
@Autowired
private EntityIndexingValidator validator;
@Autowired
private ProductRepository repository;
/**
 * Validates a product and saves it only when validation succeeds.
 *
 * <p>This example rejects invalid products by throwing. Two alternative
 * policies can replace the {@code throw}; they are mutually exclusive with it,
 * because any statement placed after an unconditional {@code throw} is
 * unreachable and does not compile:
 * <ul>
 *   <li>repair and continue:
 *       {@code product = attemptAutoFix(product, validation);}</li>
 *   <li>save anyway but record the failure:
 *       {@code trackValidationFailure(product.getId(), validation);}</li>
 * </ul>
 *
 * @param product the product to save
 * @return the saved product
 * @throws ValidationException when the product fails validation
 */
public Product saveWithValidation(Product product) {
    // Validate before saving
    ValidationResult validation = validator.validate(product);
    if (!validation.isValid()) {
        log.error("Product validation failed: {}", validation.generateReport());
        throw new ValidationException(validation);
    }
    return repository.save(product);
}
/**
 * Applies an in-place repair for each validation error that has one:
 * over-long text is truncated, vectors of the wrong dimension are adjusted and
 * null required fields receive a default. Other error types are left alone.
 *
 * @param product the product to repair; modified in place
 * @param validation the validation result whose errors drive the repairs
 * @return the same product instance
 */
private Product attemptAutoFix(Product product, ValidationResult validation) {
    for (ValidationError problem : validation.getErrors()) {
        final String affectedField = problem.getFieldName();
        switch (problem.getType()) {
            case TEXT_TOO_LONG:
                truncateField(product, affectedField);
                break;
            case INVALID_VECTOR_DIMENSION:
                // Pads or truncates the vector.
                fixVectorDimension(product, affectedField);
                break;
            case NULL_NOT_ALLOWED:
                setDefaultValue(product, affectedField);
                break;
            default:
                // No automatic repair for the remaining error types.
                break;
        }
    }
    return product;
}
}@RestController
@RequestMapping("/api/diagnostics")
public class DiagnosticsController {
@Autowired
private IndexingDiagnosticService diagnosticService;
/**
 * GET /api/diagnostics/diagnose/{entityId}: delegates to the diagnostic service
 * and returns its report for the given entity.
 *
 * NOTE(review): entityClass is bound from a request parameter; presumably the
 * framework resolves it from a class name supplied by the caller - confirm,
 * and consider restricting it to known entity types.
 */
@GetMapping("/diagnose/{entityId}")
public DiagnosisReport diagnoseEntity(
@PathVariable String entityId,
@RequestParam Class<?> entityClass) {
return diagnosticService.diagnoseEntity(entityId, entityClass);
}
/**
 * POST /api/diagnostics/diagnose/batch: diagnoses every id in the request body
 * through the diagnostic service, logs a three-line summary and returns the
 * batch report.
 *
 * NOTE(review): `log` is not declared in the visible part of this class -
 * confirm a logger is available (for example via @Slf4j).
 */
@PostMapping("/diagnose/batch")
public BatchDiagnosisReport diagnoseBatch(
@RequestBody List<String> entityIds,
@RequestParam Class<?> entityClass) {
BatchDiagnosisReport report = diagnosticService.diagnoseFailures(entityIds, entityClass);
// Log summary
log.info("Batch diagnosis complete: {} entities analyzed", entityIds.size());
log.info("Most common error: {}", report.getMostCommonError());
log.info("Most problematic field: {}", report.getMostProblematicField());
return report;
}
}@Component
public class ValidatingRepositoryWrapper {
@Autowired
private EntityIndexingValidator validator;
/**
 * Validates every entity, saves only the valid ones and reports the rest.
 *
 * <p>Validation results are keyed by entity <em>identity</em>, not by
 * {@code equals}/{@code hashCode}. Entities are mutable and frequently
 * implement value equality, so with a hash-based map two equal entities would
 * overwrite each other's result, and a key mutated after insertion could no
 * longer be found.
 *
 * @param repository the repository used to persist the valid entities
 * @param entities the entities to validate and save
 * @param <T> entity type
 * @return the saved entities, the skipped entities and the validation result
 *         of every input entity
 */
public <T> SaveResult<T> saveAllWithValidation(
        RedisDocumentRepository<T, ?> repository,
        Iterable<T> entities) {
    List<T> validEntities = new ArrayList<>();
    List<T> invalidEntities = new ArrayList<>();
    Map<T, ValidationResult> validationResults = new IdentityHashMap<>();

    // Pre-validate all entities
    for (T entity : entities) {
        ValidationResult validation = validator.validate(entity);
        validationResults.put(entity, validation);
        if (validation.isValid()) {
            validEntities.add(entity);
        } else {
            invalidEntities.add(entity);
        }
    }

    // Save only valid entities
    List<T> saved = repository.saveAll(validEntities);

    // Report on invalid entities
    if (!invalidEntities.isEmpty()) {
        log.warn("Skipped {} invalid entities", invalidEntities.size());
        // Building a report is not free; skip it entirely when debug output is disabled.
        if (log.isDebugEnabled()) {
            for (T invalid : invalidEntities) {
                ValidationResult validation = validationResults.get(invalid);
                log.debug("Invalid entity: {}", validation.generateReport());
            }
        }
    }

    return SaveResult.<T>builder()
        .saved(saved)
        .skipped(invalidEntities)
        .validationResults(validationResults)
        .build();
}
}@DataRedisTest
public class ValidationTest {
@Autowired
private EntityIndexingValidator validator;
/** A non-numeric price must make validation fail with a NUMERIC_PARSE_ERROR. */
@Test
public void testInvalidNumericField() {
    // Arrange: price is expected to be numeric
    Product candidate = new Product();
    candidate.setPrice("not-a-number");

    // Act
    ValidationResult outcome = validator.validate(candidate);

    // Assert
    assertThat(outcome.isValid()).isFalse();
    assertThat(outcome.getErrors())
        .extracting(ValidationError::getType)
        .contains(ErrorType.NUMERIC_PARSE_ERROR);
}
/** An embedding shorter than the declared dimension must make validation fail with INVALID_VECTOR_DIMENSION. */
@Test
public void testVectorDimensionMismatch() {
    // Arrange: 512 elements where 1536 are expected
    Product candidate = new Product();
    candidate.setEmbedding(new float[512]);

    // Act
    ValidationResult outcome = validator.validate(candidate);

    // Assert
    assertThat(outcome.isValid()).isFalse();
    assertThat(outcome.getErrors())
        .extracting(ValidationError::getType)
        .contains(ErrorType.INVALID_VECTOR_DIMENSION);
}
}
This validation utility provides:
- Pre-save validation - Catch issues before they cause indexing failures
- Detailed error reporting - Understand exactly why validation failed
- Diagnostic tools - Analyze entities that already failed
- Batch analysis - Find patterns in multiple failures
- Auto-fix capabilities - Attempt to correct common issues
- Testing support - Validate entities in unit tests
The validator matches the exact validation logic from RediSearch source code, ensuring accurate prediction of indexing failures.