Skip to content

Entity Validation

Brian Sam-Bodden edited this page Jan 11, 2026 · 1 revision

Redis OM Spring: Entity Validation Utility

Pre-Save Validation & Diagnostic Tools

Based on an analysis of the RediSearch and RedisJSON source code, this page lists the main reasons why indexing fails and provides a validation utility that detects these issues before an entity is saved.

Common Indexing Failure Reasons

From RediSearch Source Analysis

  1. Type Mismatches

    • Numeric field receiving non-numeric string (e.g., "abc" in numeric field)
    • Vector field with wrong dimension size
    • Geometry field with invalid WKT format
    • Date field with unparseable date string
  2. Size Violations

    • Vector blob size mismatch (expected vs actual bytes)
    • Text field exceeding max length
    • Tag field with too many values
    • Document exceeding size limits
  3. Format Errors

    • Invalid UTF-8 encoding in text fields
    • Malformed JSON structure
    • Invalid geo coordinates (out of range)
    • Non-normalized vector values
  4. Null/Missing Handling

    • Required fields with null values
    • Missing fields that are indexed
    • Empty arrays where not allowed
    • Null in sortable fields
  5. Duplicate Fields

    • Same field indexed twice in document
    • Conflicting field paths in nested JSON

Entity Validation Utility

package com.redis.om.spring.validation;

import com.redis.om.spring.annotations.*;
import com.redis.om.spring.metamodel.SearchFieldAccessor;
import org.springframework.stereotype.Component;
import lombok.extern.slf4j.Slf4j;

/**
 * Validates entities before indexing to predict and prevent RediSearch failures
 */
@Component
@Slf4j
public class EntityIndexingValidator {
    
    private final GsonBuilder gsonBuilder;
    private final RediSearchIndexer indexer;
    
    /**
     * Outcome of validating a single entity: the overall verdict, the collected
     * errors and warnings, per-field details and the estimated serialized size.
     *
     * <p>Instances are created through the Lombok-generated builder. A list that
     * was never set on the builder is treated as empty by {@link #hasErrors()},
     * {@link #hasWarnings()} and {@link #generateReport()} rather than causing a
     * {@code NullPointerException}.
     */
    @Data
    @Builder
    public static class ValidationResult {
        private final boolean valid;
        private final String entityId;
        private final Class<?> entityClass;
        private final List<ValidationError> errors;
        private final List<ValidationWarning> warnings;
        private final Map<String, FieldValidation> fieldValidations;
        private final long estimatedSize;

        /** @return {@code true} when at least one error was recorded */
        public boolean hasErrors() {
            return errors != null && !errors.isEmpty();
        }

        /** @return {@code true} when at least one warning was recorded */
        public boolean hasWarnings() {
            return warnings != null && !warnings.isEmpty();
        }

        /**
         * Renders a plain-text summary: a header with entity type and id, the
         * counts, and one line per error and per warning.
         *
         * @return the multi-line report
         */
        public String generateReport() {
            // Unset builder properties are tolerated so a partially populated result can still be reported.
            int errorCount = errors == null ? 0 : errors.size();
            int warningCount = warnings == null ? 0 : warnings.size();
            String typeName = entityClass == null ? "<unknown>" : entityClass.getSimpleName();

            StringBuilder sb = new StringBuilder();
            sb.append(String.format("Validation Report for %s (ID: %s)\n",
                                  typeName, entityId));
            sb.append(String.format("Valid: %s | Errors: %d | Warnings: %d\n",
                                  valid, errorCount, warningCount));

            if (hasErrors()) {
                sb.append("\nERRORS:\n");
                for (ValidationError e : errors) {
                    sb.append(String.format("  - [%s] %s: %s\n",
                        e.getSeverity(), e.getFieldName(), e.getMessage()));
                }
            }

            if (hasWarnings()) {
                sb.append("\nWARNINGS:\n");
                for (ValidationWarning w : warnings) {
                    sb.append(String.format("  - %s: %s\n",
                        w.getFieldName(), w.getMessage()));
                }
            }

            return sb.toString();
        }
    }
    
    /**
     * A single problem found while validating an entity. Instances are created
     * through the Lombok-generated builder; the validator leaves properties that
     * do not apply to a given problem unset.
     */
    @Data
    @Builder
    public static class ValidationError {
        /** Severity levels attached to each error by the validator. */
        public enum Severity { CRITICAL, HIGH, MEDIUM, LOW }
        
        // Java field name, or "_document" for problems that concern the whole entity.
        private final String fieldName;
        // Index path of the field; the visible code sets it only for duplicate-path errors.
        private final String fieldPath;
        // Human-readable description of the problem.
        private final String message;
        private final Severity severity;
        // Machine-readable category, used by callers to group errors.
        private final ErrorType type;
        // Offending value (or its class / size), when the check recorded one.
        private final Object actualValue;
        // Expected value, limit or type description, when the check recorded one.
        private final Object expectedValue;
    }
    
    /**
     * Categories of validation problems. The notes describe where the validator
     * code visible in this file emits each constant.
     */
    public enum ErrorType {
        // Value (or collection element) has a Java type the field kind does not accept.
        TYPE_MISMATCH,
        // Estimated document size exceeds the configured maximum.
        SIZE_VIOLATION,
        // Malformed content: invalid WKT string or tag containing a separator character.
        FORMAT_ERROR,
        // Field marked as required by the index spec holds null.
        NULL_NOT_ALLOWED,
        // Two indexed fields resolve to the same index path.
        DUPLICATE_FIELD,
        // Non-finite number (NaN / Infinity) in a numeric or vector field.
        RANGE_VIOLATION,
        // Text failed the UTF-8 round-trip check.
        ENCODING_ERROR,
        // No index spec for the entity, or a field could not be read reflectively.
        MISSING_REQUIRED,
        // Vector length or blob size differs from the declared dimension.
        INVALID_VECTOR_DIMENSION,
        // Longitude / latitude outside the accepted range.
        INVALID_GEO_COORDINATES,
        // Not emitted by the code visible in this file.
        INVALID_DATE_FORMAT,
        // String value of a numeric field cannot be parsed as a number.
        NUMERIC_PARSE_ERROR,
        // Text longer than the field spec's maximum text length.
        TEXT_TOO_LONG,
        // More tags than the field spec's maximum tag count.
        TAG_LIMIT_EXCEEDED,
        // Entity cannot be serialized to a JSON object, or contains circular references.
        INVALID_JSON_STRUCTURE
    }
    
    /**
     * Validates an entity before saving to predict indexing failures.
     *
     * <p>Looks up the index spec for the entity's class, validates every declared
     * instance field against it, checks for duplicate index paths, compares the
     * estimated serialized size with the configured maximum and, for classes
     * annotated with {@code @Document}, validates the JSON structure.
     *
     * <p>Static and synthetic fields are skipped: they are not per-entity data
     * and would otherwise be reported (or fail) as if they were entity fields.
     *
     * @param entity the entity to validate; must not be {@code null}
     * @return the validation result; {@code valid} is {@code true} only when no
     *         error was collected
     * @throws NullPointerException if {@code entity} is {@code null}
     */
    public <T> ValidationResult validate(T entity) {
        if (entity == null) {
            // Same exception type the implicit dereference produced, but with a usable message.
            throw new NullPointerException("entity must not be null");
        }

        Class<?> entityClass = entity.getClass();
        List<ValidationError> errors = new ArrayList<>();
        List<ValidationWarning> warnings = new ArrayList<>();
        Map<String, FieldValidation> fieldValidations = new HashMap<>();

        // Get index metadata
        String indexName = indexer.getIndexName(entityClass);
        IndexSpec spec = indexer.getSpec(indexName);

        if (spec == null) {
            errors.add(ValidationError.builder()
                .message("No index specification found for entity")
                .severity(ValidationError.Severity.CRITICAL)
                .type(ErrorType.MISSING_REQUIRED)
                .build());

            return ValidationResult.builder()
                .valid(false)
                .entityId(extractId(entity))
                .entityClass(entityClass)
                .errors(errors)
                .warnings(warnings)
                // Empty map rather than null, consistent with the normal return path.
                .fieldValidations(fieldValidations)
                .build();
        }

        // Validate each instance field
        for (Field field : entityClass.getDeclaredFields()) {
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            field.setAccessible(true);
            FieldValidation validation = validateField(entity, field, spec);
            fieldValidations.put(field.getName(), validation);

            // A builder-created FieldValidation may carry no lists at all.
            if (validation.getErrors() != null) {
                errors.addAll(validation.getErrors());
            }
            if (validation.getWarnings() != null) {
                warnings.addAll(validation.getWarnings());
            }
        }

        // Check for duplicate fields
        errors.addAll(checkDuplicateFields(entity, spec));

        // Estimate document size
        long estimatedSize = estimateEntitySize(entity);
        long maxDocumentSize = getMaxDocumentSize();
        if (estimatedSize > maxDocumentSize) {
            errors.add(ValidationError.builder()
                .fieldName("_document")
                .message(String.format("Document size %d exceeds maximum %d",
                                     estimatedSize, maxDocumentSize))
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.SIZE_VIOLATION)
                .actualValue(estimatedSize)
                .expectedValue(maxDocumentSize)
                .build());
        }

        // Check JSON structure if Document
        if (entityClass.isAnnotationPresent(Document.class)) {
            errors.addAll(validateJsonStructure(entity));
        }

        return ValidationResult.builder()
            .valid(errors.isEmpty())
            .entityId(extractId(entity))
            .entityClass(entityClass)
            .errors(errors)
            .warnings(warnings)
            .fieldValidations(fieldValidations)
            .estimatedSize(estimatedSize)
            .build();
    }
    
    /**
     * Validates a single field of {@code entity} against the index spec.
     *
     * <p>Fields without a matching {@code FieldSpec} are reported as not indexed.
     * A null value yields an error when the spec marks the field as required, a
     * warning when it is sortable, and nothing otherwise. Non-null values of
     * fields annotated with {@code @Indexed} are dispatched to the validator for
     * their schema field type.
     *
     * <p>Every returned {@code FieldValidation} carries non-null error and
     * warning lists, so callers can aggregate them without null checks.
     *
     * @param entity the entity owning the field
     * @param field  the field to validate; must already be accessible
     * @param spec   the index spec of the entity's class
     * @return the validation outcome for this field
     */
    private FieldValidation validateField(Object entity, Field field, IndexSpec spec) {
        List<ValidationError> errors = new ArrayList<>();
        List<ValidationWarning> warnings = new ArrayList<>();

        try {
            Object value = field.get(entity);
            FieldSpec fieldSpec = spec.getField(field.getName());

            if (fieldSpec == null) {
                // Field not indexed, skip validation. The (empty) lists are still
                // supplied so that getErrors()/getWarnings() never return null.
                return FieldValidation.builder()
                    .fieldName(field.getName())
                    .indexed(false)
                    .errors(errors)
                    .warnings(warnings)
                    .build();
            }

            // Check for null values
            if (value == null) {
                if (fieldSpec.isRequired()) {
                    errors.add(ValidationError.builder()
                        .fieldName(field.getName())
                        .message("Required field is null")
                        .severity(ValidationError.Severity.HIGH)
                        .type(ErrorType.NULL_NOT_ALLOWED)
                        .build());
                } else if (fieldSpec.isSortable()) {
                    warnings.add(ValidationWarning.builder()
                        .fieldName(field.getName())
                        .message("Sortable field is null")
                        .build());
                }
                return FieldValidation.builder()
                    .fieldName(field.getName())
                    .indexed(true)
                    .value(null)
                    .errors(errors)
                    .warnings(warnings)
                    .build();
            }

            // Type-specific validation
            if (field.isAnnotationPresent(Indexed.class)) {
                Indexed indexed = field.getAnnotation(Indexed.class);
                SchemaFieldType type = indexed.schemaFieldType();

                switch (type) {
                    case NUMERIC:
                        errors.addAll(validateNumericField(field.getName(), value));
                        break;
                    case TEXT:
                        errors.addAll(validateTextField(field.getName(), value, fieldSpec));
                        break;
                    case TAG:
                        errors.addAll(validateTagField(field.getName(), value, fieldSpec));
                        break;
                    case VECTOR:
                        errors.addAll(validateVectorField(field.getName(), value, indexed));
                        break;
                    case GEO:
                        errors.addAll(validateGeoField(field.getName(), value));
                        break;
                    default:
                        // Other schema field types have no dedicated check here.
                        break;
                }
            }

        } catch (IllegalAccessException e) {
            errors.add(ValidationError.builder()
                .fieldName(field.getName())
                .message("Could not access field: " + e.getMessage())
                .severity(ValidationError.Severity.MEDIUM)
                .type(ErrorType.MISSING_REQUIRED)
                .build());
        }

        return FieldValidation.builder()
            .fieldName(field.getName())
            .indexed(true)
            .errors(errors)
            .warnings(warnings)
            .build();
    }
    
    /**
     * Validates the value of a numeric field.
     *
     * <ul>
     *   <li>a {@code String} must be parseable by {@link Double#parseDouble};</li>
     *   <li>a {@code Collection} must contain only {@link Number} elements;</li>
     *   <li>any other value must itself be a {@link Number};</li>
     *   <li>floating-point numbers ({@code Double} and {@code Float}, standalone
     *       or inside a collection) must be finite.</li>
     * </ul>
     *
     * @param fieldName name reported in the generated errors
     * @param value     the non-null field value
     * @return the errors found; empty when the value is acceptable
     */
    private List<ValidationError> validateNumericField(String fieldName, Object value) {
        List<ValidationError> errors = new ArrayList<>();

        if (value instanceof String) {
            try {
                // RediSearch uses fast_float_strtod - validate parsing
                Double.parseDouble((String) value);
            } catch (NumberFormatException e) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message("Cannot parse as numeric: " + value)
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.NUMERIC_PARSE_ERROR)
                    .actualValue(value)
                    .build());
            }
        } else if (value instanceof Collection) {
            // Validate array of numerics
            for (Object item : (Collection<?>) value) {
                if (!(item instanceof Number)) {
                    errors.add(ValidationError.builder()
                        .fieldName(fieldName)
                        .message("Array contains non-numeric value: " + item)
                        .severity(ValidationError.Severity.HIGH)
                        .type(ErrorType.TYPE_MISMATCH)
                        .actualValue(item)
                        .build());
                } else if (isNonFinite((Number) item)) {
                    errors.add(nonFiniteError(fieldName, (Number) item));
                }
            }
        } else if (!(value instanceof Number)) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Expected numeric type, got: " + value.getClass().getSimpleName())
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.TYPE_MISMATCH)
                .actualValue(value.getClass())
                .expectedValue("Number")
                .build());
        } else if (isNonFinite((Number) value)) {
            errors.add(nonFiniteError(fieldName, (Number) value));
        }

        return errors;
    }

    /** Tells whether {@code number} is a floating-point NaN or infinity. */
    private static boolean isNonFinite(Number number) {
        // Only Double and Float can hold NaN / Infinity; integral types are always finite.
        return (number instanceof Double || number instanceof Float)
            && !Double.isFinite(number.doubleValue());
    }

    /** Builds the RANGE_VIOLATION error reported for a NaN or infinite number. */
    private static ValidationError nonFiniteError(String fieldName, Number number) {
        return ValidationError.builder()
            .fieldName(fieldName)
            .message("Invalid numeric value: " + number)
            .severity(ValidationError.Severity.HIGH)
            .type(ErrorType.RANGE_VIOLATION)
            .actualValue(number)
            .build();
    }
    
    /**
     * Validates a text field. A {@code Collection} is checked element by
     * element; any other value is checked as a single text.
     *
     * @param fieldName name reported in the generated errors
     * @param value     the field value
     * @param spec      field spec supplying the text length limit
     * @return all errors found, in encounter order
     */
    private List<ValidationError> validateTextField(String fieldName, Object value, FieldSpec spec) {
        if (!(value instanceof Collection)) {
            // Scalar case: a single check, copied into a fresh list.
            return new ArrayList<>(validateSingleText(fieldName, value, spec));
        }

        List<ValidationError> collected = new ArrayList<>();
        for (Object element : (Collection<?>) value) {
            collected.addAll(validateSingleText(fieldName, element, spec));
        }
        return collected;
    }
    
    /**
     * Checks one text value: it must pass the UTF-8 check and, when the spec
     * defines a positive maximum text length, must not exceed it. Non-string
     * values are checked through their {@code toString()} form; a null value
     * (or a null {@code toString()}) produces no errors.
     *
     * @param fieldName name reported in the generated errors
     * @param value     the value to check; may be {@code null}
     * @param spec      field spec supplying the text length limit
     * @return the errors found; empty when the value is acceptable
     */
    private List<ValidationError> validateSingleText(String fieldName, Object value, FieldSpec spec) {
        List<ValidationError> problems = new ArrayList<>();
        if (value == null) {
            return problems;
        }

        final String content = (value instanceof String) ? (String) value : value.toString();
        if (content == null) {
            return problems;
        }

        // Encoding check
        boolean encodable = isValidUtf8(content);
        if (!encodable) {
            ValidationError encodingError = ValidationError.builder()
                .fieldName(fieldName)
                .message("Invalid UTF-8 encoding")
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.ENCODING_ERROR)
                .build();
            problems.add(encodingError);
        }

        // Length check; a non-positive limit means "no limit"
        int length = content.length();
        if (spec.getMaxTextLength() > 0 && length > spec.getMaxTextLength()) {
            String description = String.format("Text length %d exceeds maximum %d",
                                               length, spec.getMaxTextLength());
            ValidationError lengthError = ValidationError.builder()
                .fieldName(fieldName)
                .message(description)
                .severity(ValidationError.Severity.MEDIUM)
                .type(ErrorType.TEXT_TOO_LONG)
                .actualValue(length)
                .expectedValue(spec.getMaxTextLength())
                .build();
            problems.add(lengthError);
        }

        return problems;
    }
    
    /**
     * Validates a vector field against the dimension and element type declared
     * on its {@code @Indexed} annotation.
     *
     * <ul>
     *   <li>{@code byte[]}: length must equal {@code dimension * bytesPerElement};</li>
     *   <li>{@code float[]}: length must equal the dimension and every component
     *       must be finite;</li>
     *   <li>any other non-null type is reported as a type mismatch.</li>
     * </ul>
     *
     * @param fieldName name reported in the generated errors
     * @param value     the field value
     * @param indexed   the field's {@code @Indexed} annotation
     * @return the errors found; empty when the value is acceptable
     */
    private List<ValidationError> validateVectorField(String fieldName, Object value, Indexed indexed) {
        List<ValidationError> errors = new ArrayList<>();

        int expectedDimension = indexed.dimension();
        VectorType vectorType = indexed.type();
        int bytesPerElement = bytesPerVectorElement(vectorType);
        int expectedBytes = expectedDimension * bytesPerElement;

        if (value instanceof byte[]) {
            byte[] blob = (byte[]) value;
            if (blob.length != expectedBytes) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Vector blob size %d != expected %d (dim=%d, type=%s)",
                                         blob.length, expectedBytes, expectedDimension, vectorType))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_VECTOR_DIMENSION)
                    .actualValue(blob.length)
                    .expectedValue(expectedBytes)
                    .build());
            }
        } else if (value instanceof float[]) {
            float[] vector = (float[]) value;
            if (vector.length != expectedDimension) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Vector dimension %d != expected %d",
                                         vector.length, expectedDimension))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_VECTOR_DIMENSION)
                    .actualValue(vector.length)
                    .expectedValue(expectedDimension)
                    .build());
            }

            // Check for NaN or Infinity
            for (int i = 0; i < vector.length; i++) {
                if (Float.isNaN(vector[i]) || Float.isInfinite(vector[i])) {
                    errors.add(ValidationError.builder()
                        .fieldName(fieldName)
                        .message(String.format("Invalid vector value at index %d: %f", i, vector[i]))
                        .severity(ValidationError.Severity.HIGH)
                        .type(ErrorType.RANGE_VIOLATION)
                        .actualValue(vector[i])
                        .build());
                }
            }
        } else if (value != null) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Vector must be byte[] or float[], got: " + value.getClass().getSimpleName())
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.TYPE_MISMATCH)
                .actualValue(value.getClass())
                .expectedValue("byte[] or float[]")
                .build());
        }

        return errors;
    }

    /**
     * Returns the size in bytes of one vector component for the given type:
     * 4 for FLOAT32, 8 for FLOAT64, and 2 for anything else (the previous
     * fallback, kept for types this method does not know).
     */
    private static int bytesPerVectorElement(VectorType vectorType) {
        if (vectorType == VectorType.FLOAT32) {
            return 4;
        }
        // FLOAT64 is matched by name so this compiles even if the constant is not
        // visible here. NOTE(review): assumes the constant is named FLOAT64 - confirm.
        Object typeToken = vectorType;
        String typeName = (typeToken instanceof Enum)
            ? ((Enum<?>) typeToken).name()
            : String.valueOf(typeToken);
        if ("FLOAT64".equals(typeName)) {
            return 8;
        }
        return 2;
    }
    
    /**
     * Validates a geo field.
     *
     * <ul>
     *   <li>{@code Point}: x is treated as longitude and must lie in [-180, 180],
     *       y as latitude and must lie in [-90, 90]; NaN is rejected;</li>
     *   <li>{@code String}: must pass the basic WKT check;</li>
     *   <li>any other non-null type is reported as a type mismatch.</li>
     * </ul>
     *
     * @param fieldName name reported in the generated errors
     * @param value     the field value
     * @return the errors found; empty when the value is acceptable
     */
    private List<ValidationError> validateGeoField(String fieldName, Object value) {
        List<ValidationError> errors = new ArrayList<>();

        if (value instanceof Point) {
            Point point = (Point) value;
            // Validate coordinate ranges
            if (isOutsideRange(point.getX(), -180, 180)) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Invalid longitude: %f (must be -180 to 180)", point.getX()))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_GEO_COORDINATES)
                    .actualValue(point.getX())
                    .build());
            }
            if (isOutsideRange(point.getY(), -90, 90)) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Invalid latitude: %f (must be -90 to 90)", point.getY()))
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_GEO_COORDINATES)
                    .actualValue(point.getY())
                    .build());
            }
        } else if (value instanceof String) {
            // Validate WKT format
            String wkt = (String) value;
            if (!isValidWkt(wkt)) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message("Invalid WKT format: " + wkt)
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.FORMAT_ERROR)
                    .actualValue(wkt)
                    .build());
            }
        } else if (value != null) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Geo field must be Point or WKT string, got: " + value.getClass().getSimpleName())
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.TYPE_MISMATCH)
                .actualValue(value.getClass())
                .expectedValue("Point or String(WKT)")
                .build());
        }

        return errors;
    }

    /**
     * Tells whether {@code coordinate} lies outside {@code [min, max]}. NaN is
     * reported as outside: the check is written as a negated "inside" test
     * because every comparison involving NaN is false.
     */
    private static boolean isOutsideRange(double coordinate, double min, double max) {
        return !(coordinate >= min && coordinate <= max);
    }
    
    /**
     * Validates a tag field.
     *
     * <ul>
     *   <li>{@code Collection}: its size must not exceed the spec's maximum tag
     *       count (when positive), every element must be a {@code String}
     *       (null elements are reported, not dereferenced), and no tag may
     *       contain {@code ','} or {@code ';'};</li>
     *   <li>{@code String}: must not contain {@code ','} or {@code ';'};</li>
     *   <li>any other non-null type is reported as a type mismatch.</li>
     * </ul>
     *
     * @param fieldName name reported in the generated errors
     * @param value     the field value
     * @param spec      field spec supplying the tag count limit
     * @return the errors found; empty when the value is acceptable
     */
    private List<ValidationError> validateTagField(String fieldName, Object value, FieldSpec spec) {
        List<ValidationError> errors = new ArrayList<>();

        if (value instanceof Collection) {
            Collection<?> tags = (Collection<?>) value;

            // Check tag count limit
            if (spec.getMaxTags() > 0 && tags.size() > spec.getMaxTags()) {
                errors.add(ValidationError.builder()
                    .fieldName(fieldName)
                    .message(String.format("Tag count %d exceeds maximum %d",
                                         tags.size(), spec.getMaxTags()))
                    .severity(ValidationError.Severity.MEDIUM)
                    .type(ErrorType.TAG_LIMIT_EXCEEDED)
                    .actualValue(tags.size())
                    .expectedValue(spec.getMaxTags())
                    .build());
            }

            // Validate each tag
            for (Object tag : tags) {
                if (tag instanceof String) {
                    addSeparatorError(errors, fieldName, (String) tag);
                } else {
                    // Covers null elements too: report them instead of calling getClass() on null.
                    errors.add(ValidationError.builder()
                        .fieldName(fieldName)
                        .message("Tag must be string, got: "
                            + (tag == null ? "null" : tag.getClass().getSimpleName()))
                        .severity(ValidationError.Severity.HIGH)
                        .type(ErrorType.TYPE_MISMATCH)
                        .actualValue(tag == null ? null : tag.getClass())
                        .expectedValue("String")
                        .build());
                }
            }
        } else if (value instanceof String) {
            // Single tag value - validate
            addSeparatorError(errors, fieldName, (String) value);
        } else if (value != null) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Tag field must be String or Collection<String>, got: " + value.getClass().getSimpleName())
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.TYPE_MISMATCH)
                .actualValue(value.getClass())
                .expectedValue("String or Collection<String>")
                .build());
        }

        return errors;
    }

    /** Appends a FORMAT_ERROR to {@code errors} when {@code tag} contains ',' or ';'. */
    private static void addSeparatorError(List<ValidationError> errors, String fieldName, String tag) {
        if (tag.contains(",") || tag.contains(";")) {
            errors.add(ValidationError.builder()
                .fieldName(fieldName)
                .message("Tag contains invalid separator characters: " + tag)
                .severity(ValidationError.Severity.HIGH)
                .type(ErrorType.FORMAT_ERROR)
                .actualValue(tag)
                .build());
        }
    }
    
    /**
     * Reports every {@code @Indexed} field whose index path was already claimed
     * by an earlier declared field of the same class.
     *
     * @param entity the entity whose declared fields are inspected
     * @param spec   the index spec (not consulted by this check)
     * @return one DUPLICATE_FIELD error per repeated path; empty when all paths are unique
     */
    private List<ValidationError> checkDuplicateFields(Object entity, IndexSpec spec) {
        List<ValidationError> duplicates = new ArrayList<>();
        Set<String> seenPaths = new HashSet<>();

        for (Field candidate : entity.getClass().getDeclaredFields()) {
            if (!candidate.isAnnotationPresent(Indexed.class)) {
                continue;
            }

            String indexPath = getFieldPath(candidate);
            // Set.add returns false when the path is already present.
            boolean firstOccurrence = seenPaths.add(indexPath);
            if (!firstOccurrence) {
                ValidationError duplicate = ValidationError.builder()
                    .fieldName(candidate.getName())
                    .fieldPath(indexPath)
                    .message("Field path already indexed: " + indexPath)
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.DUPLICATE_FIELD)
                    .build();
                duplicates.add(duplicate);
            }
        }

        return duplicates;
    }
    
    /**
     * Validates JSON structure for @Document entities.
     *
     * <p>The circular-reference check runs first. When a cycle is found the
     * entity is not serialized at all, because serializing a cyclic object graph
     * recurses until the stack overflows. Otherwise the entity is serialized with
     * Gson and the output must parse back to a JSON object.
     *
     * @param entity the entity to check
     * @return the INVALID_JSON_STRUCTURE errors found; empty when the entity serializes cleanly
     */
    private List<ValidationError> validateJsonStructure(Object entity) {
        List<ValidationError> errors = new ArrayList<>();

        try {
            // Check for circular references before attempting to serialize
            if (hasCircularReference(entity)) {
                errors.add(ValidationError.builder()
                    .fieldName("_document")
                    .message("Entity contains circular references")
                    .severity(ValidationError.Severity.HIGH)
                    .type(ErrorType.INVALID_JSON_STRUCTURE)
                    .build());
                return errors;
            }

            // Attempt to serialize to JSON
            Gson gson = gsonBuilder.create();
            String json = gson.toJson(entity);

            // Parse back to validate structure
            JsonElement element = JsonParser.parseString(json);
            if (!element.isJsonObject()) {
                errors.add(ValidationError.builder()
                    .fieldName("_document")
                    .message("Entity does not serialize to valid JSON object")
                    .severity(ValidationError.Severity.CRITICAL)
                    .type(ErrorType.INVALID_JSON_STRUCTURE)
                    .build());
            }

        } catch (StackOverflowError e) {
            // Safety net for cycles or nesting depth the reflective check did not catch.
            // StackOverflowError is an Error, so the Exception handler below would miss it.
            errors.add(ValidationError.builder()
                .fieldName("_document")
                .message("Failed to serialize to JSON: object graph is cyclic or too deeply nested")
                .severity(ValidationError.Severity.CRITICAL)
                .type(ErrorType.INVALID_JSON_STRUCTURE)
                .build());
        } catch (Exception e) {
            errors.add(ValidationError.builder()
                .fieldName("_document")
                .message("Failed to serialize to JSON: " + e.getMessage())
                .severity(ValidationError.Severity.CRITICAL)
                .type(ErrorType.INVALID_JSON_STRUCTURE)
                .build());
        }

        return errors;
    }
    
    // Helper methods
    
    /**
     * Tells whether {@code text} survives a UTF-8 encode/decode round trip
     * unchanged. Returns {@code false} for {@code null}.
     */
    private boolean isValidUtf8(String text) {
        if (text == null) {
            return false;
        }
        final byte[] encoded = text.getBytes(StandardCharsets.UTF_8);
        final String roundTripped = new String(encoded, StandardCharsets.UTF_8);
        return roundTripped.equals(text);
    }
    
    /**
     * Basic WKT check: accepts any non-null string that starts with one of the
     * recognised geometry keywords. The remainder of the string is not parsed.
     */
    private boolean isValidWkt(String wkt) {
        if (wkt == null) {
            return false;
        }

        final String[] geometryKeywords = {
            "POINT",
            "LINESTRING",
            "POLYGON",
            "MULTIPOINT",
            "MULTILINESTRING",
            "MULTIPOLYGON"
        };
        for (String keyword : geometryKeywords) {
            if (wkt.startsWith(keyword)) {
                return true;
            }
        }
        return false;
    }
    
    /**
     * Estimates the stored size of {@code entity} as the UTF-8 byte length of
     * its Gson serialization.
     *
     * <p>This is best effort: when serialization fails, including the stack
     * overflow caused by a cyclic object graph, the failure is logged and
     * {@code 0} is returned so that validation can continue.
     *
     * @param entity the entity to measure
     * @return the estimated size in bytes, or {@code 0} when it cannot be determined
     */
    private long estimateEntitySize(Object entity) {
        try {
            Gson gson = gsonBuilder.create();
            String json = gson.toJson(entity);
            return json.getBytes(StandardCharsets.UTF_8).length;
        } catch (Exception | StackOverflowError e) {
            // StackOverflowError is an Error, not an Exception, so it must be named explicitly.
            // Only the summary is logged: a stack-overflow trace is thousands of frames long.
            log.warn("Could not estimate serialized entity size, treating it as 0: {}", e.toString());
            return 0;
        }
    }
    
    /**
     * Returns the maximum document size, in the unit used by
     * {@code estimateEntitySize}, as reported by {@code properties}.
     *
     * <p>NOTE(review): no {@code properties} field is declared among the fields
     * visible in this class - confirm where it comes from. The original comment
     * gave the default as 64MB; verify against the configuration class.
     */
    private long getMaxDocumentSize() {
        // Redis JSON max document size (configurable)
        final long configuredLimit = properties.getMaxDocumentSize();
        return configuredLimit;
    }
    
    /**
     * Tells whether the object graph reachable from {@code entity} contains a
     * reference cycle.
     *
     * @param entity root of the graph; may be {@code null}
     * @return {@code true} when some object is reachable from itself
     */
    private boolean hasCircularReference(Object entity) {
        return hasCircularReference(entity, new IdentityHashMap<>());
    }

    /**
     * Depth-first cycle search. {@code visited} holds the objects on the current
     * path only (compared by identity), so an object that is merely shared by
     * two branches is not mistaken for a cycle.
     *
     * <p>Traversal rules:
     * <ul>
     *   <li>strings, numbers, booleans, characters, enums and {@code Class}
     *       objects are leaves;</li>
     *   <li>maps, collections and object arrays are followed through their
     *       elements, never through their internal fields;</li>
     *   <li>other JDK types are treated as opaque leaves, because their fields
     *       generally cannot be opened reflectively;</li>
     *   <li>for all remaining objects the instance fields of the class and its
     *       superclasses are followed; static, transient, synthetic and
     *       primitive fields are skipped, as are fields that cannot be read.</li>
     * </ul>
     */
    private boolean hasCircularReference(Object obj, IdentityHashMap<Object, Boolean> visited) {
        if (obj == null || isLeafValue(obj)) {
            return false;
        }

        if (visited.containsKey(obj)) {
            return true;
        }

        visited.put(obj, Boolean.TRUE);
        try {
            if (obj instanceof Map) {
                for (Map.Entry<?, ?> entry : ((Map<?, ?>) obj).entrySet()) {
                    if (hasCircularReference(entry.getKey(), visited)
                            || hasCircularReference(entry.getValue(), visited)) {
                        return true;
                    }
                }
                return false;
            }

            if (obj instanceof Collection) {
                for (Object element : (Collection<?>) obj) {
                    if (hasCircularReference(element, visited)) {
                        return true;
                    }
                }
                return false;
            }

            Class<?> type = obj.getClass();
            if (type.isArray()) {
                if (!type.getComponentType().isPrimitive()) {
                    for (Object element : (Object[]) obj) {
                        if (hasCircularReference(element, visited)) {
                            return true;
                        }
                    }
                }
                return false;
            }

            if (isJdkType(type)) {
                return false;
            }

            // Check instance fields, including inherited ones
            for (Class<?> current = type; current != null && current != Object.class;
                    current = current.getSuperclass()) {
                for (Field field : current.getDeclaredFields()) {
                    int modifiers = field.getModifiers();
                    if (java.lang.reflect.Modifier.isStatic(modifiers)
                            || java.lang.reflect.Modifier.isTransient(modifiers)
                            || field.isSynthetic()
                            || field.getType().isPrimitive()) {
                        continue;
                    }

                    Object value;
                    try {
                        field.setAccessible(true);
                        value = field.get(obj);
                    } catch (IllegalAccessException | RuntimeException inaccessible) {
                        // Skip fields that cannot be opened or read (setAccessible may be refused).
                        continue;
                    }

                    if (hasCircularReference(value, visited)) {
                        return true;
                    }
                }
            }
            return false;
        } finally {
            // Leaving this object: it is no longer on the current path.
            visited.remove(obj);
        }
    }

    /** Tells whether {@code obj} is a value that cannot take part in a reference cycle. */
    private static boolean isLeafValue(Object obj) {
        return obj instanceof CharSequence
            || obj instanceof Number
            || obj instanceof Boolean
            || obj instanceof Character
            || obj instanceof Enum
            || obj instanceof Class;
    }

    /** Tells whether {@code type} belongs to the JDK, judged by its package prefix. */
    private static boolean isJdkType(Class<?> type) {
        String name = type.getName();
        return name.startsWith("java.")
            || name.startsWith("javax.")
            || name.startsWith("jdk.")
            || name.startsWith("sun.");
    }
}

Diagnostic Service for Failed Entities

@Service
@Slf4j
public class IndexingDiagnosticService {
    
    @Autowired
    private EntityIndexingValidator validator;
    
    @Autowired
    private RedisModulesOperations<String> modulesOperations;
    
    /**
     * Diagnoses why a specific entity failed to index.
     *
     * <p>Steps: load the stored JSON, deserialize it, run the validator, look the
     * document up in the search index, read the index-wide failure counter and
     * derive recommendations from the validation errors. Any exception is
     * recorded as an issue on the report instead of being propagated.
     *
     * @param entityId    id of the entity to diagnose
     * @param entityClass entity type, used to build the key and resolve the index
     * @return the report; never {@code null}
     */
    public DiagnosisReport diagnoseEntity(String entityId, Class<?> entityClass) {
        DiagnosisReport report = new DiagnosisReport();
        report.setEntityId(entityId);
        report.setEntityClass(entityClass);

        try {
            // 1. Retrieve the entity from Redis
            String key = buildKey(entityClass, entityId);
            String json = modulesOperations.execute(jedis ->
                jedis.jsonGet(key, Path2.ROOT_PATH));

            if (json == null) {
                report.addIssue("Entity not found in Redis");
                return report;
            }

            // 2. Deserialize entity
            Gson gson = gsonBuilder.create();
            Object entity = gson.fromJson(json, entityClass);
            if (entity == null) {
                // Gson yields null for an empty or literal-null document; there is nothing to validate.
                report.addIssue("Stored JSON deserialized to null");
                return report;
            }

            // 3. Run validation
            ValidationResult validation = validator.validate(entity);
            report.setValidationResult(validation);

            // 4. Check index status
            String indexName = indexer.getIndexName(entityClass);
            SearchOperations<String> searchOps = modulesOperations.opsForSearch(indexName);

            // Try to retrieve from index
            try {
                Document doc = searchOps.get(entityId);
                if (doc == null) {
                    report.addIssue("Document not found in index");
                } else {
                    report.setIndexed(true);
                }
            } catch (Exception e) {
                report.addIssue("Failed to retrieve from index: " + e.getMessage());
            }

            // 5. Get index info for failure count
            Map<String, Object> info = searchOps.getInfo();
            report.setTotalIndexFailures(toLongOrNull(info.get("hash_indexing_failures")));

            // 6. Analyze specific failure reasons
            analyzeFailureReasons(report, entity, validation);

        } catch (Exception e) {
            report.addIssue("Diagnostic failed: " + e.getMessage());
            log.error("Failed to diagnose entity {}", entityId, e);
        }

        return report;
    }

    /**
     * Converts an index-info value to a {@code Long} without assuming its
     * runtime type: numbers are narrowed with {@code longValue()}, anything else
     * is parsed from its string form.
     *
     * @return the value as {@code Long}, or {@code null} when absent or not numeric
     */
    private static Long toLongOrNull(Object raw) {
        if (raw == null) {
            return null;
        }
        if (raw instanceof Number) {
            return ((Number) raw).longValue();
        }
        try {
            return Long.valueOf(raw.toString().trim());
        } catch (NumberFormatException notNumeric) {
            return null;
        }
    }
    
    /**
     * Adds recommendations to {@code report} based on the kinds of validation
     * errors found and on the estimated document size.
     *
     * <p>Field names in a recommendation are listed once each, and errors that
     * carry no field name are left out of the list.
     *
     * @param report     report receiving the recommendations
     * @param entity     the diagnosed entity (not consulted by this method)
     * @param validation the validation result to analyze
     */
    private void analyzeFailureReasons(DiagnosisReport report, Object entity, ValidationResult validation) {
        // Group errors by type; a result built without an error list is treated as having none
        List<ValidationError> allErrors = validation.getErrors();
        Map<ErrorType, List<ValidationError>> errorsByType = allErrors == null
            ? new HashMap<>()
            : allErrors.stream().collect(Collectors.groupingBy(ValidationError::getType));

        // Generate recommendations
        if (errorsByType.containsKey(ErrorType.TYPE_MISMATCH)) {
            report.addRecommendation("Fix data type mismatches in fields: " +
                affectedFieldNames(errorsByType.get(ErrorType.TYPE_MISMATCH)));
        }

        if (errorsByType.containsKey(ErrorType.INVALID_VECTOR_DIMENSION)) {
            report.addRecommendation("Ensure vector dimensions match index specification");
        }

        if (errorsByType.containsKey(ErrorType.NULL_NOT_ALLOWED)) {
            report.addRecommendation("Provide values for required fields: " +
                affectedFieldNames(errorsByType.get(ErrorType.NULL_NOT_ALLOWED)));
        }

        // Check for common patterns
        if (validation.getEstimatedSize() > 16_000_000) { // 16MB
            report.addRecommendation("Consider reducing document size (currently " +
                validation.getEstimatedSize() + " bytes)");
        }
    }

    /** Joins the distinct, non-null field names of {@code errors} with ", " in encounter order. */
    private static String affectedFieldNames(List<ValidationError> errors) {
        return errors.stream()
            .map(ValidationError::getFieldName)
            .filter(name -> name != null)
            .distinct()
            .collect(Collectors.joining(", "));
    }
    
    /**
     * Diagnoses each entity id in turn and aggregates the results: every
     * individual report is stored in the batch report, together with the error
     * type and the field name that occur most often across all validation errors.
     *
     * @param entityIds   ids of the entities to diagnose
     * @param entityClass entity type shared by all ids
     * @return the batch report; most-common values are {@code null} when no error was seen
     */
    public BatchDiagnosisReport diagnoseFailures(List<String> entityIds, Class<?> entityClass) {
        BatchDiagnosisReport summary = new BatchDiagnosisReport();

        Map<ErrorType, Integer> countsByType = new HashMap<>();
        Map<String, Integer> countsByField = new HashMap<>();

        for (String id : entityIds) {
            DiagnosisReport single = diagnoseEntity(id, entityClass);
            summary.addReport(id, single);

            // Reports without a validation result contribute no statistics.
            if (single.getValidationResult() == null) {
                continue;
            }
            for (ValidationError problem : single.getValidationResult().getErrors()) {
                countsByType.merge(problem.getType(), 1, Integer::sum);
                countsByField.merge(problem.getFieldName(), 1, Integer::sum);
            }
        }

        // Identify patterns
        summary.setMostCommonError(keyWithHighestCount(countsByType));
        summary.setMostProblematicField(keyWithHighestCount(countsByField));

        return summary;
    }

    /**
     * Returns the key with the largest count, or {@code null} for an empty map.
     * On ties the entry encountered first in the map's iteration order wins.
     */
    private static <K> K keyWithHighestCount(Map<K, Integer> counts) {
        Map.Entry<K, Integer> leader = null;
        for (Map.Entry<K, Integer> candidate : counts.entrySet()) {
            // Strictly greater: an equal count never displaces the current leader.
            if (leader == null || candidate.getValue().compareTo(leader.getValue()) > 0) {
                leader = candidate;
            }
        }
        return leader == null ? null : leader.getKey();
    }
}

Usage Examples

Pre-Save Validation

/**
 * Example service that validates a {@code Product} with {@code EntityIndexingValidator}
 * before handing it to the repository.
 *
 * NOTE(review): {@code log} is not declared in this snippet; presumably it is supplied by
 * Lombok's {@code @Slf4j}, as on {@code EntityIndexingValidator} - confirm.
 */
@Service
public class ProductService {
    
    @Autowired
    private EntityIndexingValidator validator;
    
    @Autowired
    private ProductRepository repository;
    
    /**
     * Validates the product and saves it only when validation passes.
     *
     * @param product the product to validate and persist
     * @return whatever {@code repository.save(product)} returns
     * @throws ValidationException when the validator reports the product as invalid
     */
    public Product saveWithValidation(Product product) {
        // Validate before saving
        ValidationResult validation = validator.validate(product);
        
        if (!validation.isValid()) {
            log.error("Product validation failed: {}", validation.generateReport());
            
            // The three options are alternatives: pick exactly one. Statements placed after
            // an unconditional throw are unreachable and rejected by the compiler, so the
            // two options that are not active are shown as comments.
            //
            // Option 2: Try to fix common issues
            //     product = attemptAutoFix(product, validation);
            //
            // Option 3: Save anyway but track failure
            //     trackValidationFailure(product.getId(), validation);
            
            // Option 1 (active): Throw exception
            throw new ValidationException(validation);
        }
        
        return repository.save(product);
    }
    
    /**
     * Applies a per-error-type repair to the product for each reported validation error.
     * Error types without a case below are left untouched.
     *
     * @param product    the product to repair
     * @param validation the validation result whose errors drive the repairs
     * @return the same product instance that was passed in
     */
    private Product attemptAutoFix(Product product, ValidationResult validation) {
        for (ValidationError error : validation.getErrors()) {
            switch (error.getType()) {
                case TEXT_TOO_LONG:
                    // Truncate text fields
                    truncateField(product, error.getFieldName());
                    break;
                case INVALID_VECTOR_DIMENSION:
                    // Pad or truncate vector
                    fixVectorDimension(product, error.getFieldName());
                    break;
                case NULL_NOT_ALLOWED:
                    // Set default value
                    setDefaultValue(product, error.getFieldName());
                    break;
                default:
                    // No automatic repair for the remaining error types.
                    break;
            }
        }
        return product;
    }
}

Diagnosing Known Failures

@RestController
@RequestMapping("/api/diagnostics")
public class DiagnosticsController {
    
    @Autowired
    private IndexingDiagnosticService diagnosticService;
    
    @GetMapping("/diagnose/{entityId}")
    public DiagnosisReport diagnoseEntity(
            @PathVariable String entityId,
            @RequestParam Class<?> entityClass) {
        
        return diagnosticService.diagnoseEntity(entityId, entityClass);
    }
    
    @PostMapping("/diagnose/batch")
    public BatchDiagnosisReport diagnoseBatch(
            @RequestBody List<String> entityIds,
            @RequestParam Class<?> entityClass) {
        
        BatchDiagnosisReport report = diagnosticService.diagnoseFailures(entityIds, entityClass);
        
        // Log summary
        log.info("Batch diagnosis complete: {} entities analyzed", entityIds.size());
        log.info("Most common error: {}", report.getMostCommonError());
        log.info("Most problematic field: {}", report.getMostProblematicField());
        
        return report;
    }
}

Integration with SaveAll

/**
 * Wraps {@code saveAll} so that every entity is validated first and only the entities that
 * pass validation are handed to the repository.
 */
@Component
public class ValidatingRepositoryWrapper {
    
    @Autowired
    private EntityIndexingValidator validator;
    
    /**
     * Validates each entity, saves the valid ones in a single {@code saveAll} call and
     * reports the rest as skipped.
     *
     * @param repository the repository that receives the valid entities
     * @param entities   the entities to validate and save
     * @param <T>        the entity type
     * @return a result holding the saved entities, the skipped entities and the validation
     *         result of every input entity, keyed by entity instance
     */
    public <T> SaveResult<T> saveAllWithValidation(
            RedisDocumentRepository<T, ?> repository,
            Iterable<T> entities) {
        
        List<T> validEntities = new ArrayList<>();
        List<T> invalidEntities = new ArrayList<>();
        // Keyed by object identity rather than equals/hashCode: entities are mutable, so a
        // hash-based map would merge equal-but-distinct entities into one entry and would
        // lose track of an entity whose hash changes once it has been saved.
        Map<T, ValidationResult> validationResults = new java.util.IdentityHashMap<>();
        
        // Pre-validate all entities
        for (T entity : entities) {
            ValidationResult validation = validator.validate(entity);
            validationResults.put(entity, validation);
            
            if (validation.isValid()) {
                validEntities.add(entity);
            } else {
                invalidEntities.add(entity);
            }
        }
        
        // Save only valid entities
        List<T> saved = repository.saveAll(validEntities);
        
        // Report on invalid entities
        if (!invalidEntities.isEmpty()) {
            log.warn("Skipped {} invalid entities", invalidEntities.size());
            for (T invalid : invalidEntities) {
                ValidationResult validation = validationResults.get(invalid);
                log.debug("Invalid entity: {}", validation.generateReport());
            }
        }
        
        return SaveResult.<T>builder()
            .saved(saved)
            .skipped(invalidEntities)
            .validationResults(validationResults)
            .build();
    }
}

Testing Support

/**
 * Example tests showing that the validator rejects entities with known indexing problems.
 */
@DataRedisTest
public class ValidationTest {

    @Autowired
    private EntityIndexingValidator validator;

    /** A non-numeric price value is expected to be reported as a numeric parse error. */
    @Test
    public void testInvalidNumericField() {
        Product nonNumericPrice = new Product();
        nonNumericPrice.setPrice("not-a-number"); // Should be numeric

        assertRejectedWith(nonNumericPrice, ErrorType.NUMERIC_PARSE_ERROR);
    }

    /** An embedding of the wrong length is expected to be reported as a dimension error. */
    @Test
    public void testVectorDimensionMismatch() {
        Product shortEmbedding = new Product();
        shortEmbedding.setEmbedding(new float[512]); // Expected: 1536

        assertRejectedWith(shortEmbedding, ErrorType.INVALID_VECTOR_DIMENSION);
    }

    /**
     * Validates the candidate and asserts that it is invalid and that its errors include
     * the given type.
     */
    private void assertRejectedWith(Product candidate, ErrorType expectedType) {
        ValidationResult outcome = validator.validate(candidate);

        assertThat(outcome.isValid()).isFalse();
        assertThat(outcome.getErrors())
            .extracting(ValidationError::getType)
            .contains(expectedType);
    }
}

This validation utility provides:

  1. Pre-save validation - Catch issues before they cause indexing failures
  2. Detailed error reporting - Understand exactly why validation failed
  3. Diagnostic tools - Analyze entities that already failed
  4. Batch analysis - Find patterns in multiple failures
  5. Auto-fix capabilities - Attempt to correct common issues
  6. Testing support - Validate entities in unit tests

The validator matches the exact validation logic from RediSearch source code, ensuring accurate prediction of indexing failures.

Clone this wiki locally