In [1]:
import os
import pandas as pd
import warnings
from redis import Redis
from redisvl.utils.vectorize import HFTextVectorizer
from redisvl.schema import IndexSchema
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery, RangeQuery, HybridQuery
from redisvl.query.filter import Tag, Text, Num

In [2]:

import os
import pandas as pd
import warnings
from redis import Redis
from redisvl.utils.vectorize import HFTextVectorizer
from redisvl.schema import IndexSchema
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery, RangeQuery, HybridQuery
from redisvl.query.filter import Tag, Text, Num
# Suppress every Python warning for the rest of the session to keep the demo output readable.
warnings.filterwarnings('ignore')
# ============================================
# 1. CONNECT TO REDIS
# ============================================
# Imported here because only the URL construction below needs it.
from urllib.parse import quote

# Connection settings are read from the environment; the fallbacks target a
# local Redis server without a password.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = os.getenv("REDIS_PORT", "6379")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")
# Percent-encode the password before embedding it in the URL: characters such
# as '@', '/', '#' or ':' would otherwise be parsed as URL delimiters and the
# connection would use the wrong host or credentials. An empty password
# encodes to an empty string, so the default URL is unchanged.
_encoded_password = quote(REDIS_PASSWORD, safe="")
REDIS_URL = f"redis://:{_encoded_password}@{REDIS_HOST}:{REDIS_PORT}"
redis_connection = Redis.from_url(REDIS_URL)
# ping() performs a round trip to the server, so this line fails fast when
# Redis is unreachable.
print("✓ Connected to Redis:", redis_connection.ping())
# ============================================
# 2. PREPARE SAMPLE DATA
# ============================================
# Sample movie dataset, kept as compact rows; each row is expanded into a
# record dict whose keys follow MOVIE_FIELDS order.
MOVIE_FIELDS = ("id", "title", "genre", "rating", "description")
MOVIE_ROWS = [
    (
        "1", "Explosive Pursuit", "action", 7,
        "A daring cop chases a notorious criminal across the city in a high-speed pursuit.",
    ),
    (
        "2", "Skyfall", "action", 8,
        "James Bond returns to track down a dangerous new threat to MI6.",
    ),
    (
        "3", "The Grand Budapest Hotel", "comedy", 8,
        "A concierge and his protégé navigate an adventure at a famous European hotel.",
    ),
    (
        "4", "Enchanted Kingdom", "fantasy", 7,
        "A young princess discovers her magical powers in a family-friendly fantasy adventure.",
    ),
    (
        "5", "The Dark Knight", "action", 9,
        "Batman faces his greatest challenge yet when a criminal mastermind brings chaos to Gotham.",
    ),
    (
        "6", "Frozen", "fantasy", 8,
        "Two sisters embark on a magical journey in this family-friendly animated adventure.",
    ),
]
movies_data = [dict(zip(MOVIE_FIELDS, row)) for row in MOVIE_ROWS]

# One DataFrame row per movie, one column per field.
df = pd.DataFrame(movies_data)
print(f"\n✓ Loaded {len(df)} movies")
# ============================================
# 3. CREATE VECTOR EMBEDDINGS
# ============================================
# Set before the vectorizer is constructed.
# NOTE(review): presumably silences the Hugging Face tokenizers parallelism warning; confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Vectorizer backed by a pre-trained sentence-transformers model.
hf = HFTextVectorizer(model="sentence-transformers/all-MiniLM-L6-v2", dims=384)

# Embed every movie description in one call; as_buffer=True requests the
# embeddings in buffer form, and the result is stored in a new "vector" column.
descriptions = df["description"].tolist()
df["vector"] = hf.embed_many(descriptions, as_buffer=True)
print("✓ Generated vector embeddings")
# ============================================
# 4. DEFINE SCHEMA AND CREATE INDEX
# ============================================
# Vector field settings: 384 dimensions, flat algorithm, cosine distance.
vector_field = {
    "name": "vector",
    "type": "vector",
    "attrs": {"dims": 384, "algorithm": "flat", "distance_metric": "cosine"},
}
# Full index definition: index name and key prefix, then one entry per field.
schema_definition = {
    "index": {"name": "movies", "prefix": "doc"},
    "fields": [
        {"name": "id", "type": "tag"},
        {"name": "title", "type": "text"},
        {"name": "genre", "type": "tag"},
        {"name": "rating", "type": "numeric"},
        {"name": "description", "type": "text"},
        vector_field,
    ],
}
schema = IndexSchema.from_dict(schema_definition)

# Create the index; overwrite=True and drop=True are passed so a re-run
# starts from a fresh index.
index = SearchIndex(schema, redis_connection)
index.create(overwrite=True, drop=True)
print("✓ Created search index")
# ============================================
# 5. LOAD DATA INTO REDIS
# ============================================
# One dict per DataFrame row (including the "vector" column), loaded with the
# "id" column as the identifier field.
data = df.to_dict("records")
index.load(data, id_field="id")
print(f"✓ Loaded {len(data)} documents into Redis\n")
# ============================================
# 6. PERFORM VECTOR SEARCHES
# ============================================
print("=" * 60)
print("DEMO 1: Basic Vector Search")
print("=" * 60)
# Embed the free-text query with the same vectorizer used for the documents.
user_query = "high-speed thrilling chase"
embedded_query = hf.embed(user_query)
# Ask for the 3 nearest documents and include the distance in each result.
vec_query = VectorQuery(
    vector=embedded_query,
    vector_field_name="vector",
    num_results=3,
    return_fields=["title", "genre", "description"],
    return_score=True
)
results = index.query(vec_query)
print(f"\nQuery: '{user_query}'")
print("\nTop Results:")
for i, result in enumerate(results, 1):
    # Query results carry field values as strings, so convert the distance to
    # float before applying the numeric ".4f" format (formatting a str with
    # ".4f" raises ValueError).
    distance = float(result['vector_distance'])
    print(f"{i}. {result['title']} (Score: {distance:.4f})")
    print(f"   {result['description']}\n")
# ============================================
# DEMO 2: Vector Search with Genre Filter
# ============================================
banner = "=" * 60
print("\n" + banner)
print("DEMO 2: Vector Search with Genre Filter")
print(banner)

# Reuse the existing vector query, now restricted to the "action" genre tag.
tag_filter = Tag("genre") == "action"
vec_query.set_filter(tag_filter)
results = index.query(vec_query)

print(f"\nQuery: '{user_query}' (Genre: action)")
print("\nFiltered Results:")
for rank, movie in enumerate(results, start=1):
    print(f"{rank}. {movie['title']} - {movie['genre']}")
# ============================================
# DEMO 3: Multiple Filters (Genre + Rating)
# ============================================
banner = "=" * 60
print("\n" + banner)
print("DEMO 3: Multiple Filters (Genre + Rating)")
print(banner)

# Both conditions must hold: rating of at least 8 AND genre tagged "action".
num_filter = Num("rating") >= 8
tag_filter = Tag("genre") == "action"
combined_filter = tag_filter & num_filter

# A fresh query object, built with the combined filter from the start.
vec_query = VectorQuery(
    vector=embedded_query,
    vector_field_name="vector",
    num_results=3,
    return_fields=["title", "rating", "genre"],
    return_score=True,
    filter_expression=combined_filter,
)
results = index.query(vec_query)

print("\nQuery: High-rated action movies")
print("\nResults:")
for rank, movie in enumerate(results, start=1):
    print(f"{rank}. {movie['title']} - Rating: {movie['rating']}, Genre: {movie['genre']}")
# ============================================
# DEMO 4: Range Query with Distance Threshold
# ============================================
print("\n" + "=" * 60)
print("DEMO 4: Range Query (Only Similar Results)")
print("=" * 60)
user_query = "family friendly fantasy adventure"
embedded_query = hf.embed(user_query)
# Single source of truth for the threshold, so the query and the printed
# message cannot drift apart.
distance_threshold = 0.8
# Unlike the fixed-size queries above, a range query takes a distance
# threshold rather than a result count.
range_query = RangeQuery(
    vector=embedded_query,
    vector_field_name="vector",
    return_fields=["title", "genre", "description"],
    return_score=True,
    distance_threshold=distance_threshold
)
results = index.query(range_query)
print(f"\nQuery: '{user_query}' (similarity threshold: {distance_threshold})")
print(f"Found {len(results)} results within threshold:\n")
for i, result in enumerate(results, 1):
    # Query results carry field values as strings, so convert the distance to
    # float before applying the numeric ".4f" format (formatting a str with
    # ".4f" raises ValueError).
    distance = float(result['vector_distance'])
    print(f"{i}. {result['title']} (Distance: {distance:.4f})")
    print(f"   Genre: {result['genre']}")
    print(f"   {result['description']}\n")
# ============================================
# DEMO 5: Hybrid Search (Vector + Text)
# ============================================
def _format_score(value):
    """Format a score with four decimals, or return 'N/A' if it is missing.

    Accepts numbers and numeric strings. None and non-numeric values yield
    'N/A' instead of raising, because applying the ".4f" format spec directly
    to a str (including an 'N/A' fallback) raises ValueError.
    """
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError):
        return "N/A"


print("\n" + "=" * 60)
print("DEMO 5: Hybrid Search (Vector + Full-Text)")
print("=" * 60)
# Reuses user_query / embedded_query from the previous demo, combining a
# full-text match on "description" with vector similarity.
hybrid_query = HybridQuery(
    text=user_query,
    text_field_name="description",
    text_scorer="BM25",
    vector=embedded_query,
    vector_field_name="vector",
    alpha=0.7,  # 70% weight to vector, 30% to text
    num_results=4,
    return_fields=["title", "description"]
)
results = index.query(hybrid_query)
print(f"\nQuery: '{user_query}' (hybrid: 70% semantic, 30% keyword)")
print("\nResults with combined scores:\n")
for i, result in enumerate(results, 1):
    print(f"{i}. {result['title']}")
    # .get() returns None for an absent score; _format_score renders that as 'N/A'.
    print(f"   Vector Score: {_format_score(result.get('vector_similarity'))}")
    print(f"   Text Score: {_format_score(result.get('text_score'))}")
    print(f"   Hybrid Score: {_format_score(result.get('hybrid_score'))}\n")
print("=" * 60)
print("✓ All demos complete!")
print("=" * 60)
Breaking Down the Code
Step 1: Connect to Redis
REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}"
redis_connection = Redis.from_url(REDIS_URL)
We establish a connection to Redis using environment variables for flexibility across environments.
Step 2: Prepare Your Data
df = pd.DataFrame(movies_data)
We load movie data into a pandas DataFrame. Each movie has an id, title, genre, rating, and description; the id is later used as the document key when loading into Redis.
Step 3: Generate Vector Embeddings
hf = HFTextVectorizer(
    model="sentence-transformers/all-MiniLM-L6-v2",
    dims=384
)
df["vector"] = hf.embed_many(df["description"].tolist(), as_buffer=True)
RedisVL's HFTextVectorizer wraps Hugging Face models to generate embeddings. The model converts each movie description into a 384-dimensional vector that captures its semantic meaning. The as_buffer=True parameter stores vectors as compact binary data for efficiency.
Step 4: Define the Schema
schema = IndexSchema.from_dict({
    "index": {"name": "movies", "prefix": "doc"},
    "fields": [
        {"name": "title", "type": "text"},
        {"name": "genre", "type": "tag"},
        {"name": "rating", "type": "numeric"},
        {
            "name": "vector",
            "type": "vector",
            "attrs": {
                "dims": 384,
                "algorithm": "flat",
                "distance_metric": "cosine"
            }
        }
    ]
})
The schema defines how Redis should index each field. The vector field uses cosine distance to measure similarity - perfect for text embeddings where the angle between vectors matters more than their magnitude.
Step 5: Create the Index and Load Data
index = SearchIndex(schema, redis_connection)
index.create(overwrite=True, drop=True)
index.load(data, id_field="id")
We create the index in Redis and load all movie documents in one batch operation.
Step 6: Perform Searches
Basic Vector Search:
vec_query = VectorQuery(
    vector=embedded_query,
    vector_field_name="vector",
    num_results=3,
    return_fields=["title", "genre", "description"],
    return_score=True
)
results = index.query(vec_query)
This finds the 3 most semantically similar movies to your query.
Vector Search with Filters:
tag_filter = Tag("genre") == "action"
vec_query.set_filter(tag_filter)
Add filters to narrow results by genre, rating, or other attributes.
Range Queries:
range_query = RangeQuery(
    vector=embedded_query,
    vector_field_name="vector",
    distance_threshold=0.8
)
Return only results whose vector distance from the query is within the given threshold (lower distance means more similar), filtering out weak matches.
Hybrid Search:
hybrid_query = HybridQuery(
    text=user_query,
    vector=embedded_query,
    alpha=0.7  # 70% semantic, 30% keyword
)
Combine semantic search with traditional full-text search for best-of-both-worlds results.
Understanding the Results
When you run searches, you'll see distance scores. With cosine distance:
0.0 = identical vectors (perfect match)
0.5 = moderately similar
1.0 = unrelated (orthogonal vectors); values approaching 2.0 = opposite in direction

Lower distances indicate better matches.
Key Takeaways
Vector embeddings transform text into numerical representations that capture meaning, enabling semantic search beyond keyword matching.
RedisVL simplifies the entire workflow from embedding generation to complex queries, making it easy to add semantic search to your applications.
Hybrid search combines the precision of keyword matching with the flexibility of semantic search for optimal results.
Filters and range queries give you fine-grained control over search results, balancing relevance with quality thresholds.
Next Steps
Now that you have a working vector search system, consider:
Scaling up: Replace the sample data with your own dataset (thousands or millions of documents)
Experimenting with models: Try different embedding models from Hugging Face for your specific domain
Adding features: Implement faceted search, personalization, or multi-vector search
Production deployment: Add error handling, caching, and monitoring

Resources
RedisVL Documentation
Redis Vector Search Guide
Sentence Transformers Models

---

Ready to build smarter search? Clone the code and start experimenting! Vector search is transforming how we find information - now you have the tools to implement it yourself.
Questions or feedback? Leave a comment below!

NameError: name 'REDIS_PASSWORD' is not defined