In [None]:
# Install the notebook's dependencies into the *kernel's* environment.
# `%pip` (unlike `!pip`) always targets the interpreter this notebook runs on.
# NOTE(review): versions are unpinned; pin them once a known-good set is agreed.
%pip install -q redisvl sentence-transformers pandas nltk


In [None]:
%%sh
# NBVAL_SKIP
# The cell magic must be the very first line of the cell for IPython to
# recognise it; the nbval marker stays in the cell as an ordinary shell comment.

# Register the Redis package repository (signing key + apt source entry).
curl -fsSL https://packages.redis.io/gpg -o /usr/share/keyrings/redis-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list > /dev/null

# Install redis-stack-server; apt output is discarded to keep the notebook output short.
sudo apt-get update > /dev/null 2>&1
sudo apt-get install -y redis-stack-server > /dev/null 2>&1

# Start the server in the background so later cells can connect to it.
redis-stack-server --daemonize yes

In [5]:
import os
import warnings
from urllib.parse import quote

# Suppress all Python warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Replace values below with your own if using Redis Cloud instance
REDIS_HOST = os.getenv("REDIS_HOST", "localhost") # ex: "redis-18374.c253.us-central1-1.gce.cloud.redislabs.com"
REDIS_PORT = os.getenv("REDIS_PORT", "6379")      # ex: 18374
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")  # ex: "1TNxTEdYRDgIDKM2gDfasupCADXXXX"

# Percent-encode the password before embedding it in the URL: characters such
# as "@", "/", ":" or "#" would otherwise be read as URL delimiters and the
# host/port would be parsed incorrectly. An empty password encodes to "".
# If SSL is enabled on the endpoint, use rediss:// as the URL prefix
REDIS_URL = f"redis://:{quote(REDIS_PASSWORD, safe='')}@{REDIS_HOST}:{REDIS_PORT}"

In [6]:
from redis import Redis

# Create the client from the connection URL assembled in the configuration cell.
client = Redis.from_url(url=REDIS_URL)

# Send a PING so that an unreachable or misconfigured server is noticed here
# rather than in a later cell; the reply is shown as the cell output.
client.ping()

True

In [7]:
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd

# Location of the movies dataset. The default keeps the original Colab upload
# path; set the MOVIES_JSON_PATH environment variable to point somewhere else
# when running outside Colab.
MOVIES_JSON_PATH = Path(os.getenv("MOVIES_JSON_PATH", "/content/movies.json"))

# Fail early with an actionable message instead of a bare reader error.
if not MOVIES_JSON_PATH.is_file():
    raise FileNotFoundError(
        f"Movie dataset not found at {MOVIES_JSON_PATH}. Upload movies.json to that "
        "location or set the MOVIES_JSON_PATH environment variable to the file's path."
    )

df = pd.read_json(MOVIES_JSON_PATH)
print("Loaded", len(df), "movie entries")

df.head()

FileNotFoundError: File /content/movies.json does not exist

In [None]:
from redisvl.utils.vectorize import HFTextVectorizer
from redisvl.extensions.cache.embeddings import EmbeddingsCache

# Text vectorizer for the sentence-transformers model named below. It is given
# an embeddings cache named "movies_cache" that uses the existing Redis client.
# NOTE(review): ttl=600 is presumably in seconds — confirm against the redisvl docs.
hf = HFTextVectorizer(
    model="sentence-transformers/all-MiniLM-L6-v2",
    cache=EmbeddingsCache(name="movies_cache", ttl=600, redis_client=client),
)

In [None]:
# Embed every movie description in a single batched call and keep the result
# next to the source rows in a new "vector" column (requested with as_buffer=True).
descriptions = df["description"].tolist()
df["vector"] = hf.embed_many(descriptions, as_buffer=True)

df.head()

# Define Redis Index Schema

In [None]:
from redisvl.schema import IndexSchema
from redisvl.index import SearchIndex

# The same string is used for the index name and for the key prefix.
index_name = "movies"

# Schema for the movie index: two text fields, a sortable tag, a sortable
# numeric field, and the embedding vector field.
schema = IndexSchema.from_dict(
    {
        "index": {
            "name": index_name,
            "prefix": index_name,
            "storage_type": "hash",
        },
        "fields": [
            {"name": "title", "type": "text"},
            {"name": "description", "type": "text"},
            {"name": "genre", "type": "tag", "attrs": {"sortable": True}},
            {"name": "rating", "type": "numeric", "attrs": {"sortable": True}},
            {
                "name": "vector",
                "type": "vector",
                # NOTE(review): dims presumably has to equal the length of the
                # vectors produced by the embedding model — confirm.
                "attrs": {
                    "dims": 384,
                    "distance_metric": "cosine",
                    "datatype": "float32",
                    "algorithm": "hnsw",
                },
            },
        ],
    }
)

# Bind the schema to the Redis client and (re)create the index; the call asks
# for both overwrite and drop, so re-running this cell is expected to be safe.
index = SearchIndex(schema, client)
index.create(overwrite=True, drop=True)



In [None]:
# Show the index definition with the redisvl CLI. The index name comes from
# `index_name` so it cannot drift from the schema cell, and the URL is
# single-quoted so the shell does not interpret characters inside it.
!rvl index info -i {index_name} -u '{REDIS_URL}'

# Populate Index


In [None]:
# Turn the DataFrame into one dict per row and write them all to the index;
# whatever `index.load` returns is shown as the cell output.
movie_records = df.to_dict(orient="records")
index.load(movie_records)

# Search Techniques

### Standard Vector Search

In [None]:
from redisvl.query import VectorQuery

# Free-text query, embedded with the same vectorizer used for the descriptions.
user_query = "High tech and action packed movie"
embedded_user_query = hf.embed(user_query)

# Query against the "vector" field asking for 3 results, returning only the
# title and genre of each hit together with its score.
vec_query = VectorQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    return_fields=["title", "genre"],
    return_score=True,
    num_results=3,
)

# Run the query and render the hits as a table.
result = index.query(vec_query)
pd.DataFrame(result)


### Vector Search with Filters

In [None]:
from redisvl.query.filter import Tag

# Restrict the existing vector query to rows whose "genre" tag equals "action".
# Note that this modifies `vec_query` in place: the filter stays attached to it
# for any later cell that reuses the same query object.
tag_filter = Tag("genre") == "action"
vec_query.set_filter(tag_filter)

# Re-run the now-filtered query and render the hits as a table.
result = index.query(vec_query)
pd.DataFrame(result)

## Hybrid Search


In [None]:
from redisvl.query import HybridQuery

user_query = "action adventure movie with great fighting scenes against a dangerous criminal, crime busting, superheroes, and magic"

embedded_user_query = hf.embed(user_query)

hybrid_query = HybridQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    text_query=user_query,
    text_field_name="description",