In [None]:
# Install the notebook's dependencies into the running kernel's environment.
# %pip targets the kernel's interpreter, whereas !pip runs whichever `pip` is
# first on the shell PATH and may install into a different environment.
%pip install redisvl sentence-transformers pandas nltk


In [None]:
# NBVAL_SKIP
%%sh
# Install Redis Stack from the Redis APT repository and start it in the background.
# The published signing key is ASCII-armored; a keyring stored under a .gpg name
# must be in binary form, so it is converted with `gpg --dearmor`. sudo is required
# because /usr/share/keyrings is root-owned, and --yes lets a re-run overwrite the
# existing keyring without prompting.
curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor --yes -o /usr/share/keyrings/redis-archive-keyring.gpg
# Make sure the keyring is world-readable so apt can read it during verification.
sudo chmod 644 /usr/share/keyrings/redis-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list > /dev/null
# apt output is discarded to keep the cell quiet; drop the redirections when debugging a failed install.
sudo apt-get update > /dev/null 2>&1
sudo apt-get install -y redis-stack-server > /dev/null 2>&1

redis-stack-server --daemonize yes

Starting redis-stack-server, database path /var/lib/redis-stack


In [None]:
import os
import warnings
from urllib.parse import quote

# Suppress all Python warnings for the rest of the session to keep cell output readable.
warnings.filterwarnings('ignore')

# Replace values below with your own if using Redis Cloud instance
REDIS_HOST = os.getenv("REDIS_HOST", "localhost") # ex: "redis-18374.c253.us-central1-1.gce.cloud.redislabs.com"
REDIS_PORT = os.getenv("REDIS_PORT", "6379")      # ex: 18374
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")  # ex: "1TNxTEdYRDgIDKM2gDfasupCADXXXX"

# Percent-encode the password before embedding it in the URL: characters such as
# '@', '/', '#' or ':' would otherwise be read as URL delimiters and corrupt the
# host/port. An empty password encodes to an empty string, so the default URL is unchanged.
# If SSL is enabled on the endpoint, use rediss:// as the URL prefix
REDIS_URL = f"redis://:{quote(REDIS_PASSWORD, safe='')}@{REDIS_HOST}:{REDIS_PORT}"

In [None]:
from redis import Redis

# Build the client from the connection URL assembled in the configuration cell,
# then ping the server; the ping result is the value this cell displays.
client = Redis.from_url(url=REDIS_URL)
client.ping()

True

In [None]:
import json
import os

import numpy as np
import pandas as pd

# Location of the movie dataset. The default is the original Colab upload path;
# set the MOVIES_JSON_PATH environment variable to run the notebook elsewhere.
MOVIES_JSON_PATH = os.getenv("MOVIES_JSON_PATH", "/content/movies.json")

df = pd.read_json(MOVIES_JSON_PATH)
print("Loaded", len(df), "movie entries")

# Preview the first rows as the cell's rich output.
df.head()

Loaded 20 movie entries


Unnamed: 0,id,title,genre,rating,description
0,1,Explosive Pursuit,action,7,A daring cop chases a notorious criminal acros...
1,2,Skyfall,action,8,James Bond returns to track down a dangerous n...
2,3,Fast & Furious 9,action,6,Dom and his crew face off against a high-tech ...
3,4,Black Widow,action,7,Natasha Romanoff confronts her dark past and f...
4,5,John Wick,action,8,A retired hitman seeks vengeance against those...


In [None]:
from redisvl.extensions.cache.embeddings import EmbeddingsCache
from redisvl.utils.vectorize import HFTextVectorizer

# Embedding cache named "movies_cache", backed by the existing Redis client.
# ttl=600 is the entry lifetime (presumably seconds; confirm against the redisvl docs).
movies_cache = EmbeddingsCache(name="movies_cache", ttl=600, redis_client=client)

# Hugging Face sentence-transformers vectorizer that uses the cache above.
hf = HFTextVectorizer(
    model="sentence-transformers/all-MiniLM-L6-v2",
    cache=movies_cache,
)

In [None]:
# Embed every movie description in a single call; as_buffer=True stores each
# embedding as a bytes value (visible in the "vector" column preview).
movie_descriptions = df["description"].tolist()
df["vector"] = hf.embed_many(movie_descriptions, as_buffer=True)
df.head()

Unnamed: 0,id,title,genre,rating,description,vector
0,1,Explosive Pursuit,action,7,A daring cop chases a notorious criminal acros...,b'\x9ef|=\xc8a\n;\xde\x91\xb7; \xcb~\xbd*e\xce...
1,2,Skyfall,action,8,James Bond returns to track down a dangerous n...,b'\x9eD\x9e\xbd?\x9b\x89\xbc\xbc\x16\x95\xbc\x...
2,3,Fast & Furious 9,action,6,Dom and his crew face off against a high-tech ...,"b'$\xa5\xc7\xbc\xfc,\xa2=L\x19H\xbc=\xc6t\xbd\..."
3,4,Black Widow,action,7,Natasha Romanoff confronts her dark past and f...,b't\xeb\x85\xbd\x04\xcdo\xbd\xb9\xe8\xc2\xbb;\...
4,5,John Wick,action,8,A retired hitman seeks vengeance against those...,b'4<x\xbb\x02/\xc5=\xff\x86:;\xc7\xd0\x94<\xfc...


# Define Redis Index Schema

In [None]:
from redisvl.index import SearchIndex
from redisvl.schema import IndexSchema

index_name = "movies"

# Index-level settings: the index name doubles as the key prefix, and documents
# are stored as Redis hashes.
index_settings = {
    "name": index_name,
    "prefix": index_name,
    "storage_type": "hash",
}

# One entry per indexed field.
field_specs = [
    {"name": "title", "type": "text"},
    {"name": "description", "type": "text"},
    {"name": "genre", "type": "tag", "attrs": {"sortable": True}},
    {"name": "rating", "type": "numeric", "attrs": {"sortable": True}},
    {
        "name": "vector",
        "type": "vector",
        # dims is expected to equal the length of the embeddings produced by the
        # vectorizer used for the "vector" column.
        "attrs": {
            "dims": 384,
            "distance_metric": "cosine",
            "datatype": "float32",
            "algorithm": "hnsw",
        },
    },
]

schema = IndexSchema.from_dict({"index": index_settings, "fields": field_specs})

# Create the index, passing overwrite=True and drop=True so the cell can be
# re-run against an already existing "movies" index.
index = SearchIndex(schema, client)
index.create(overwrite=True, drop=True)



In [None]:
# Show the index definition through the redisvl CLI. The URL is single-quoted so
# shell metacharacters in the password are not interpreted by the shell, and the
# index name is taken from index_name instead of being repeated as a literal.
!rvl index info -i {index_name} -u '{REDIS_URL}'



Index Information:
╭───────────────┬───────────────┬───────────────┬───────────────┬───────────────┬╮
│ Index Name    │ Storage Type  │ Prefixes      │ Index Options │ Indexing      │
├───────────────┼───────────────┼───────────────┼───────────────┼───────────────┼┤
| movies        | HASH          | ['movies']    | []            | 0             |
╰───────────────┴───────────────┴───────────────┴───────────────┴───────────────┴╯
Index Fields:
╭─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬╮
│ Name            │ Attribute       │ Type            │ Field Option    │ Option Value    │ Field Option    │ Option Value    │ Field Option    │ Option Value    │ Field Option    │ Option Value    │
├─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼──

# Populate Index


In [None]:
# Turn each DataFrame row into a dict and write the records to the index;
# the list of generated keys returned by load() is the cell's output.
movie_records = df.to_dict(orient="records")
index.load(movie_records)

['movies:01K2FY0K0RZ70XEBN691R3CWYS',
 'movies:01K2FY0K0RZ19G5AXWQKDFGBCC',
 'movies:01K2FY0K0R9N89YYKRPZFXXDHM',
 'movies:01K2FY0K0T8BVRMW98C3BXA2PR',
 'movies:01K2FY0K0T9KRF75RBX4TD7PMW',
 'movies:01K2FY0K0T34C4EWRP8EK5HQBC',
 'movies:01K2FY0K0T4CZPEDHWYTMMQVV7',
 'movies:01K2FY0K0TNFQXZCC6YSQ27A8R',
 'movies:01K2FY0K0T1V3XQQHFX5SSVN6F',
 'movies:01K2FY0K0TEK4T3F7ND0P44Z97',
 'movies:01K2FY0K0THWJ619DSV9K9NK2J',
 'movies:01K2FY0K0TTR83CJRMH15F0JG3',
 'movies:01K2FY0K0TGSNYGHZCTD9E16M6',
 'movies:01K2FY0K0TEAAQ556G6E3ZNCDW',
 'movies:01K2FY0K0T33RHK6TP9QEHEHC3',
 'movies:01K2FY0K0TZ9N9461F40YF5CXK',
 'movies:01K2FY0K0TJHF4XEC8RFS43561',
 'movies:01K2FY0K0THFHSEN13D78SSPW9',
 'movies:01K2FY0K0TMJRHE1SEG476N8J7',
 'movies:01K2FY0K0TKMJ5M8ZZS4BE0HPF']

# Search Techniques

### Standard Vector Search

In [None]:
from redisvl.query import VectorQuery

# Embed the free-text request with the same vectorizer used for the movie descriptions.
user_query = "High tech and action packed movie"
embedded_user_query = hf.embed(user_query)

# Vector search over the "vector" field: top 3 matches, returning title and
# genre alongside the distance score.
vec_query = VectorQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    return_fields=["title", "genre"],
    return_score=True,
    num_results=3,
)

# Run the query and render the hits as a table.
result = index.query(vec_query)
pd.DataFrame(result)


Unnamed: 0,id,vector_distance,title,genre
0,movies:01K2FY0K0R9N89YYKRPZFXXDHM,0.649737894535,Fast & Furious 9,action
1,movies:01K2FY0K0T34C4EWRP8EK5HQBC,0.763234972954,Mad Max: Fury Road,action
2,movies:01K2FY0K0TTR83CJRMH15F0JG3,0.792449653149,The Lego Movie,comedy


### Vector Search with Filters

In [None]:
from redisvl.query.filter import Tag

# Restrict the existing vector query to documents whose "genre" tag is "action".
# set_filter() is applied to vec_query itself, so any later use of vec_query
# carries this filter as well.
tag_filter = (Tag("genre") == "action")
vec_query.set_filter(tag_filter)

# Re-run the now-filtered query and render the hits as a table.
result = index.query(vec_query)
pd.DataFrame(result)

Unnamed: 0,id,vector_distance,title,genre
0,movies:01K2FY0K0R9N89YYKRPZFXXDHM,0.649737894535,Fast & Furious 9,action
1,movies:01K2FY0K0T34C4EWRP8EK5HQBC,0.763234972954,Mad Max: Fury Road,action
2,movies:01K2FY0K0RZ70XEBN691R3CWYS,0.796153008938,Explosive Pursuit,action


## Hybrid Search


In [None]:
from redisvl.query import HybridQuery

user_query = "action adventure movie with great fighting scenes against a dangerous criminal, crime busting, superheroes, and magic"

embedded_user_query = hf.embed(user_query)

hybrid_query = HybridQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    text_query=user_query,
    text_field_name="description",