# In [None]:
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk

# Client for the local OpenSearch node (plain HTTP, basic auth).
client = OpenSearch(
    hosts=[dict(host='localhost', port=9200)],
    use_ssl=False,
    http_auth=('admin', 'admin'),  # swap in your own credentials if they differ
)

INDEX_NAME = "movies"

# Index definition for the movie collection.
#
# OpenSearch's k-NN plugin stores vectors in `knn_vector` fields configured
# through `dimension` and `method`. The `dense_vector` type (with `dims`,
# `index` and `similarity`) is the Elasticsearch mapping and is rejected by
# OpenSearch at index-creation time.
index_body = {
    "settings": {
        "index": {
            "knn": True,  # Enable k-NN search on this index
        },
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "id": {"type": "keyword"},
            "year": {"type": "integer"},
            "rating": {"type": "float"},
            "embedding": {
                "type": "knn_vector",
                # Adjust to your actual embedding size.
                # NOTE(review): older OpenSearch releases cap Lucene-engine
                # vectors at 1024 dimensions -- confirm your cluster version
                # accepts 1536, or pick another engine.
                "dimension": 1536,
                "method": {
                    "name": "hnsw",
                    "space_type": "cosinesimil",  # cosine similarity
                    "engine": "lucene",
                },
            },
        },
    },
}

# Start from a clean slate: drop any previous copy of the index so the
# mapping above is always the one in effect.
if client.indices.exists(index=INDEX_NAME):
    client.indices.delete(index=INDEX_NAME)

# Create new index
client.indices.create(index=INDEX_NAME, body=index_body)
print(f"Created index '{INDEX_NAME}'")

# ----------------------------------------------------
# Insert movie collection
# ----------------------------------------------------

# Example data (you can replace this with your own).
# NOTE: the `...` entries are placeholders, not data. Every embedding must be
# a complete list of floats whose length matches the vector dimension declared
# in the index mapping; a literal Ellipsis cannot be serialized to JSON, so the
# bulk request fails until the placeholders are replaced.
collection = [
    {
        "id": "1",
        "title": "The Matrix",
        "year": 1999,
        "rating": 8.7,
        "embedding": [0.1, 0.3, 0.5, ...],  # replace ... with real values
    },
    {
        "id": "2",
        "title": "Inception",
        "year": 2010,
        "rating": 8.8,
        "embedding": [0.2, 0.1, 0.4, ...],
    },
    # etc.
]

# Bulk insert: one index action per movie, keyed by the movie's own id so
# re-running the script overwrites documents instead of duplicating them.
actions = [
    {"_index": INDEX_NAME, "_id": movie["id"], "_source": movie}
    for movie in collection
]
# refresh=True makes the new documents visible to search as soon as the call
# returns; without it a search issued right afterwards can miss them.
inserted, _ = bulk(client, actions, refresh=True)
# Report what the cluster actually acknowledged rather than the input length.
print(f"Inserted {inserted} movies")

# ----------------------------------------------------
# Perform vector + year constraint query
# ----------------------------------------------------

# Placeholder: replace with a full query vector of the same length as the
# indexed embeddings (the `...` is not a value).
query_embedding = [0.15, 0.25, 0.45, ...]  # your query embedding

# Nearest-neighbour search restricted to movies released before 2000.
#
# The year constraint lives INSIDE the knn clause so it is applied while the
# neighbours are being selected. Wrapping knn in a bool query with a sibling
# `filter` is post-filtering: the k nearest vectors are picked first and then
# filtered, which can return fewer than `size` hits (or none at all).
# Requires the `embedding` field to use an engine that supports efficient
# filtering (Lucene, or Faiss on OpenSearch 2.9+).
query_body = {
    "size": 5,
    "query": {
        "knn": {
            "embedding": {
                "vector": query_embedding,
                "k": 5,
                "filter": {
                    "bool": {
                        "must": [
                            {"range": {"year": {"lt": 2000}}}
                        ]
                    }
                },
            }
        }
    },
}

response = client.search(index=INDEX_NAME, body=query_body)

# Print results
print("\nTop results (year < 2000):")
for hit in response["hits"]["hits"]:
    source = hit["_source"]
    score = hit["_score"]
    print(f"- {source['title']} ({source['year']}) | Score: {score:.4f}")
