# LOAD THE MODEL TO ELASTIC CLOUD
!eland_import_hub_model --hub-access-token "HUGGINGFACE-ACCESS-TOKEN" --url "ELASTIC_CLOUD_HOST" --es-api-key "ELASTIC_CLOUD_API"  --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type  text_embedding

# Explicitly stop the ml deployment

DELETE _ml/trained_models/sentence-transformers__all-minilm-l6-v2?force=true


In [1]:
import pandas as pd 
df=pd.read_csv("netflix_titles.csv")

In [2]:
df.dropna(inplace=True)
df.isna().sum()

show_id         0
type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
dtype: int64

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5332 entries, 7 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       5332 non-null   object
 1   type          5332 non-null   object
 2   title         5332 non-null   object
 3   director      5332 non-null   object
 4   cast          5332 non-null   object
 5   country       5332 non-null   object
 6   date_added    5332 non-null   object
 7   release_year  5332 non-null   int64 
 8   rating        5332 non-null   object
 9   duration      5332 non-null   object
 10  listed_in     5332 non-null   object
 11  description   5332 non-null   object
dtypes: int64(1), object(11)
memory usage: 541.5+ KB


In [4]:
import os
from elasticsearch import Elasticsearch
client=Elasticsearch(hosts=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"], request_timeout=60)

In [15]:
from json import loads, dumps
result = df.to_json(orient="records")
parsed = loads(result)
print(dumps(parsed, indent=4))  


[
    {
        "show_id": "s8",
        "type": "Movie",
        "title": "Sankofa",
        "director": "Haile Gerima",
        "cast": "Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra Duah, Nick Medley, Mutabaruka, Afemo Omilami, Reggie Carter, Mzuri",
        "country": "United States, Ghana, Burkina Faso, United Kingdom, Germany, Ethiopia",
        "date_added": "September 24, 2021",
        "release_year": 1993,
        "rating": "TV-MA",
        "duration": "125 min",
        "listed_in": "Dramas, Independent Movies, International Movies",
        "description": "On a photo shoot in Ghana, an American model slips back in time, becomes enslaved on a plantation and bears witness to the agony of her ancestral past."
    },
    {
        "show_id": "s9",
        "type": "TV Show",
        "title": "The Great British Baking Show",
        "director": "Andy Devonshire",
        "cast": "Mel Giedroyc, Sue Perkins, Mary Berry, Paul Hollywood",
        "country": "United Kingdom",
        "

In [16]:
actions = [{"_index": "netflix-data", "_source": doc} for doc in parsed]

In [7]:

from elasticsearch.helpers import bulk
success, failed = bulk(client, actions)

if failed:
    print(f"Some documents failed to index: {failed}")
else:
    print(f"Successfully indexed {success} documents.")

Successfully indexed 5332 documents.


In [5]:
from elasticsearch import Elasticsearch


# Define the pipeline for text embedding
pipeline_body = {
    "description": "Text embedding pipeline",
    "processors": [
        {
            "inference": {
                "model_id": "sentence-transformers__all-minilm-l6-v2",
                "target_field": "text_embedding",
                "field_map": {
                    "description": "text_field"  # Mapping 'passage' from the document to 'text_field' expected by the model
                }
            }
        }
    ],
    "on_failure": [
        {
            "set": {
                "description": "Index document to 'failed-<index>'",
                "field": "_index",
                "value": "failed-{{{_index}}}"
            }
        },
        {
            "set": {
                "description": "Set error message",
                "field": "ingest.failure",
                "value": "{{_ingest.on_failure_message}}"
            }
        }
    ]
}

# Make the PUT request to create the pipeline
response = client.ingest.put_pipeline(id='text-embeddings-for-netflix-data', body=pipeline_body)

# Print the response
print(response)


{'acknowledged': True}


In [18]:
index_body = {
    "mappings": {
        "properties": {
            "text_embedding.predicted_value": {
                "type": "dense_vector", 
            },
            "text": {
                "type": "text"  # Text field for storing raw text
            }
        }
    }
}

# Make the PUT request to create the index with the mapping
response = client.indices.create(index='netflix-data-with-embeddings', body=index_body)

# Print the response
print(response)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'netflix-data-with-embeddings'}


In [19]:
reindex_body = {
    "source": {
        "index": "netflix-data" # Source index from where to pull the data
    },
    "dest": {
        "index": "netflix-data-with-embeddings",  # Destination index
        "pipeline": "text-embeddings-for-netflix-data"  # Apply the text-embeddings pipeline during reindexing
    }
}

# Make the POST request for reindexing and set wait_for_completion=false for async reindexing
response = client.reindex(body=reindex_body, wait_for_completion=False)

# Print the response
print(response)

{'task': 'uGJM-0BJT5iQHDwWXZridw:795205'}


# Calling the input query

In [6]:
def relevant_documents_call(user_query):
    infer_body = {
        "docs": [
            {
                "text_field": user_query
            }
        ]
    }
    response = client.ml.infer_trained_model(model_id="sentence-transformers__all-minilm-l6-v2", body=infer_body)
    user_vector=response['inference_results'][0]['predicted_value']
    
    
    hybrid_search={
        "query": {
            "match": {
                "text": user_query,
            },
        },
        "knn": {
            "field": "text_embedding.predicted_value",
            "query_vector": user_vector,
            "k": 5,
            "num_candidates": 10,
        },
        "rank": {"rrf": {}},
    }

    relevant_documents = client.search(index="netflix-data-with-embeddings", body=hybrid_search)

    # Print the results
    for hit in relevant_documents['hits']['hits']:
        print(f"Title: {hit['_source']['title']} \n description: {hit['_source']['description']} \n Score: {hit['_score']}\n\n")


In [7]:
relevant_documents_call("Looking for a heartwarming romantic comedy")

Title: Iyore 
 description: A tragic romance unfolds during a teacher's lessons on the Benin empire after they begin to mirror her love life when her childhood love reappears. 
 Score: 0.016393442


Title: Honeymoon Travels Pvt. Ltd. 
 description: This offbeat comedy-drama follows six quirky newlywed couples as they set off on a bus from Mumbai to Goa on their honeymoons. 
 Score: 0.016129032


Title: 7 Khoon Maaf 
 description: Spiced liberally with black comedy, this Bollywood drama follows the lethal love life of a woman who marries numerous men – only to find them flawed. 
 Score: 0.015873017


Title: Fary : Hexagone 
 description: French comedy phenom Fary puts a playful spin on questions of identity, culture and more in the first half of an epic two-part stand-up special. 
 Score: 0.015625


Title: Steal a Pencil for Me 
 description: This moving documentary chronicles the unshakeable romance between a couple who met and fell in love while imprisoned in a Nazi concentration camp

In [24]:
relevant_documents_call("Find a crime thriller with a strong female lead")

Title: Hard Tide 
 description: A drug dealer who's been emulating his father's successful criminal career is forced to go on the run with a young girl in this fact-based thriller. 
 Score: 0.016393442


Title: Maya Memsaab 
 description: A beautiful, wealthy woman’s insatiable appetite for romance leads to tragedy and a police investigation. 
 Score: 0.016129032


Title: The Legend of Michael Mishra 
 description: After a life of crime, a notorious kidnapper tries to change his ways and turn over a new leaf to win the heart of the woman he loves. 
 Score: 0.015873017


Title: Die Ontwaking 
 description: An inexperienced murder detective joins a desperate search for a deranged serial killer who possesses a deep obsession with young women’s skin. 
 Score: 0.015625


Title: Along Came a Spider 
 description: When a girl is kidnapped from a prestigious prep school, a homicide detective takes the case, teaming up with young security agent. 
 Score: 0.015384615


