<a href="https://colab.research.google.com/github/prakul/MongoDB-AI-Resources/blob/main/Auto_embedding_quick_start_%2B_Views.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

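# Atlas Vector Search Auto-Embedding Quick Start + Views

This notebook creates auto-embedding vector search indexes on the `sample_mflix.movies` collection with two Voyage AI embedding models, compares their query results, and then introduces vector search on views.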

In [None]:
pip install pymongo python-dotenv pandas

Collecting pymongo
  Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-dotenv, dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.13.2 python-dotenv-1.1.0


In [None]:
import os
import time
import urllib
import urllib.parse

import pandas as pd
from dotenv import load_dotenv
from pymongo.mongo_client import MongoClient
from pymongo.operations import SearchIndexModel
# Load variables from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Connect to your Atlas deployment
# MONGO_URI is expected to be a template with two positional "{}" placeholders
# (username first, then password), filled in by str.format below.
MONGO_URI = os.environ["MONGO_URI"]
# Percent-encode BOTH credentials: reserved characters such as '@', ':' or '/'
# in either the username or the password would otherwise corrupt the URI.
uname = urllib.parse.quote_plus(os.environ["uname"])
pword = urllib.parse.quote_plus(os.environ["pword"])
uri = MONGO_URI.format(uname, pword)
client = MongoClient(uri)

# Access the sample database used throughout this notebook
database = client["sample_mflix"]


In [None]:
# Show every collection name in the database (views are listed here as well).
database.list_collection_names()


['theaters',
 'system.views',
 'comments',
 'sessions',
 'users',
 'embedded_movies',
 'movies',
 'embed_source']

In [None]:
# Module-level handle to the movies collection. The helper functions defined
# later in this notebook read this global `collection` by name, so this cell
# must run before they are called.
collection = database["movies"]

In [None]:
# Total number of documents in the collection (an empty filter matches every document).
collection.count_documents({})

21349

In [None]:
# Peek at a single document to see which fields are available
# (for example `title`, `plot` and `fullplot`).
collection.find_one()

{'_id': ObjectId('573a1390f29313caabcd50e5'),
 'plot': 'The cartoonist, Winsor McCay, brings the Dinosaurus back to life in the figure of his latest creation, Gertie the Dinosaur.',
 'genres': ['Animation', 'Short', 'Comedy'],
 'runtime': 12,
 'cast': ['Winsor McCay', 'George McManus', 'Roy L. McCardell'],
 'num_mflix_comments': 0,
 'poster': 'https://m.media-amazon.com/images/M/MV5BMTQxNzI4ODQ3NF5BMl5BanBnXkFtZTgwNzY5NzMwMjE@._V1_SY1000_SX677_AL_.jpg',
 'title': 'Gertie the Dinosaur',
 'fullplot': 'Winsor Z. McCay bets another cartoonist that he can animate a dinosaur. So he draws a big friendly herbivore called Gertie. Then he get into his own picture. Gertie walks through the picture, eats a tree, meets her creator, and takes him carefully on her back for a ride.',
 'languages': ['English'],
 'released': datetime.datetime(1914, 9, 15, 0, 0),
 'directors': ['Winsor McCay'],
 'writers': ['Winsor McCay'],
 'awards': {'wins': 1, 'nominations': 0, 'text': '1 win.'},
 'lastupdated': '2015

In [None]:
def create_autoembed_index(path, embedding_model, search_index_name,
                           target_collection=None, poll_interval=5, timeout=None):
    """Create a vectorSearch index with a text/model field and wait until it is queryable.

    Args:
        path: Name of the document field the index is defined on.
        embedding_model: Value stored under the field's "model" key
            (e.g. "voyage-3.5-lite").
        search_index_name: Name given to the new search index.
        target_collection: Collection (or view) handle to create the index on.
            Defaults to the module-level ``collection``.
        poll_interval: Seconds to sleep between readiness checks.
        timeout: Maximum number of seconds to wait for the index to become
            queryable. ``None`` (the default) waits indefinitely.

    Returns:
        The index name returned by ``create_search_index``.

    Raises:
        RuntimeError: If the index reports status "FAILED" before it becomes
            queryable.
        TimeoutError: If ``timeout`` seconds elapse before the index is queryable.
    """
    # Fall back to the notebook-level collection unless the caller supplies one.
    coll = collection if target_collection is None else target_collection

    # Create your index model, then create the search index
    search_index_model = SearchIndexModel(
        definition={
            "fields": [
                {
                    "type": "text",
                    "path": path,
                    "model": embedding_model,
                }
            ]
        },
        name=search_index_name,
        type="vectorSearch",
    )
    start = time.time()
    result = coll.create_search_index(model=search_index_model)
    print("New search index named " + result + " is building.")

    # Wait for initial sync to complete
    print("Polling to check if the index is ready. This may take up a short while depending on the size of your collection and the embedding model chosen.")
    while True:
        indices = list(coll.list_search_indexes(result))
        if indices:
            index_info = indices[0]
            if index_info.get("queryable") is True:
                break
            # A failed build never becomes queryable; surface it instead of
            # polling forever.
            if index_info.get("status") == "FAILED":
                raise RuntimeError(f"Search index {result} failed to build: {index_info}")
        # Check the deadline before sleeping so timeout=0 returns immediately.
        if timeout is not None and time.time() - start >= timeout:
            raise TimeoutError(
                f"Search index {result} was not queryable after {timeout} seconds"
            )
        time.sleep(poll_interval)
    print(f"{result} is ready for querying. Took a total of {time.time()-start} seconds")
    return result

In [None]:
# Configuration for the first index: the field to index, the embedding model,
# and the index name. `path` and `search_index_name` are reused by later query cells.
path = "plot"
embedding_model = "voyage-3.5-lite"
search_index_name = "demo_test_1"

# Blocks until the new index reports itself as queryable; `res` holds the index name.
res = create_autoembed_index(path, embedding_model, search_index_name)

In [None]:
def get_results(index_name, path, query, num_candidates=150, limit=10,
                target_collection=None):
    """Run a $vectorSearch aggregation and return the resulting cursor.

    Args:
        index_name: Name of the vector search index to query.
        path: Indexed field name passed as the stage's 'path'.
        query: Value passed as the stage's 'query' (the notebook passes
            natural-language text).
        num_candidates: Value for the stage's 'numCandidates'.
        limit: Maximum number of results ('limit' of the stage).
        target_collection: Collection (or view) handle to aggregate on.
            Defaults to the module-level ``collection``.

    Returns:
        The cursor returned by ``aggregate``; each result is projected to
        ``title``, ``plot`` and ``score`` (the vectorSearchScore metadata).

    Raises:
        ValueError: If ``limit`` is greater than ``num_candidates``.
    """
    # Fail fast with a clear message rather than a server-side error:
    # $vectorSearch does not accept a limit larger than numCandidates.
    if limit > num_candidates:
        raise ValueError("limit must not exceed num_candidates")

    # Fall back to the notebook-level collection unless the caller supplies one.
    coll = collection if target_collection is None else target_collection

    pipeline = [
        {
            '$vectorSearch': {
                'index': index_name,
                'path': path,
                'query': query,
                'numCandidates': num_candidates,
                'limit': limit,
            }
        },
        {
            # Keep only the human-readable fields plus the similarity score.
            '$project': {
                '_id': 0,
                'title': 1,
                'plot': 1,
                'score': {'$meta': 'vectorSearchScore'},
            }
        },
    ]

    return coll.aggregate(pipeline)

In [None]:
# Query the first index with natural-language text and show the hits as a table.
# (pandas is imported as `pd` in the notebook's top import cell.)
query = 'funny movies with out of world characters'
res0 = list(get_results(search_index_name, path, query))
pd.DataFrame(res0).head(10)


Unnamed: 0,plot,title,score
0,Two British comic-book geeks traveling across ...,Paul,0.525405
1,"Turning the zombie film on its head, this film...",Wasting Away,0.522425
2,In the tradition of O' Brother Where Art Thou ...,The Finger,0.515916
3,"On this planet, it's Cowboys and Aliens.",Oblivion,0.514514
4,Aliens who look like clowns come from outer sp...,Killer Klowns from Outer Space,0.514008
5,Kung-Fu Action / Comedy / Horror / Musical abo...,Jesus Christ Vampire Hunter,0.513851
6,"""Documentary"" about a man who can look and act...",Zelig,0.512904
7,The last day of creation. A stranger arrives i...,The Nine Lives of Tomas Katz,0.512741
8,"A vain actor, his best friend, and an activist...",Freaked,0.512397
9,Watch the fur fly as a new breed of superhero ...,Super Buddies,0.511345


In [None]:
# Configuration for a second index on the same field, using a different
# embedding model so the two sets of results can be compared.
path = "plot"
embedding_model_1 = "voyage-3-large"
search_index_name_1 = "demo_test_large"



In [None]:
# Create the second index; the call returns the index name once it is queryable.
index = create_autoembed_index(path, embedding_model_1, search_index_name_1)


In [None]:
# Same query text as before, now run against the second index for comparison.
query = 'funny movies with out of world characters'

res1 = list(get_results(search_index_name_1, path, query))
# Display the hits as a table (the cell's last expression is rendered as output).
pd.DataFrame(res1).head(10)

Unnamed: 0,plot,title,score
0,"On this planet, it's Cowboys and Aliens.",Oblivion,0.536753
1,The bungling inspector Cruchot (Funès) finds h...,The Troops & Aliens,0.529704
2,Aliens who look like clowns come from outer sp...,Killer Klowns from Outer Space,0.527151
3,"A race of small, furry aliens make lunch out o...",Critters,0.519599
4,A comedy. The story follows a young scientist ...,King Size,0.519276
5,Two British comic-book geeks traveling across ...,Paul,0.518966
6,In the tradition of O' Brother Where Art Thou ...,The Finger,0.518942
7,"In this Star Wars take-off, the peaceful plane...",Message from Space,0.51705
8,"In this Star Wars take-off, the peaceful plane...",Message from Space,0.51705
9,A spaceship with three furry aliens lands in a...,Earth Girls Are Easy,0.516886


# Vector Search on Views

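Create a view over `movies` that adds an `embedding_source` field combining each movie's title and full plot. Run the following in `mongosh`:

```javascript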
// mongosh: define a view named 'embed_source' on top of the 'movies' collection.
// The view's pipeline adds one computed field, `embedding_source`, built by
// concatenating the movie title and full plot into a single string.
// NOTE: per the $concat documentation, the result is null when any operand
// is null or missing (e.g. a movie without `fullplot`).
db.createView(
  'embed_source',
  'movies',
  [
    {
      $addFields: {
        embedding_source: {
          $concat: ["Movie Title: ", "$title", "\n", "Plot: ", "$fullplot"]
        }
      }
    }
  ]
)


```

In [None]:
# List the collection names again; the `embed_source` view appears in this list.
database.list_collection_names()


['theaters',
 'system.views',
 'comments',
 'sessions',
 'users',
 'embedded_movies',
 'movies',
 'embed_source']

In [None]:
# Read one document through the `embed_source` view to inspect its shape.
database["embed_source"].find_one()

{'_id': ObjectId('573a1390f29313caabcd50e5'),
 'plot': 'The cartoonist, Winsor McCay, brings the Dinosaurus back to life in the figure of his latest creation, Gertie the Dinosaur.',
 'genres': ['Animation', 'Short', 'Comedy'],
 'runtime': 12,
 'cast': ['Winsor McCay', 'George McManus', 'Roy L. McCardell'],
 'num_mflix_comments': 0,
 'poster': 'https://m.media-amazon.com/images/M/MV5BMTQxNzI4ODQ3NF5BMl5BanBnXkFtZTgwNzY5NzMwMjE@._V1_SY1000_SX677_AL_.jpg',
 'title': 'Gertie the Dinosaur',
 'fullplot': 'Winsor Z. McCay bets another cartoonist that he can animate a dinosaur. So he draws a big friendly herbivore called Gertie. Then he get into his own picture. Gertie walks through the picture, eats a tree, meets her creator, and takes him carefully on her back for a ride.',
 'languages': ['English'],
 'released': datetime.datetime(1914, 9, 15, 0, 0),
 'directors': ['Winsor McCay'],
 'writers': ['Winsor McCay'],
 'awards': {'wins': 1, 'nominations': 0, 'text': '1 win.'},
 'lastupdated': '2015