# Import Data to MongoDB Atlas and Create Vector Search Index

This notebook demonstrates how to import the `mobile_reviews_with_embeddings.json` data into MongoDB Atlas and create a vector search index for the `review_embedding` field.

In [None]:
%pip install pymongo

In [None]:
import json
import os

from pymongo import MongoClient

# 1. Connection Setup
# The connection string is read from the MONGO_URI environment variable so that
# credentials do not have to be saved inside the notebook. If the variable is
# not set, replace <connection_string> below with your actual MongoDB Atlas
# connection string (and avoid committing the notebook afterwards).
MONGO_URI = os.environ.get("MONGO_URI", "<connection_string>")
DB_NAME = "tech_on_the_rock"
COLLECTION_NAME = "mobile_reviews"

client = MongoClient(MONGO_URI)
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

# Test connection
try:
    # 'ping' is a cheap round trip that confirms the deployment is reachable.
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # The error is only reported, not re-raised: later cells will still run
    # and will fail on their own if the connection is really unusable.
    print(e)

In [None]:
# 2. Load Data
file_path = 'mobile_reviews_with_embeddings.json'

# JSON text is UTF-8; without an explicit encoding, open() uses the platform's
# locale encoding, which can garble or reject non-ASCII review text.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

print(f"Loaded {len(data)} documents from {file_path}")

In [None]:
# 3. Insert Data
# Validate the payload BEFORE clearing anything: insert_many() needs a non-empty
# list of documents, and failing after delete_many() would leave the collection
# empty with nothing to replace it.
if not isinstance(data, list) or not data:
    raise ValueError(
        "Expected `data` to be a non-empty list of documents; "
        "the collection was left untouched."
    )

# Optional: Clear existing data
# NOTE: the empty filter matches every document in the collection.
collection.delete_many({})

result = collection.insert_many(data)
print(f"Inserted {len(result.inserted_ids)} documents into '{DB_NAME}.{COLLECTION_NAME}'")

## Create Vector Search Index

The definition below configures the vector search index. The embedding field is named `review_embedding` and has 4096 dimensions (matching the output size of the `qwen3-embedding` model).

**Instructions:**
1. Go to your cluster in MongoDB Atlas.
2. Click on the **Atlas Search** tab.
3. Click **Create Search Index**.
4. Select **JSON Editor**.
5. Select the `tech_on_the_rock.mobile_reviews` collection.
6. Enter the Index Name: `vector_index`
7. Run the next cell and paste the JSON configuration it prints:


In [None]:
# Vector Search Index Configuration (for Atlas UI)
# A single vector field: the embedding path, its dimensionality, and the
# similarity function used to compare vectors.
vector_index_definition = {
    "fields": [
        {
            "numDimensions": 4096,
            "path": "review_embedding",
            "similarity": "cosine",
            "type": "vector",
        },
    ],
}

# Render once, then emit the banner and the JSON on separate lines.
atlas_ui_json = json.dumps(vector_index_definition, indent=2)
print("Use this configuration in Atlas UI:", atlas_ui_json, sep="\n")

### Programmatic Index Creation (Optional)
You can also create the search index programmatically using the following code.

In [None]:
# Attempt to create the index programmatically
index_name = "vector_index"

# The index definition itself: one vector field on `review_embedding`.
vector_search_definition = {
  "fields": [
    {
      "numDimensions": 4096,
      "path": "review_embedding",
      "similarity": "cosine",
      "type": "vector"
    }
  ]
}

# create_search_index() takes a single model that wraps the definition together
# with the index name and type. Passing the bare definition as `model` (with
# name/type as separate keyword arguments) is rejected by the driver, so the
# three pieces are bundled here. The "type" key requires PyMongo 4.7+.
search_index_model = {
    "definition": vector_search_definition,
    "name": index_name,
    "type": "vectorSearch",
}

try:
    # On success the driver returns the name of the index being created.
    result = collection.create_search_index(model=search_index_model)
    print(f"Creating index: {result}")
    print("Wait a few minutes for the index to build.")
except Exception as e:
    # Best-effort: the index can still be created by hand in the Atlas UI.
    print("Error creating index programmatically (might need to do it in UI):", e)