# Semantic Search Prototype

This notebook demonstrates semantic search over the EV India 13 dataset using cosine similarity.


In [None]:
import json
import numpy as np
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working
# directory; <root>/src goes first on sys.path so that the local `search`
# module can be imported below.
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root / "src"))

from search import SemanticSearch, format_search_results

# Both input files are read from <root>/data/processed.
data_path = project_root.joinpath("data", "processed", "cleaned_data.json")
embeddings_path = project_root.joinpath("data", "processed", "embeddings.npy")

# Echo the resolved locations, one per line.
print(
    f"Loading data from: {data_path}",
    f"Loading embeddings from: {embeddings_path}",
    sep="\n",
)


In [None]:
# Load the cleaned entries (JSON) and the saved embeddings array (.npy).
# The encoding is pinned to UTF-8: without it, open() decodes with the
# platform's locale encoding (for example cp1252 on Windows), which can
# garble or reject non-ASCII characters in the JSON text.
with open(data_path, 'r', encoding='utf-8') as f:
    cleaned_data = json.load(f)

embeddings = np.load(embeddings_path)

print(f"Loaded {len(cleaned_data)} entries")
print(f"Embeddings shape: {embeddings.shape}")

# NOTE(review): presumably `embeddings` holds one row per entry of
# `cleaned_data`, in the same order — confirm against whatever produced
# both files before trusting the search results.

# Initialize search engine
search_engine = SemanticSearch(cleaned_data, embeddings)
print("\nSemantic search engine initialized!")


In [None]:
# Query 1 — AI and machine learning projects.
query1 = "AI and machine learning projects"

# Header: the query text followed by an 80-character rule.
print(f"Query: {query1}", "=" * 80, sep="\n")

# Run the search with top_k=5 and print the formatted results.
results1 = search_engine.search(query1, top_k=5)
print(format_search_results(results1))


In [None]:
# Query 2 — healthcare and medical devices.
query2 = "Healthcare and medical devices"

# Header: the query text followed by an 80-character rule.
print(f"Query: {query2}", "=" * 80, sep="\n")

# Run the search with top_k=5 and print the formatted results.
results2 = search_engine.search(query2, top_k=5)
print(format_search_results(results2))


In [None]:
# Query 3 — hardware and robotics.
query3 = "Hardware and robotics"

# Header: the query text followed by an 80-character rule.
print(f"Query: {query3}", "=" * 80, sep="\n")

# Run the search with top_k=5 and print the formatted results.
results3 = search_engine.search(query3, top_k=5)
print(format_search_results(results3))


In [None]:
# Query 4 — education and learning platforms.
query4 = "Education and learning platforms"

# Header: the query text followed by an 80-character rule.
print(f"Query: {query4}", "=" * 80, sep="\n")

# Run the search with top_k=5 and print the formatted results.
results4 = search_engine.search(query4, top_k=5)
print(format_search_results(results4))


In [None]:
# Query 5 — sustainability and climate adaptation.
query5 = "Sustainability and climate adaptation"

# Header: the query text followed by an 80-character rule.
print(f"Query: {query5}", "=" * 80, sep="\n")

# Run the search with top_k=5 and print the formatted results.
results5 = search_engine.search(query5, top_k=5)
print(format_search_results(results5))


In [None]:
# Entry-to-entry lookup: use the entry at index 0 as the query.
# Header: a title line followed by an 80-character rule.
print("Finding entries similar to first entry:", "=" * 80, sep="\n")

# Show which entry is being used as the query.
print(f"Query entry: {cleaned_data[0]['name']} - {cleaned_data[0]['project_name']}")

# Ask for top_k=5 results for index 0 with exclude_self=True, then print them.
similar = search_engine.find_similar_entries(0, top_k=5, exclude_self=True)
print(format_search_results(similar))
