In [1]:
import sqlite3
import pandas as pd

# Open the SQLite database file that holds the patent records.
conn = sqlite3.connect("patent_db.sqlite")

# Load only the columns used in this notebook into a DataFrame. The connection
# is closed in a `finally` so the database handle is released even when the
# query raises; the code below works on the in-memory DataFrame only.
try:
    df = pd.read_sql_query("SELECT id, title, abstract FROM patents", conn)
finally:
    conn.close()

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,id,title,abstract
0,1,Climbing rose plant named ‘BAISIKOMIK’,A new and distinct variety of climbing rose pl...
1,2,Mandarin plant named ‘CLEMENLOLA’,‘CLEMENLOLA’ is a spontaneous mutation of the ...
2,3,,A new \n
3,4,,A new and distinct cultivar of \n
4,5,,A new \n


In [2]:
# Build one searchable string per patent from its title and abstract.
# Missing values become empty strings and surrounding whitespace is removed,
# so a row without a title no longer produces text starting with a stray ". ".
title_part = df["title"].fillna("").astype(str).str.strip()
abstract_part = df["abstract"].fillna("").astype(str).str.strip()

# Insert the ". " separator only where both parts are actually present.
both_present = (title_part != "") & (abstract_part != "")
separator = both_present.map({True: ". ", False: ""})
df["text"] = title_part + separator + abstract_part

# Plain Python list of the combined strings, in DataFrame row order.
texts = df["text"].tolist()

# Sanity check: show the first combined string.
print(texts[0])


Climbing rose plant named ‘BAISIKOMIK’. A new and distinct variety of climbing rose plant, referred to by its cultivar name, ‘BAISIKOMIK’, is described. The new variety forms nice turbinated shaped flowers with true color gradation. The flowers emit a strong, sweet licorice/anise fragrance. Improved reblooming is exhibited. Additionally, the new variety is particularly well suited for growing as distinctive ornamentation in the landscape.



In [3]:
from sentence_transformers import SentenceTransformer

# Name of the pre-trained sentence-embedding checkpoint to load
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# Encode every combined title/abstract string, displaying a progress bar
embeddings = model.encode(
    texts,
    show_progress_bar=True,
)


Batches:   0%|          | 0/1824 [00:00<?, ?it/s]

In [None]:
import faiss
import numpy as np

# FAISS expects a 2-D, C-contiguous float32 matrix with one row per document.
# np.ascontiguousarray enforces that dtype/layout and, unlike np.array, does
# not copy the data when the embeddings already satisfy it.
embedding_matrix = np.ascontiguousarray(embeddings, dtype=np.float32)

# Width of a single embedding vector (number of columns in the matrix)
dimension = embedding_matrix.shape[1]

# Create a flat FAISS index that compares vectors by L2 distance
index = faiss.IndexFlatL2(dimension)

# Add all document embeddings; row i of the index corresponds to row i of df
# because `texts` was built from df in row order.
index.add(embedding_matrix)


In [5]:
def search_patents(query, k=5):
    """Print the patents whose embeddings are closest to a free-text query.

    The query is encoded with the notebook-level ``model``, looked up in the
    notebook-level FAISS ``index``, and the matching rows of ``df`` are
    printed (title and abstract).

    Args:
        query: Free-text search string.
        k: Number of nearest neighbours to request from the index.

    Returns:
        None. Results are written to standard output.
    """
    # Encode the query as a single-row float32 matrix, the layout FAISS expects.
    query_vector = np.ascontiguousarray(model.encode([query]), dtype=np.float32)

    # Only the neighbour positions are needed; the distances are not displayed.
    _, neighbor_ids = index.search(query_vector, k)

    # Display results
    print(f"\nTop {k} matches for: '{query}'")
    for row_pos in neighbor_ids[0]:
        # FAISS fills unused result slots with -1 when fewer than k vectors
        # are available; df.iloc[-1] would wrongly show the last patent.
        if row_pos < 0:
            continue
        patent = df.iloc[row_pos]
        print("\n------------------------------")
        print(f"📌 Title: {patent['title']}")
        print(f"🧾 Abstract: {patent['abstract']}")


In [None]:
# Example query: print the three closest patents for a free-text description
search_patents(query="AI system for self-driving cars", k=3)