In [None]:
import sqlite3
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np

# Build the artifacts for the patent search pipeline:
#   1. read patent metadata from the local SQLite database,
#   2. embed "<title>. <abstract>" for every row with a SentenceTransformer model,
#   3. write the embedding matrix and the metadata table to disk.
# Row i of embeddings.npy corresponds to row i of patent_data.csv, because both
# are produced from the same DataFrame without reordering.

# Connect to SQLite database
conn = sqlite3.connect("patent_db.sqlite")
try:
    # Load data from the 'patents' table (exclude 'processed')
    query = "SELECT id, title, abstract, patent_number, publication_date, inventors, assignees FROM patents"
    df = pd.read_sql_query(query, conn)
finally:
    # Close the connection as soon as the data is in memory. Doing it in a
    # `finally` guarantees the handle is released even when the query fails
    # (e.g. missing table), instead of leaking it in the running kernel.
    conn.close()

# Combine title and abstract for embeddings.
# NULL titles/abstracts become empty strings so the concatenation never
# yields NaN (which would otherwise be passed to the encoder as a float).
df["title"] = df["title"].fillna("")
df["abstract"] = df["abstract"].fillna("")
df["text"] = df["title"] + ". " + df["abstract"]

# Load SentenceTransformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Generate sentence embeddings (one vector per row of df, in row order)
texts = df["text"].tolist()
embeddings = model.encode(texts, show_progress_bar=True)

# Save outputs; the CSV also carries the derived 'text' column.
np.save("embeddings.npy", embeddings)
df.to_csv("patent_data.csv", index=False)

print("✅ Embeddings and CSV file saved.")


In [None]:
# Launch the Streamlit application defined in app.py through a shell escape.
# NOTE(review): `streamlit run` starts a server process, so this cell is expected
# to keep running until the kernel is interrupted — confirm this is intended.
# Presumably app.py consumes the embeddings.npy / patent_data.csv files written
# by the previous cell; verify against app.py.
!streamlit run app.py