In [1]:
import pennylane as qml
from pennylane import numpy as np
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize
 
# ---- Where the fitted artifacts get written ----
TFIDF_MODEL_PATH, QUANTUM_MATRIX_PATH = (
    "quantum_policy_vectorizer.pkl",
    "quantum_policy_tfidf_matrix.pkl",
)

# ---- Step 1: Read both CSV files (same order as before: train, then full) ----
train_df, full_df = (
    pd.read_csv(csv_name)
    for csv_name in ("train_policies.csv", "education_policies.csv")
)
 
# ---- Step 2: Preprocess data ----
def preprocess(df):
    """Return a copy of ``df`` with a lower-cased ``text_for_nlp`` column.

    The new column concatenates three source columns as
    ``"<title>. <full_text>. Stakeholders: <stakeholders>"`` and lower-cases
    the result.

    Missing values are replaced with an empty string before concatenation.
    A bare ``astype(str)`` would render them as the literal words
    ``"nan"`` / ``"None"``, which would then end up in the text corpus.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``title``, ``full_text`` and ``stakeholders``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra column; the input frame is not modified.

    Raises
    ------
    KeyError
        If one of the three required columns is absent.
    """
    df = df.copy()

    def _as_text(column):
        # Blank out NaN/None first so they do not become "nan"/"None" strings.
        return df[column].fillna("").astype(str)

    df["text_for_nlp"] = (
        _as_text("title") + ". " +
        _as_text("full_text") + ". Stakeholders: " +
        _as_text("stakeholders")
    ).str.lower()
    return df

# Add the combined, lower-cased text column to both frames.
train_df, full_df = preprocess(train_df), preprocess(full_df)

# ---- Step 3: Classical TF-IDF ----
# Vocabulary is capped at 8 unigrams for the quantum demonstration;
# every TF-IDF column later becomes one qubit.
vectorizer = TfidfVectorizer(ngram_range=(1, 1), max_features=8).fit(
    train_df["text_for_nlp"]
)
# Vectorize the full corpus, densify, then L2-normalize for the quantum embedding.
tfidf_matrix = normalize(
    vectorizer.transform(full_df["text_for_nlp"]).toarray(),
    norm="l2",
)

# ---- Step 4: Quantum device setup ----
# One wire per TF-IDF feature column.
n_qubits = tfidf_matrix.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)
 
# ---- Step 5: Define quantum feature encoder ----
@qml.qnode(dev)
def quantum_encoder(x):
    """Quantum feature map for a single TF-IDF vector.

    ``x`` is angle-embedded across the ``n_qubits`` wires, one
    ``BasicEntanglerLayers`` block with all-ones weights is applied, and the
    Pauli-Z expectation value of every wire is returned as a list.
    """
    wires = range(n_qubits)
    entangler_weights = np.ones((1, n_qubits))

    # Encode the classical features as qubit rotations, then entangle.
    qml.templates.AngleEmbedding(x, wires=wires)
    qml.templates.BasicEntanglerLayers(weights=entangler_weights, wires=wires)

    measurements = []
    for wire in wires:
        measurements.append(qml.expval(qml.PauliZ(wire)))
    return measurements
    
# ---- Step 6: Encode all samples ----
# Run the circuit once per TF-IDF row and collect the results into one array.
quantum_features = np.array(list(map(quantum_encoder, tfidf_matrix)))

# ---- Step 7: Save both classical and quantum components ----
# Fitted TF-IDF vectorizer
joblib.dump(vectorizer, TFIDF_MODEL_PATH)

# Quantum features together with the classical matrix and the source frame
joblib.dump(
    dict(
        quantum_features=quantum_features,
        tfidf_matrix=tfidf_matrix,
        df=full_df,
    ),
    QUANTUM_MATRIX_PATH,
)

print(f"✅ Quantum TF-IDF model saved to '{TFIDF_MODEL_PATH}'")
print(f"✅ Quantum feature matrix saved to '{QUANTUM_MATRIX_PATH}'")
 
import textwrap
import joblib
import numpy as np
import pennylane as qml
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
 
# --------------------------------------------------
# Reload the persisted TF-IDF vectorizer and quantum embeddings
# --------------------------------------------------
# NOTE: joblib.load unpickles its input, which can execute arbitrary code.
# Only load artifact files that you produced yourself.

TFIDF_MODEL_PATH = "quantum_policy_vectorizer.pkl"
QUANTUM_MATRIX_PATH = "quantum_policy_tfidf_matrix.pkl"

vectorizer, data = joblib.load(TFIDF_MODEL_PATH), joblib.load(QUANTUM_MATRIX_PATH)

# Unpack the saved bundle into the names used by the search code.
quantum_features, tfidf_matrix, df = (
    data["quantum_features"],
    data["tfidf_matrix"],
    data["df"],
)

# --------------------------------------------------
# Quantum device setup
# --------------------------------------------------
# One wire per TF-IDF feature column.
n_qubits = tfidf_matrix.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)
@qml.qnode(dev)
def quantum_encoder(x):
    """Quantum feature map: encode a classical TF-IDF vector into qubits.

    Applies an angle embedding of ``x`` over all ``n_qubits`` wires followed
    by one ``BasicEntanglerLayers`` block, then returns the list of Pauli-Z
    expectation values, one per wire.
    """
    all_wires = range(n_qubits)
    qml.templates.AngleEmbedding(x, wires=all_wires)
    # Entangling weights are fixed at 1; shape is (1, n_qubits).
    layer_weights = np.ones((1, n_qubits))
    qml.templates.BasicEntanglerLayers(weights=layer_weights, wires=all_wires)
    return [qml.expval(qml.PauliZ(wire)) for wire in all_wires]
 
# --------------------------------------------------
# Quantum search function
# --------------------------------------------------

def answer_query(query, top_k=3):
    """Print the top-k policies most similar to ``query`` (quantum TF-IDF encoding).

    The query is lower-cased, vectorized with the module-level ``vectorizer``,
    L2-normalized, passed through ``quantum_encoder`` and compared by cosine
    similarity against the stored ``quantum_features``. The best-matching rows
    of ``df`` are printed; nothing is returned.

    Parameters
    ----------
    query : str
        Free-text search query.
    top_k : int, default 3
        Number of best-matching policies to print.

    Returns
    -------
    None

    Notes
    -----
    * If the query shares no term with the fitted vocabulary, its TF-IDF
      vector is all zeros. The encoder output, and therefore the ranking,
      would then be identical for every such query, so the function reports
      this and returns without printing a ranking.
    * A missing ``full_text`` value is printed as an empty summary.
      ``textwrap.shorten`` only accepts strings and would otherwise raise.
    """
    # Step 1: Classical TF-IDF of query
    query_vec = vectorizer.transform([query.lower()]).toarray()
    query_vec = normalize(query_vec, norm="l2")

    if not query_vec.any():
        # No in-vocabulary term: any ranking would not depend on the query.
        print(f"\n🔎 Quantum Query: {query}")
        print("No terms from the TF-IDF vocabulary were found in this query; no results to rank.")
        return

    # Step 2: Quantum encode the query
    query_quantum = np.array(quantum_encoder(query_vec[0]))
    # Step 3: Compute similarity with stored quantum features
    sims = cosine_similarity([query_quantum], quantum_features).flatten()
    top_idx = sims.argsort()[::-1][:top_k]

    # Step 4: Display results
    print(f"\n🔎 Quantum Query: {query}")
    for idx in top_idx:
        row = df.iloc[idx]
        # Coerce to text and blank out missing values before shortening.
        full_text = "" if row.isna()["full_text"] else str(row["full_text"])
        snippet = textwrap.shorten(full_text, width=250, placeholder="...")
        print(f"\n📌 {row['title']} ({row['policy_id']}) | Quantum Similarity = {sims[idx]:.3f}")
        print(f"Region: {row['region']} | Year: {row['year']} | Status: {row['status']}")
        print(f"Summary: {snippet}")

# --------------------------------------------------
# Demo: run one sample search
# --------------------------------------------------
answer_query(query="teacher training and capacity building initiatives", top_k=3)

ModuleNotFoundError: No module named 'pennylane'

In [2]:
!pip install pennylane

Collecting pennylane
  Downloading pennylane-0.42.3-py3-none-any.whl.metadata (11 kB)
Collecting networkx (from pennylane)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting autograd (from pennylane)
  Downloading autograd-1.8.0-py3-none-any.whl.metadata (7.5 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray<0.8,>=0.6.11 (from pennylane)
  Downloading autoray-0.7.2-py3-none-any.whl.metadata (5.8 kB)
Collecting cachetools (from pennylane)
  Downloading cachetools-6.2.1-py3-none-any.whl.metadata (5.5 kB)
Collecting pennylane-lightning>=0.42 (from pennylane)
  Downloading pennylane_lightning-0.42.0-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting tomlkit (from pennylane)
  Downloading tomlkit-0.13.3-py3-none-any.whl.metadata (2.8 kB)
Collecting diastatic-malt (from pennylane)
  Downloading diastatic_malt-2.15.2-py3-none-any.whl.metadata (2.6 kB)
Collecting scipy-openblas32>=0.3.26 (fro