In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import json
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

In [None]:
# Chunking stage: split every scraped page into overlapping passages small
# enough to embed, and persist them together with their source URL.

# Load the scraped pages. Each record is read through its "url" and "content" keys.
with open("davv_scraped_data.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Initialize the chunker
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    # The trailing "" is the character-level fallback: without it, a run of
    # text containing none of the other separators cannot be split and may
    # come out longer than chunk_size.
    separators=["\n\n", "\n", ".", " ", ""],
    # Attach each separator to the END of the piece before it, so a sentence
    # keeps its own full stop instead of the next chunk starting with ". ".
    # NOTE(review): releases that predate the "end" option are expected to
    # treat this truthy value like keep_separator=True (the previous
    # behaviour) - confirm against the installed langchain version.
    keep_separator="end",
)

# Create chunked data
chunked_data = []
skipped_pages = 0

for doc in raw_data:
    content = doc.get("content")
    # A page with missing, null or blank content has nothing to embed; skip it
    # rather than aborting the whole run on a KeyError/TypeError.
    if not isinstance(content, str) or not content.strip():
        skipped_pages += 1
        continue
    page_url = doc.get("url")
    for chunk in text_splitter.split_text(content):
        chunked_data.append({"url": page_url, "chunk": chunk})

if skipped_pages:
    print(f"⚠️ Skipped {skipped_pages} page(s) without usable text content.")

# Save chunked output
with open("davv_chunked_data.json", "w", encoding="utf-8") as f:
    json.dump(chunked_data, f, indent=2, ensure_ascii=False)

In [None]:
# Embedding stage: turn the chunk texts into vectors, index them with FAISS,
# and store the texts in the same order so a vector id maps back to its text.

# Load chunked data. A dedicated name is used so the scraped pages held in
# `raw_data` by the chunking cell are not silently replaced by chunk records.
with open("davv_chunked_data.json", "r", encoding="utf-8") as f:
    chunk_records = json.load(f)

# Filter valid text chunks (non-empty strings after stripping whitespace)
valid_chunks = [
    entry["chunk"].strip()
    for entry in chunk_records
    if isinstance(entry.get("chunk"), str) and entry["chunk"].strip()
]

# Drop exact duplicates while keeping first-seen order. Text repeated on many
# pages would otherwise be embedded once per page and fill the top-k results
# with identical passages.
documents = list(dict.fromkeys(valid_chunks))
print(f"✅ Total valid chunks found: {len(documents)}")
if len(documents) != len(valid_chunks):
    print(f"♻️ Removed {len(valid_chunks) - len(documents)} duplicate chunk(s).")

if not documents:
    raise ValueError("❌ No valid chunks found to embed!")

# Load sentence transformer model
print("🚀 Loading embedding model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Generate embeddings
print("📍 Creating embeddings...")
embeddings = model.encode(documents, show_progress_bar=True, convert_to_numpy=True)
# FAISS consumes C-contiguous float32 matrices; make that explicit instead of
# relying on the encoder's default output dtype and layout.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
if embeddings.shape[0] != len(documents):
    raise ValueError("❌ Embedding count does not match the number of chunks!")

# Create FAISS index (exact L2 search); vector i corresponds to documents[i]
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save vector index + metadata
faiss.write_index(index, "davv_index.faiss")

# Save metadata: the chunk texts, in the same order as the vectors in the index
with open("davv_metadata.json", "w", encoding="utf-8") as f:
    json.dump(documents, f, ensure_ascii=False, indent=2)

print("✅ FAISS index and metadata saved successfully.")


  from .autonotebook import tqdm as notebook_tqdm


✅ Total valid chunks found: 4463
🚀 Loading embedding model...
📍 Creating embeddings...


Batches: 100%|██████████| 140/140 [01:51<00:00,  1.25it/s]


✅ FAISS index and metadata saved successfully.


In [None]:
# Restore the retrieval artefacts: the embedding model, the FAISS index, and
# the chunk texts that the index positions refer to.
print("🔍 Loading model and index...")
model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.read_index("davv_index.faiss")

# The metadata file is a JSON list of chunk texts; it is parsed into `chunks`.
with open("davv_metadata.json", mode="r", encoding="utf-8") as metadata_file:
    chunks = json.loads(metadata_file.read())

def search_query(user_query, top_k=3):
    """Return the stored text chunks closest to ``user_query``.

    The query is embedded with the module-level ``model``, looked up in the
    module-level FAISS ``index``, and the returned positions are mapped back
    to their texts through the module-level ``chunks`` list.

    Args:
        user_query: The question or search phrase to embed.
        top_k: Maximum number of chunks to return. Values <= 0 return ``[]``.

    Returns:
        A list of at most ``top_k`` chunk texts, in the order reported by the
        index. Positions that do not refer to a stored chunk are skipped.
    """
    print(f"📨 User query: {user_query}")
    if top_k <= 0:
        # Nothing was requested; do not ask the index for a non-positive k.
        return []

    query_embedding = model.encode([user_query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
        position = int(idx)
        # FAISS fills missing neighbours with the label -1. A plain
        # `idx < len(chunks)` check lets -1 through, and Python would then
        # return the LAST chunk as a bogus match, so the lower bound is
        # checked as well.
        if 0 <= position < len(chunks):
            results.append(chunks[position])
    return results

# Example: interactive prompt that keeps answering queries until the user
# types 'exit' (or closes/interrupts the input).
if __name__ == "__main__":
    while True:
        try:
            query = input("\nAsk DAVV Assistant something (or type 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input ends the session without a traceback.
            break
        if not query:
            # A blank line carries no question; prompt again instead of searching.
            continue
        if query.lower() == "exit":
            break
        results = search_query(query)
        print("\n🔎 Top Matches:\n")
        if not results:
            print("No matches found.\n")
        for i, res in enumerate(results, 1):
            print(f"{i}. {res}\n")


  from .autonotebook import tqdm as notebook_tqdm


🔍 Loading model and index...
📨 User query: full form of DAVV

🔎 Top Matches:

1. Latest at DAVV NEW Admission Notice : Advertisement NON CET 2024 03-06-2022 View Answer Books List for view answer book on 24.03.2022 Notice for Observation of Medical Courses Answer Sheet Dated From 16, Nov. to 17, Nov. - 2021 Important Links Result Upload Schedule MP Online Activities Exam Notices College Notices

2. Latest at DAVV NEW Admission Notice : Advertisement NON CET 2024 03-06-2022 View Answer Books List for view answer book on 24.03.2022 Notice for Observation of Medical Courses Answer Sheet Dated From 16, Nov. to 17, Nov. - 2021 Important Links Result Upload Schedule MP Online Activities Exam Notices College Notices

3. . It was established in 1964, by an Act of Legislature of Madhya Pradesh. DAVV is an affiliating State University whose jurisdiction includes seven tribal dominated districts of Indore division It is catering to the educational needs on one hand to the most industrially develo

In [None]:
from langchain_groq import ChatGroq

# Smoke test for the Groq-hosted chat model: send one prompt and print the reply.

# Read the API key from the environment instead of embedding it in the
# notebook, where it would be committed and shared with the file.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before running this cell."
    )

llm = ChatGroq(
    groq_api_key=groq_api_key,
    # NOTE(review): confirm this model id is still served by Groq before relying on it.
    model_name="llama-3.3-70b-specdec",
)

response = llm.invoke("What is quantum computing?")
print(response.content)

Quantum computing is a revolutionary technology that uses the principles of quantum mechanics to perform calculations and operations on data. It's a new paradigm for computing that has the potential to solve complex problems that are currently unsolvable or require an unfeasible amount of time to solve using traditional computers.

**Classical vs. Quantum Computing**

Classical computers use bits, which are either 0 or 1, to process information. These bits are used to perform calculations and operations, but they are limited by their binary nature. Quantum computers, on the other hand, use quantum bits or qubits, which can exist in multiple states simultaneously, known as superposition. This means that a qubit can represent both 0 and 1 at the same time, allowing for much faster and more efficient processing of certain types of calculations.

**Key principles of quantum computing:**

1. **Superposition**: Qubits can exist in multiple states simultaneously, allowing for the exploration 