In [1]:
# Uncommnet and install the required library if running on Kaggle or Google colab
# ! pip install -U langchain-community
# ! pip install chromadb

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
import pickle

from langchain.document_loaders import TextLoader




In [4]:
from huggingface_hub import login
login("")  # <- Enter your hugging face token here

In [5]:
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

  embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")


In [8]:
# Step 2: Load your text (adjust the path as needed)
loader = TextLoader("./data/qcnotes_revised.txt", encoding='utf-8')
raw_docs = loader.load()

In [9]:
# --- 2. Chunk Text ---

# Step 3: Split into manageable chunks
splitter = RecursiveCharacterTextSplitter( chunk_size=600,
    chunk_overlap=100,
    separators=["\n\n", "\n", " "])

docs = splitter.split_documents(raw_docs)

print(f" Created {len(docs)} chunks.")

 Created 1012 chunks.


In [10]:
# Step 4: Create the vectorstore using Chroma
vectorstore = Chroma.from_documents(docs, embedding=embedding_model, 
                                    persist_directory="./chroma_bge_768",  collection_name="qnotes_docs")

# Optional: Persist and reload later
vectorstore.persist()

  vectorstore.persist()


In [11]:
import torch
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [12]:
# Step 1: Set the model name
model_name = "google/gemma-2b-it"
from transformers import AutoTokenizer, AutoModelForCausalLM

# Step 2: Load tokenizer and fix padding side (Gemma requires left-padding for generation)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"  # Gemma needs left padding for batched generation

# Step 3: Load model directly onto GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",       # or omit this if issues arise
    device_map=None           # Don't use device_map on CPU

)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.0,      # No randomness
    top_p=1.0,
    do_sample=False       # Disable sampling; forces greedy decoding

)

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


In [13]:
def anwer(user_query, sample_no = 1):

    # Use your vectorstore to get context
    retrieved_docs = vectorstore.similarity_search(user_query, k=5)
    retrieved_context = "\n\n".join([doc.page_content for doc in retrieved_docs])
    

    prompt = f"""
    You are a quantum computing assistant trained on academic lecture notes. 
    
    Answer the question strictly based on the provided context. Do not rely on prior knowledge or assumptions. If the answer is not explicitly present in the context, reply with "I don't know."
    
    **Key instructions:**
    - Use technical terms and examples exactly as presented in the context.
    - When a definition includes a quantum state (like |00⟩ or entangled states), include it explicitly.
    - Avoid vague language (like "a unit vector") if not supported by the context.
    
    If the context contains an explicit quantum state definition (e.g., |00⟩ + |11⟩), that expression must be shown in the answer.

    Context:
    {retrieved_context}
    
    Question:
    {user_query}
    
    Answer (structured and accurate):
    """

    
    # Run with Gemma or Mistral
    output = pipe(prompt)[0]['generated_text']

    answer = output.split("Answer (structured and accurate):")[-1].strip()
    context = output.split("Context:")[-1].strip().split("Question:")[0].strip()
    question = output.split("Question:")[-1].strip().split("Answer (structured and accurate):")[0].strip()
    
    result = {
        "question": question,
        "context": context,
        "answer": answer
    }
    
    print("sample"+str(sample_no))
    print("question: ", result["question"])
    print("answer: ", result["answer"])
    print()

In [14]:
qs = ['What are three reasons to study quantum computers ?',
 'What is the purpose of the Hadamard gate in quantum computing ?',
 'What is the standard form of an EPR-pair ? Also include equation',
 'Who proposed the first efficient quantum algorithm for factoring, and in what year ?',
 'What is the role of the quantum circuit model in computation ?',
 'How can an EPR-pair simulate a public coin toss ?',
 'What is the function of the Toffoli gate, and why is it important ?',
 'What does the Quantum Fourier Transform do in phase estimation ?',
 'What is the difference between the quantum Turing machine and the quantum circuit model ?',
 'Why did Feynman propose the idea of quantum computers ?']

In [15]:
# 
for i, q in enumerate(qs):
    anwer(q, sample_no = i)
    print('-------------------------------------------------------------')



sample0
question:  What are three reasons to study quantum computers ?
answer:  1. The process of miniaturization that has made current classical computers so powerful and cheap, has already reached micro-levels where quantum eﬀects occur.
    2. Making use of quantum eﬀects allows one to speed up certain computations enormously (sometimes exponentially), and even enables some things that are impossible for classical computers.
    3. A quantum computer is like a universal quantum system, and should be able to eﬃciently simulate every eﬃcient quantum process.

-------------------------------------------------------------
sample1
question:  What is the purpose of the Hadamard gate in quantum computing ?
answer:  The purpose of the Hadamard gate in quantum computing is to perform a tensor product of the initial state vector with itself, which results in a superposition of all n-bit strings. This allows quantum algorithms to explore a vast number of different quantum states simultaneously