# a. BASIC Q&A SYSTEM USING KEYWORD MATCHING

In [None]:
# Build the question -> answer lookup table and persist it as qa_dataset.json
import json

# Keys are lower-case questions; values are the canned answers returned for them.
qa_data = {
    "what is artificial intelligence": "Artificial Intelligence is the simulation of human intelligence processes by machines.",
    "how does machine learning work": "Machine Learning is a subset of AI that allows systems to learn and improve from experience without being explicitly programmed.",
    "what are applications of nlp": "Natural Language Processing is used in chatbots, translation services, sentiment analysis, and more.",
    "what is deep learning": "Deep Learning is a subset of machine learning using neural networks with many layers.",
    "what is supervised learning": "Supervised learning uses labeled data to train models to predict outcomes.",
    "what is reinforcement learning": "Reinforcement learning is an area of machine learning where agents learn to make decisions by trial and error."
}

# Serialize first (indent=4 keeps the file human-readable), then write the text in one go.
serialized_dataset = json.dumps(qa_data, indent=4)
with open('qa_dataset.json', 'w') as dataset_file:
    dataset_file.write(serialized_dataset)
print("Dataset saved as qa_dataset.json")


Dataset saved as qa_dataset.json


In [None]:
import json

# Read the saved dataset back into memory as a dict mapping question -> answer
with open('qa_dataset.json', 'r') as dataset_file:
    qa_data = json.loads(dataset_file.read())

def find_answer(question, qa_data):
    """Return the stored answer whose question contains every keyword asked.

    The question is lower-cased and split into alphanumeric keywords, so
    punctuation such as a trailing "?" no longer prevents a match. A stored
    question matches when each keyword occurs in it as a substring; the first
    matching entry (in dict order) wins.

    Args:
        question: Free-text question typed by the user.
        qa_data: Mapping of known question strings to answer strings.

    Returns:
        The matching answer, or a fixed apology string when the question has
        no usable keywords or no stored question contains all of them.
    """
    import re  # local import keeps this function self-contained within the cell

    fallback = "Sorry, I don't know the answer to that question."

    # \w+ drops punctuation and whitespace, leaving only the keywords themselves.
    question_tokens = re.findall(r"\w+", question.lower())
    if not question_tokens:
        # Without this guard all([]) is True and the first entry would be returned.
        return fallback

    for q, a in qa_data.items():
        stored_question = q.lower()  # lower-case once per entry, not once per keyword
        if all(keyword in stored_question for keyword in question_tokens):
            return a
    return fallback

def main():
    """Run an interactive question/answer loop on standard input.

    Prompts with "You: " until the user types 'exit' or 'quit' (any case,
    surrounding whitespace ignored), the input stream ends, or the user
    interrupts. Blank lines are skipped; every other line is looked up with
    find_answer against the module-level qa_data and the answer is printed.

    Returns:
        None.
    """
    print("Ask your questions! (type 'exit' or 'quit' to stop)")
    while True:
        try:
            user_question = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or a user interrupt: leave the loop cleanly
            # instead of surfacing a traceback.
            print("\nGoodbye!")
            break
        if not user_question:
            # Nothing was typed; prompt again rather than querying the matcher.
            continue
        if user_question.lower() in ('exit', 'quit'):
            print("Goodbye!")
            break
        answer = find_answer(user_question, qa_data)
        print("Answer:", answer)

# Start the interactive loop only when this code runs as the main program; the
# recorded session output shows this also holds when the cell is executed.
if __name__ == "__main__":
    main()


Ask your questions! (type 'exit' or 'quit' to stop)
You: what is supervised learning
Answer: Supervised learning uses labeled data to train models to predict outcomes.
You: exit
Goodbye!


# b. BUILDING A Q&A SYSTEM WITH BERT

In [None]:
# Install required packages
!pip install transformers torch



In [None]:


# Import libraries
from transformers import pipeline

# One checkpoint name supplies both the model weights and the tokenizer.
squad_checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Build the question-answering pipeline from that checkpoint
qa_pipeline = pipeline(
    "question-answering",
    model=squad_checkpoint,
    tokenizer=squad_checkpoint,
)

# Sample input: the passage to search and the question to answer from it
context = "BERT is a pre-trained transformer model for natural language understanding."
question = "What is BERT?"

# Run inference; the result is indexed below by its 'answer' and 'score' keys
result = qa_pipeline(question=question, context=context)

# Report the question together with the extracted answer and its score
print("Question:", question)
print("Answer:", result['answer'])
print("Score:", result['score'])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


Question: What is BERT?
Answer: a pre-trained transformer model for natural language understanding
Score: 0.4890269935131073


# c. QUESTION ANSWERING ON SQUAD DATASET USING TRANSFORMERS

In [None]:
import os
# Sets WANDB_DISABLED before transformers is imported further down in this cell.
# NOTE(review): presumably meant to switch off the Weights & Biases logging
# integration; nothing visible in this cell trains a model, so confirm it is needed.
os.environ["WANDB_DISABLED"] = "true"

# Install dependencies
!pip install transformers torch gradio --quiet

from transformers import pipeline
import gradio as gr

# Build the question-answering pipeline from the DistilBERT SQuAD checkpoint
qa = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# Function for QA
def answer_question(context, question):
    """Answer a question from a context paragraph using the loaded QA pipeline.

    Args:
        context: Passage of text the answer should be extracted from.
        question: Question to answer from that passage.

    Returns:
        The pipeline's "answer" string, or a short prompt asking for input
        when either field is missing or contains only whitespace.
    """
    # The question-answering pipeline rejects empty question/context strings,
    # which would surface as an error in the UI; return a readable message instead.
    if not (context and context.strip()) or not (question and question.strip()):
        return "Please provide both a context and a question."
    result = qa(question=question, context=context)
    return result["answer"]

# Gradio Interface: declare the three text components, then wire them to the QA function
context_box = gr.Textbox(label="Context", placeholder="Enter the context paragraph here...")
question_box = gr.Textbox(label="Question", placeholder="Enter your question here...")
answer_box = gr.Textbox(label="Answer")

iface = gr.Interface(
    fn=answer_question,
    inputs=[context_box, question_box],  # order matches answer_question(context, question)
    outputs=answer_box,
    title="Question Answering with DistilBERT",
    description="Enter a context and a question, and the model will return the answer.",
)

# Start the web UI
iface.launch()


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://699a648e0bc9e59394.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


