In [2]:
!pip install transformers datasets torch




In [6]:
!pip install faiss-gpu


Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [8]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.11.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.3 (from gradio)
  Downloading gradio_client-1.5.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [None]:
import os
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
import json
import gradio as gr

# Step 1: Sentence-embedding model, used for both the stored questions and incoming queries
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Step 2: Seq2Seq language model wrapped in a text2text-generation pipeline
generation_model_name = "google/flan-t5-large"  # Replace with a compatible Seq2Seq model
tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(generation_model_name)
generation_pipeline = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Step 3: Flat L2 FAISS index sized to the embedding model's output dimension
dimension = embedding_model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dimension)

# Step 4: Load Dataset
# The file is read as JSON and iterated as a sequence of items; only the
# 'question' and 'answer' keys of each item are used below.
data_file = "questions_answers.json"  # Replace with the path to your dataset file
try:
    # Explicit UTF-8 so loading does not depend on the platform's default encoding.
    with open(data_file, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError as err:
    # Keep the original exception chained so the underlying OS error stays visible.
    raise FileNotFoundError(f"Dataset file {data_file} not found. Please check the file path.") from err

# Fail early with a clear message instead of an opaque error when indexing nothing.
if not data:
    raise ValueError(f"Dataset file {data_file} contains no question/answer entries.")

# Prepare corpus and FAISS index
corpus = [item['question'] for item in data]
answers = [item['answer'] for item in data]  # answers[i] belongs to corpus[i]
# FAISS's Python bindings work on float32 matrices, so the dtype is made explicit.
embeddings = np.array(embedding_model.encode(corpus), dtype="float32")
index.add(embeddings)

# Step 5: Define Chat Function
def chat(query):
    """Answer a question using the closest stored Q&A pair plus the language model.

    The query is embedded, the single nearest stored question is looked up in the
    FAISS index, and the query together with that question/answer pair is passed
    to the generation pipeline.

    Args:
        query: The question text entered by the user.

    Returns:
        A multi-line string containing the query, the retrieved question
        (context), its stored answer, and the generated response. For an empty
        or whitespace-only query, a short message asking for a question.
    """
    # A blank query has nothing to retrieve or generate from; skip both models.
    if not query or not query.strip():
        return "Please enter a question."

    # Retrieve relevant knowledge
    query_embedding = embedding_model.encode([query])
    _, neighbor_ids = index.search(query_embedding, k=1)  # Search for the closest match
    match_id = int(neighbor_ids[0][0])

    # FAISS uses -1 as the label when it has no neighbour to return. Checking
    # only `< len(corpus)` would accept -1 and silently pick the last entry,
    # so the lower bound is checked as well.
    if 0 <= match_id < len(corpus):
        relevant_question = corpus[match_id]
        relevant_answer = answers[match_id]  # Fetch the corresponding answer
    else:
        relevant_question = "No relevant question found."
        relevant_answer = "I couldn't find a relevant answer."

    # Augment the query with context
    augmented_query = f"Question: {query}\nContext: {relevant_question}\nAnswer: {relevant_answer}"

    # Generate response
    generated = generation_pipeline(augmented_query, max_length=200, num_return_sequences=1)
    response = generated[0]['generated_text']

    return f"Query: {query}\nContext: {relevant_question}\nAnswer: {relevant_answer}\nResponse: {response}"

# Step 6: Gradio Interface
# One text box in, one text box out, both wired to chat().
question_box = gr.Textbox(label="Enter your question:")
response_box = gr.Textbox(label="Chatbot Response:")
interface = gr.Interface(fn=chat, inputs=question_box, outputs=response_box)

if __name__ == "__main__":
    # debug=True keeps the cell running so errors and logs stay visible in the notebook.
    interface.launch(debug=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7ebc8c2bbe9c8b6699.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
