In [1]:
!pip install transformers
!pip install sentence-transformers
!pip install qdrant-client
!pip install PyMuPDF  # for parsing PDFs
!pip install pypdf2   # Alternative for PDF parsing if needed
!pip install langchain


Collecting sentence-transformers
  Downloading sentence_transformers-3.1.0-py3-none-any.whl.metadata (23 kB)
Downloading sentence_transformers-3.1.0-py3-none-any.whl (249 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.0
Collecting qdrant-client
  Downloading qdrant_client-1.11.2-py3-none-any.whl.metadata (10 kB)
Collecting grpcio-tools>=1.41.0 (from qdrant-client)
  Downloading grpcio_tools-1.66.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting httpx>=0.20.0 (from httpx[http2]>=0.20.0->qdrant-client)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-tools>=1.41.0->qdrant-client)
 

In [2]:
!pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.0-py3-none-any.whl.metadata (2.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.5.2-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloa

In [3]:
!pip install  gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.114.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.6.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting tomlkit==0.12

In [5]:
import torch
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct
from uuid import uuid4

# Initialize Qdrant client (assumes a local Qdrant instance running)
client = QdrantClient(":memory:")  # Use ":memory:" for in-memory storage

# Create the collection with vector parameters
client.recreate_collection(
    collection_name="my_documents",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)

# Load the pre-trained sentence transformer model for multilingual embeddings
embedding_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Load the pre-trained multilingual QA model from Hugging Face
qa_model_name = "deepset/xlm-roberta-base-squad2"
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
device = 0 if torch.cuda.is_available() else -1
qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer, device=device)

# Function to convert text into documents with pagination
def text_to_docs(text, chars_per_page=2000):
    pages = [text[i:i+chars_per_page] for i in range(0, len(text), chars_per_page)]
    docs = [{"id": str(uuid4()), "page_content": page} for page in pages]
    return docs

# Function to handle user input
def handle_text(text):
    pages = text_to_docs(text)

    # Create embeddings and store them in Qdrant
    client.recreate_collection(
        collection_name="my_documents",
        vectors_config=VectorParams(size=384, distance=Distance.COSINE)
    )

    for page in pages:
        embedding = embedding_model.encode(page['page_content'])
        point = PointStruct(
            id=page['id'],
            vector=embedding.tolist(),
            payload={"page_content": page['page_content']}
        )
        client.upsert(collection_name="my_documents", points=[point])

    return "Text processed and ready for questioning."

# Function to handle user questions
def ask_question(question, text):
    try:
        # Process and store text
        handle_text(text)

        # Generate the embedding for the question
        question_embedding = embedding_model.encode(question).tolist()

        # Search for the most similar document in the collection
        search_result = client.search(
            collection_name="my_documents",
            query_vector=question_embedding,
            limit=1  # Retrieve the top result
        )

        if search_result:
            context = search_result[0].payload['page_content']

            # Use the QA pipeline to answer the question based on the context
            qa_input = {"question": question, "context": context}
            result = qa_pipeline(qa_input)
            return result['answer']
        else:
            return "Sorry, I couldn't find an answer to that question."
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Interactive chat loop
def chat(text, question):
    return ask_question(question, text)

# Run the Gradio interface
import gradio as gr

interface = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter your text here...", label="Enter Text"),
        gr.Textbox(lines=1, placeholder="Ask a question...", label="Question")
    ],
    outputs="text",
    title="Text-Based Question Answering Bot",
    description="Enter any text paragraph and ask questions about it."
)

interface.launch()


  client.recreate_collection(


sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6320adb280f8455524.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [6]:
!pip install openai

Collecting openai
  Downloading openai-1.45.1-py3-none-any.whl.metadata (22 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Downloading openai-1.45.1-py3-none-any.whl (374 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.2/374.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jiter, openai
Successfully installed jiter-0.5.0 openai-1.45.1


In [7]:
!pip install gradio



In [8]:
import gradio as gr
import openai

# Set your OpenAI API key here
openai.api_key = 'sk-41qQFnRM-0d-uUGy0akjHKCnYfBZr1Txx27DvgZg3qT3BlbkFJZTCDfsH4Tyn1x5E-zRtl0LH1gIUdH2pyf-TEOhitMA'

def get_answer(question):
    try:
        # Query the OpenAI API using the updated interface
        response = openai.ChatCompletion.create(
            model="gpt-3.5",  # or "gpt-4" if you prefer
            messages=[
                {"role": "user", "content": question}
            ]
        )
        answer = response.choices[0].message['content'].strip()
        return answer
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Define the Gradio interface
def chat(question):
    return get_answer(question)

interface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask any question here...", label="Question"),
    outputs="text",
    title="General Knowledge Chatbot",
    description="Ask questions on a wide range of topics and get detailed answers."
)

interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://57de855f17e4697dae.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [9]:
import torch
print(torch.cuda.is_available())  # Should return True if GPU is active


True


In [10]:
from transformers import GPTNeoForCausalLM  # Correct model for GPT-Neo


In [13]:
import gradio as gr
from transformers import pipeline

# Load a language model from Hugging Face
generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")

def generate_response(prompt):
    try:
        # Generate a response using the GPT-Neo model
        response = generator(prompt, max_length=150, do_sample=True, temperature=0.7, top_p=0.9)
        return response[0]["generated_text"].strip()
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Define the Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Ask any question here...", label="Question"),
    outputs="text",
    title="GPT-Neo Chatbot",
    description="Ask questions and get detailed responses from GPT-Neo."
)

# Launch the Gradio interface with public sharing enabled
interface.launch(share=True)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0b6bb8886791880444.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


