# File QA RAG Chatbot App with ChatGPT and Chainlit

Here I will implement an advanced RAG System with ChatGPT, LangChain and Chainlit to build a File QA UI-based chatbot with the following features:

- PDF Document Upload and Indexing
- RAG System for query analysis and response
- Result streaming capabilities (Real-time output)
- Show document sources of the answer from RAG system

## Install App and LLM dependencies

In [None]:
!pip install chainlit pillow openai
import google.generativeai as genai



In [None]:
!pip install PyMuPDF==1.24.0
!pip install chromadb==0.4.24
!pip install pyngrok==7.1.5



## Load Gemini API Credentials

Here we load the key from a file so we don't expose the credentials on the internet by mistake.

In [None]:
import locale

# Force UTF-8 as the preferred encoding for this notebook session.
# The replacement keeps the optional `do_setlocale` argument of the real
# function so that callers using `locale.getpreferredencoding(False)` do not
# fail with a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
import yaml

# Read the API credentials from a local YAML file rather than hard-coding them.
# The encoding is passed explicitly so parsing does not depend on the
# platform's default text encoding.
with open('gemini1.yml', 'r', encoding='utf-8') as file:
    api_creds = yaml.safe_load(file)

In [None]:
api_creds.keys()

dict_keys(['gemini_api_key'])

In [None]:
import os

# Export the key as an environment variable; app.py reads GEMINI_API_KEY from
# os.environ when it configures the Gemini client.
os.environ['GEMINI_API_KEY'] = api_creds['gemini_api_key']

## Write the app code here and store it in a py file

In [None]:
%%writefile app.py
import google.generativeai as genai
from operator import itemgetter
import chainlit as cl
import tempfile
import os
from PIL import Image
import io
import chainlit as cl
import pandas as pd
# Configure the Gemini client with the key from the GEMINI_API_KEY environment
# variable before building the model, so the credentials are in place for
# every request.
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))

# Shared model instance used to answer questions about the uploaded images.
model = genai.GenerativeModel("gemini-1.5-flash")

@cl.on_chat_start
async def when_chat_starts():
    """Collect the user's images when a chat session starts.

    Keeps asking until the upload prompt returns files, saves them in the
    user session under "uploaded_files" (read back by the message handler),
    and then shows each uploaded image in the chat.
    """
    uploaded_files = None
    # send() yields None when no file was provided (presumably on timeout;
    # see the Chainlit AskFileMessage docs), so keep prompting until it
    # returns something.
    while uploaded_files is None:
        uploaded_files = await cl.AskFileMessage(
            content="Please upload images to continue.",
            accept=["image/*"],
            max_size_mb=20,
            max_files=5,
            timeout=180,
        ).send()

    # Store the files once, before any further awaits, so a question sent
    # while the previews are still being posted already finds them.
    cl.user_session.set("uploaded_files", uploaded_files)

    await cl.Message(content="Processing files, please wait...").send()
    await cl.sleep(2)

    # Echo every uploaded image back to the user as an inline element.
    for uploaded_file in uploaded_files:
        image = cl.Image(
            path=uploaded_file.path,
            name=uploaded_file.name,
            display="inline",
        )
        await cl.Message(
            content=f"Here is your uploaded image: {uploaded_file.name}",
            elements=[image],
        ).send()



def ask_questions_with_history(image_file, question, history):
    """Ask the Gemini model a question about one image and record the exchange.

    Args:
        image_file: Path of the image, passed to ``genai.upload_file``.
        question: The question text sent along with the uploaded image.
        history: List that receives a ``{'question', 'answer'}`` dict for
            this exchange. It is only appended to; its earlier entries are
            not sent to the model.

    Returns:
        The text of the model's response.
    """
    uploaded = genai.upload_file(image_file)
    prompt_parts = [uploaded, "\n\n", question]
    response = model.generate_content(prompt_parts)
    answer = response.text
    history.append({'question': question, 'answer': answer})
    return answer


def describe_image(image_path):
    """Return a sentence stating the pixel width and height of an image.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A string of the form "The image has a width of W pixels and height
        of H pixels."
    """
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the size has been read.
    with Image.open(image_path) as image:
        width, height = image.size
    return f"The image has a width of {width} pixels and height of {height} pixels."


# ChainLit interface setup
@cl.on_message
async def on_user_message(message: cl.Message):
    """Answer the user's question once for every image stored in the session.

    Reads the files saved under "uploaded_files" in the user session, asks
    the model the message text about each one, and sends each answer back as
    its own chat message.

    Args:
        message: The incoming chat message; its ``content`` is the question.
    """
    uploaded_files = cl.user_session.get("uploaded_files")
    # Nothing is stored if the user writes before an upload has completed;
    # reply with a hint instead of failing while iterating over None.
    if not uploaded_files:
        await cl.Message(
            content="Please upload at least one image before asking a question."
        ).send()
        return

    # Question/answer log for this message only; it is rebuilt on every call.
    history = []
    question_text = message.content

    # The Gemini upload and generation calls are blocking, so run them in a
    # worker thread to keep the event loop responsive.
    ask_async = cl.make_async(ask_questions_with_history)

    for uploaded_file in uploaded_files:
        answer = await ask_async(uploaded_file.path, question_text, history)

        # Send the answer back to the user
        await cl.Message(content=f"Answer: {answer}").send()

Writing app.py


## Start the app

In [None]:
!chainlit run app.py --port=8989 --watch &>./logs.txt &

## Change the Initial app screen

In [None]:
%%writefile chainlit.md

# Welcome to File QA RAG Chatbot 🤖

Please ask your question?

Writing chainlit.md


In [None]:
from pyngrok import ngrok
import yaml
from google.colab import userdata
# Terminate open tunnels if any exist
ngrok.kill()

# Setting the authtoken
# The token is read through google.colab's userdata under the name 'ngrok_key'
ngrok_key=userdata.get('ngrok_key')
ngrok.set_auth_token(ngrok_key)

# Open a tunnel to port 8989, the port passed to `chainlit run` above
ngrok_tunnel = ngrok.connect(8989)
print("Chainlit App:", ngrok_tunnel.public_url)

Chainlit App: https://2ef3-35-204-36-126.ngrok-free.app


## Remove running app processes

In [None]:
ngrok.kill()

In [None]:
!ps -ef | grep app

root           6       1  0 06:10 ?        00:00:14 /tools/node/bin/node /datalab/web/app.js
root       23149     334  0 07:46 ?        00:00:00 /bin/bash -c ps -ef | grep app
root       23151   23149  0 07:46 ?        00:00:00 grep app


In [None]:
!sudo kill -9 11975

kill: (11975): No such process


In [None]:
!sudo kill -9 47452


In [None]:
!sudo lsof -i :8989


COMMAND    PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
chainlit 47452 root   11u  IPv4 1278332      0t0  TCP localhost:8989 (LISTEN)
chainlit 47452 root   18u  IPv4 1291787      0t0  TCP localhost:8989->localhost:57728 (ESTABLISHED)
ngrok    47911 root   11u  IPv4 1291786      0t0  TCP localhost:57728->localhost:8989 (ESTABLISHED)


In [None]:
import google.generativeai as genai
import base64
import os
# Configure the API key
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))

# Function to encode the image in base64 format
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8 text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Example image path and encoding it
image_path = "/content/Cat image.png"
# NOTE: the base64 string is not part of the request below, which sends the
# uploaded file handle instead.
encoded_image = encode_image(image_path)

# Example question
question = "What is happening in this image?"

# Upload the image, reusing image_path so the path is defined in one place
myfile = genai.upload_file(image_path)
print(f"{myfile=}")

# Build the model once and ask the question about the uploaded file
models = genai.GenerativeModel("gemini-1.5-flash")
response = models.generate_content(
    [myfile, "\n\n", question]
)

# Print the response from the model
print(response.text)


myfile=genai.File({
    'name': 'files/8weik9za5fgu',
    'display_name': 'Cat image.png',
    'mime_type': 'image/png',
    'sha256_hash': 'MzM3MWEwMjY5ZTZmNTQzMWIwNmJjN2M2ZDMzZGZkNWU3YzYwYTc1M2I2MTE4OThiNTZlNDBkMjc4MTQyNzY4ZA==',
    'size_bytes': '1267755',
    'state': 'ACTIVE',
    'uri': 'https://generativelanguage.googleapis.com/v1beta/files/8weik9za5fgu',
    'create_time': '2024-10-01T16:01:58.547227Z',
    'expiration_time': '2024-10-03T16:01:58.483716357Z',
    'update_time': '2024-10-01T16:01:58.547227Z'})
A cat is wearing a pink jacket and sunglasses. The cat is illuminated with pink and blue light. The background is blurry and out of focus. The cat is looking towards the right side of the frame.
