<a href="https://colab.research.google.com/github/lamld203844/chat-any/blob/main/chat_any.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# System flow

In [None]:
!pip install -q langchain
!pip install -q langchain-community
!pip install -q llama-index

!pip install -q llama-index-embeddings-langchain
!pip install -q sentence-transformers

!pip install -q llama-index-llms-gemini
!pip install -q google-generativeai
!pip install -q llama-index-readers-web

In [2]:
import nest_asyncio

# allows nested access to the event loop
nest_asyncio.apply()

## Load file
- load website

In [3]:

# -------------------------------------------
# Load data from a website via Llamaindex Loader
#
# -------------------------------------------

from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
import os

loader = SimpleWebPageReader(html_to_text=True)
docs = loader.load_data(["https://cinnamon.is/en/"])


In [6]:
docs

[Document(id_='https://cinnamon.is/en/', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='![cinnamon](https://cinnamon.is/en/wp-\ncontent/themes/Cinnamon-2017-en/images/logo.png)\n\n  * [English](https://cinnamon.is/en/ "English")[日本語](https://cinnamon.is/ "日本語")[Tiếng Việt](https://cinnamon.is/vi/ "Tiếng Việt")[繁體中文](https://cinnamon.is/tw/ "繁體中文")\n\n  * HOME\n  * PRODUCT\n  * NEWS\n  * [COMPANY](https://cinnamon.is/en/company/)\n  * [RECRUITING](https://cinnamon.is/en/recruiting/)\n  * CONTACT\n\n  * HOME\n  * PRODUCT\n  * NEWS\n  * [COMPANY](https://cinnamon.is/en/company/)\n  * [RECRUITING](https://cinnamon.is/en/recruiting/)\n  * CONTACT\n\n  * [English](https://cinnamon.is/en/ "English")[日本語](https://cinnamon.is/ "日本語")[Tiếng Việt](https://cinnamon.is/vi/ "Tiếng Việt")[繁體中文](https://cinnamon.is/tw/ "繁體中文")\n\n# Extend human potential with AI\n\n## At Cinnamon we are working to make a world  \nwhere human creativ

## Chunking and creating embeddings model

In [4]:

# --------------------------------------------
# Chunking and create embeddings
# Automatic via llama index VectorStoreIndex
# --------------------------------------------
from torch import cuda
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

def load_embedding_model(
    model_name: str = "BAAI/bge-large-en-v1.5",
    device: str = "cuda" if cuda.is_available() else "cpu"
) -> HuggingFaceBgeEmbeddings:
    model_kwargs = {"device": device}
    encode_kwargs = {
        "normalize_embeddings": True
    }  # set True to compute cosine similarity
    embedding_model = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    return embedding_model

# setting up the embedding model
lc_embedding_model = load_embedding_model()
embed_model = LangchainEmbedding(lc_embedding_model)

### Sanity check embedding model
embedding = lc_embedding_model.embed_query('Hello, world')
embedding = embedding[:10]
print(f'Embedding: {embedding}')

  from .autonotebook import tqdm as notebook_tqdm


Embedding: [0.057074934244155884, 0.002089126966893673, 0.031196145340800285, -0.02522592805325985, 0.011089143343269825, -0.014548566192388535, 0.032433126121759415, 0.02435421757400036, 0.012902944348752499, 0.04747822508215904]


In [5]:
# ====== Create vector store and upload data ======
Settings.embed_model = lc_embedding_model
index = VectorStoreIndex.from_documents(docs, show_progress=True)
# TODO try async index creation for faster emebdding generation & persist it to memory!
# index = VectorStoreIndex(docs, use_async=True)

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 53.94it/s]
Generating embeddings: 100%|██████████| 2/2 [00:16<00:00,  8.13s/it]


## Load llm

In [9]:
# --------------------------
# Set up search query engine
# --------------------------

# setting up the llm
from dotenv import load_dotenv
load_dotenv()
google_api_key = os.environ['GOOGLE_API_KEY']

llm = Gemini(model_name="models/gemini-pro", api_key=google_api_key)

# Sanity check llm
resp = llm.complete("Hello, world")
print(resp)

Hello, world!


## Set up query engine

In [10]:

# ====== Setup a query engine ======
Settings.llm = llm
query_engine = index.as_query_engine(similarity_top_k=4)

# ---------------------------------------
# Customise prompt template + augmenting
# ---------------------------------------

from llama_index.core import PromptTemplate

qa_prompt_tmpl_str = (
  "You are a friendly and supportive assistant for question answering information from website"
  "Answer the question using the following information delimited by triple brackque, in case you don't know the answer say 'I don't know!':\n\n"
  "```\n{context_str}\n```"
  "Query: {query_str}\n"
  "\nYou can format our put as you want but try to give the answer which is the best match with the context information above."
  "\nDon't say based on information provided or something like that"
)

qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [11]:
# ---------- Chatting -----------
from IPython.display import Markdown, display
response = query_engine.query('What is this website about?')
display(Markdown(str(response)))

This website is about Cinnamon, a company that provides Deep Learning backed AI products. Cinnamon's mission is to extend human potential for more productive and creative work by utilizing the power of Artificial Intelligence.

# App

In [24]:
from torch import cuda
cuda.is_available()

False

In [None]:
import nest_asyncio

# allows nested access to the event loop
nest_asyncio.apply()


# -------------------------------------------
# Load data from a website via Llamaindex Loader
#
# -------------------------------------------

from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
import os

url = 'https://cinnamon.is/en/news/cinnamon-ai-artificial-intelligence-startup-issues-shares-to-dai-ichi-life-insurance-through-third-party-allotment/'
docs = SimpleWebPageReader(html_to_text=True).load_data(
    [url]
)

# --------------------------------------------
# Chunking and create embeddings
# Automatic via llama index VectorStoreIndex
# --------------------------------------------
from torch import cuda
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

def load_embedding_model(
    model_name: str = "BAAI/bge-large-en-v1.5",
    device: str = "cuda" if cuda.is_available() else "cpu"
) -> HuggingFaceBgeEmbeddings:
    model_kwargs = {"device": device}
    encode_kwargs = {
        "normalize_embeddings": True
    }  # set True to compute cosine similarity
    embedding_model = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    return embedding_model

# setting up the embedding model
lc_embedding_model = load_embedding_model()
embed_model = LangchainEmbedding(lc_embedding_model)

# ### Sanity check embedding model
# embedding = lc_embedding_model.embed_query('Hello, world')
# embedding = embedding[:10]
# print(f'Embedding: {embedding}')

# ====== Create vector store and upload data ======
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(docs, show_progress=True)
# TODO try async index creation for faster emebdding generation & persist it to memory!
# index = VectorStoreIndex(docs, use_async=True)

# --------------------------
# Set up search query engine
# --------------------------

# setting up the llm
google_api_key = 'AIzaSyDkjiYKPW2P2knSwHelfHsQBrn46n0RHSg'
llm = Gemini(model_name="models/gemini-pro", api_key=google_api_key)

# # Sanity check llm
# resp = llm.complete("Hello, world")
# print(resp)

# ====== Setup a query engine ======
Settings.llm = llm
query_engine = index.as_query_engine(similarity_top_k=4)

# ---------------------------------------
# Customise prompt template + augmenting
# ---------------------------------------

from llama_index.core import PromptTemplate

qa_prompt_tmpl_str = (
  "You are a friendly and supportive assistant for question answering information from website"
  "Answer the question using the following information delimited by triple brackque, incase case you don't know the answer say 'I don't know!':\n\n"
  "```\n{context_str}\n```"
  "Query: {query_str}\n"
  "\nDon't say based on information provided or something like that"
)

qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [19]:
import re 

def validate_website_url(url):

    url_pattern = re.compile(
        r'http[s]?://'  # http:// or https://
        r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|'  # domain...
        r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'  # ...or percent-encoded characters
        r'(?:\:[0-9]{1,5})?'  # optional port number
        r'(?:/[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)*'  # path
        r'(?:\?[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)?'  # query string
        r'(?:#[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)?'  # fragment
    )
    return bool(url_pattern.match(url))

In [20]:
def setup_query_engine(website_url):
    if validate_website_url(website_url):
        try:
            # -------------------------------------------
            # Load data from a website via Llamaindex Loader
            # -------------------------------------------
            loader = SimpleWebPageReader()
            docs = loader.load_data([website_url])

            # ---- Create vector store and upload data ---
            # Chunking and create embeddings
            # Automatic via llama index VectorStoreIndex
            # --------------------------------------------
            Settings.embed_model = embed_model
            index = VectorStoreIndex.from_documents(docs, show_progress=True)

            # ====== Setup a query engine ======
            Settings.llm = llm
            query_engine = index.as_query_engine(streaming=True, similarity_top_k=4)

            # ====== Customise prompt template ======
            qa_prompt_tmpl_str = (
                "You are a friendly and supportive assistant for question answering information from website.\n"
                "Answer the question using the following information delimited by triple brackque, in case you don't know the answer say 'I don't know!':\n\n"
                "```\n{context_str}\n```"
                "Query: {query_str}\n"
                "\nYou can format our put as you want but try to give the answer which is the best match with the context information above."
                "\nDon't say based on information provided or something like that"
            )
            qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

            query_engine.update_prompts(
                {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
            )
            # ======= Complete setting up !!!! ========
            if docs:
                print("Data loaded successfully!!")
                print("Ready to chat!!")
            else:
                print("No data found, check if the repository is not empty!")
            
            return query_engine
        except Exception as e:
                print(f"An error occurred: {e}")
    else:
        print('Invalid github repo, try again!')
        return None

In [21]:
url = 'https://cinnamon.is/en/'
setup_query_engine(url)

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00,  8.17it/s]
Generating embeddings:   0%|          | 0/12 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# ---------- Chatting -----------
from IPython.display import Markdown, display
response = query_engine.query('summarize this website')
display(Markdown(str(response)))

# GUI with Streamlit


In [1]:
!pip install -q langchain
!pip install -q langchain-community
!pip install -q llama-index

!pip install -q llama-index-embeddings-langchain
!pip install -q sentence-transformers

!pip install -q llama-index-llms-gemini
!pip install -q google-generativeai
!pip install -q llama-index-readers-web

!pip install -q streamlit
!npm install -q localtunnel

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.5/973.5 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.5/308.5 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.8/122.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.5/142.5 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m89.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━

In [3]:
%%writefile app.py

import os
import gc

import re # website url validation
import uuid # unique id for each session
import nest_asyncio # allows nested access to the event loop
nest_asyncio.apply()

import streamlit as st
from torch import cuda
# from dotenv import load_dotenv
# load_dotenv() # Load Gemini API


from llama_index.core import Settings
from llama_index.core import PromptTemplate
from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from llama_index.llms.gemini import Gemini


# ---------- Init + Helper function ----------------

# os.environ['HF_HOME'] = '\lit-chat_with_code_RAG\weights' # for run embedding model locally

# setting up the embedding model
def load_embedding_model(
    model_name: str = "BAAI/bge-large-en-v1.5",
    device: str = "cuda" if cuda.is_available() else "cpu"
) -> HuggingFaceBgeEmbeddings:
    model_kwargs = {"device": device}
    encode_kwargs = {
        "normalize_embeddings": True
    }  # set True to compute cosine similarity
    embedding_model = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    return embedding_model

lc_embedding_model = load_embedding_model()
embed_model = LangchainEmbedding(lc_embedding_model)

# setting up session
if "id" not in st.session_state:
    st.session_state.id = uuid.uuid4()
    st.session_state.file_cache = {}

session_id = st.session_state.id
client = None

# setting up the llm
from google.colab import userdata
google_api_key = userdata.get('GOOGLE_API_KEY')
llm = Gemini(model_name="models/gemini-pro", api_key=google_api_key)

# helper func
def reset_chat():
    st.session_state.messages = []
    st.session_state.context = None
    gc.collect() # free up memory

def validate_website_url(url):

    url_pattern = re.compile(
        r'http[s]?://'  # http:// or https://
        r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|'  # domain...
        r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'  # ...or percent-encoded characters
        r'(?:\:[0-9]{1,5})?'  # optional port number
        r'(?:/[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)*'  # path
        r'(?:\?[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)?'  # query string
        r'(?:#[a-zA-Z0-9$-_@.&+!*\\(\\),=%]*)?'  # fragment
    )
    return bool(url_pattern.match(url))

# ---------- End helper function ----------------

with st.sidebar:
    # Input for URL
    website_url = st.text_input("URL")

    # Button to load and process url
    load_button = st.button("Load")

    message_container = st.empty()  # Placeholder for dynamic messages

    if load_button and website_url:
        if validate_website_url(website_url):
            with st.spinner(f"Loading website..."):
                try:
                    # -------------------------------------------
                    # Load data from a website via Llamaindex Loader
                    # -------------------------------------------
                    loader = SimpleWebPageReader()
                    docs = loader.load_data([website_url])

                    # ---- Create vector store and upload data ---
                    # Chunking and create embeddings
                    # Automatic via llama index VectorStoreIndex
                    # --------------------------------------------
                    Settings.embed_model = embed_model
                    index = VectorStoreIndex.from_documents(docs)

                    # ====== Setup a query engine ======
                    Settings.llm = llm
                    query_engine = index.as_query_engine(streaming=True, similarity_top_k=4)

                    # ====== Customise prompt template ======
                    qa_prompt_tmpl_str = (
                        "You are a friendly and supportive assistant."
                        "Context information is below.\n"
                        "---------------------\n"
                        "{context_str}\n"
                        "---------------------\n"
                        "Try to give the answer that best matches the context information above. In case you don't know the answer, say 'I don't know!'"
                        "Query: {query_str}\n"
                        "Answer: "
                    )
                    qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

                    query_engine.update_prompts(
                        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
                    )
                    # ======= Complete setting up !!!! ========
                    if docs:
                        message_container.success("Data loaded successfully!!")
                    else:
                        message_container.write(
                            "No data found, check if the repository is not empty!"
                        )
                    st.session_state.query_engine = query_engine

                except Exception as e:
                    st.error(f"An error occurred: {e}")
                    st.stop()

                st.success("Ready to Chat!")
        else:
            st.error('Invalid url')
            st.stop()

col1, col2 = st.columns([6, 1])

with col1:
    st.header(f"Chat with any website")

with col2:
    st.button("Clear ↺", on_click=reset_chat)


# Initialize chat history
if "messages" not in st.session_state:
    reset_chat()


# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])


# Accept user input
if prompt := st.chat_input("What's up?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        query_engine = st.session_state.query_engine

        # Simulate stream of response with milliseconds delay
        streaming_response = query_engine.query(prompt)

        for chunk in streaming_response.response_gen:
            full_response += chunk
            message_placeholder.markdown(full_response + "▌")

        message_placeholder.markdown(full_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": full_response})

Writing app.py


In [None]:
!streamlit run app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.90.31.54
[K[?25hnpx: installed 22 in 1.948s
