<a href="https://colab.research.google.com/github/tejaswi194117/Dev-folio/blob/main/Complete_Streamlit_Chatbot_App_for_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Install required libraries
!pip install langchain langchainhub chromadb transformers accelerate bitsandbytes streamlit pyngrok -U -qq

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import subprocess
import time
from pyngrok import ngrok

# Load model and tokenizer
model_name = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

# Pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2
)
llm = HuggingFacePipeline(pipeline=pipe)

# Chroma Vectorstore path
chroma_path = "./chroma_db"

# Streamlit UI
st.set_page_config(page_title="LangChain Chatbot", layout="wide")
st.title("🤖 LangChain + Falcon + ChromaDB Chatbot")

# Function to run the chatbot
def run_chatbot():
    # Load Chroma Vectorstore
    if os.path.exists(chroma_path):
        embedding = HuggingFaceEmbeddings()
        vectorstore = Chroma(persist_directory=chroma_path, embedding_function=embedding)
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever())
    else:
        st.error(f"Error: Chroma database not found at {chroma_path}. Please make sure you have created the vectorstore and it is saved at the specified path.")
        qa = None

    query = st.text_input("Enter your question:")
    if query:
        if qa:
            response = qa.run(query)
            st.write("### Response:")
            st.success(response)
        else:
            st.write("Please create the Chroma database before querying.")

# Function to start the Streamlit app and Ngrok tunnel
def start_streamlit():
    # Kill any existing ngrok processes
    ngrok.kill()

    # Set up Ngrok authtoken
    NGROK_AUTHTOKEN = "2wcHAo16Jv6D4frA9a9KLrZr6EE_6LxU7c9X1rPGPBaivggbS"  # Replace with your authtoken
    if not NGROK_AUTHTOKEN or NGROK_AUTHTOKEN == "YOUR_NGROK_AUTHTOKEN":
        st.error("❌ Error: Ngrok authtoken not configured.  Please follow the instructions to get a token and replace the placeholder in the code.")
        print("❌ Error: Ngrok authtoken not configured.  Please follow the instructions to get a token and replace the placeholder in the code.")
        return  # Stop if the authtoken is not set
    ngrok.set_auth_token(NGROK_AUTHTOKEN)


    # Start Streamlit in the background
    # Use a direct call to streamlit run instead of subprocess
    try:
        from streamlit.web.cli import main as streamlit_run
        import sys
        sys.argv = ["streamlit", "run", "/content/streamlit_app.py", "&>/content/logs.txt", "&"] # changed from __file__
        # the path "/content/streamlit_app.py"  should be absolute or relative to the current working directory
        process = subprocess.Popen(['streamlit', 'run', '/content/streamlit_app.py', '&>/content/logs.txt', '&'], shell=True)
        time.sleep(5)

        # Open an ngrok tunnel to the Streamlit port (default 8501)
        url = ngrok.connect(8501)
        print(f"✅ Chatbot is running! Open this URL in your browser: {url}")
        run_chatbot()  # Call the chatbot function

    except Exception as e:
        print(f"❌ Error: {e}")

# Create a dummy streamlit_app.py
with open("/content/streamlit_app.py", "w") as f:
    f.write("import streamlit as st\nst.write('This is a placeholder')")

# Run the app
start_streamlit()


Device set to use cpu


✅ Chatbot is running! Open this URL in your browser: NgrokTunnel: "https://e48d-34-118-241-28.ngrok-free.app" -> "http://localhost:8501"


  embedding = HuggingFaceEmbeddings()
  vectorstore = Chroma(persist_directory=chroma_path, embedding_function=embedding)
2025-05-04 11:24:42.071 Session state does not function when running a script without `streamlit run`


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader # Import TextLoader
from langchain.schema import Document

# 1. Load data (replace with your actual data loading)
#    Now using Document objects
documents = [
    Document(page_content="This is document 1.", metadata={"source": "doc1"}),
    Document(page_content="This is document 2.", metadata={"source": "doc2"}),
    Document(page_content="Here is another document.", metadata={"source": "doc3"}),
    #  If you are loading from files, use a loader.  For example
    #  loader = TextLoader("my_document.txt")
    #  documents = loader.load()
    #  The TextLoader creates Document objects for you.

]

# 2. Create embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # Or another model

# 3. Create Chroma vectorstore
chroma_db = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory="./chroma_db"  # Important: Use the same path as in the Streamlit code
)
chroma_db.persist()
print("Chroma database created!")


Chroma database created!


  chroma_db.persist()


In [3]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-no