In [None]:
!pip install streamlit langchain chromadb openai pypdf python-docx pyngrok langchain_community
!pip install tiktoken





In [None]:
%%writefile app.py
import os
import tempfile
import uuid

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Read the OpenAI API key from the environment only. A real key must never be
# embedded as a fallback default: anything written here is copied into the
# notebook, into app.py and into version control. The key that used to be
# hardcoded on this line has to be treated as leaked and revoked.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


# Streamlit expects set_page_config to run before other st.* calls, which is
# why the missing-key check is placed after it rather than next to os.getenv.
st.set_page_config(page_title="📄 RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents - RAG Chatbot")

if not OPENAI_API_KEY:
    # Fail early with a readable message instead of a stack trace from the
    # OpenAI client the first time a document is embedded.
    st.error("OPENAI_API_KEY is not set. Export it in the environment before launching the app.")
    st.stop()

# Sidebar for settings: model, sampling temperature and number of retrieved chunks.
with st.sidebar:
    st.header("⚙️ Settings")
    model_choice = st.selectbox("Select Model", ["gpt-3.5-turbo", "gpt-4"])
    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
    top_k = st.slider("Top K Retrieved Docs", 1, 5, 3)

# File uploader (several files may be selected at once)
uploaded_files = st.file_uploader("Upload PDFs, DOCX, or TXT files", accept_multiple_files=True, type=['pdf', 'docx', 'txt'])

def load_documents(uploaded_files):
    """Parse uploaded files into a flat list of loaded documents.

    Each upload is copied to a temporary file on disk (the loaders take a
    path), parsed with the loader matching its extension, and the temporary
    file is always deleted afterwards - including when the loader raises.

    Args:
        uploaded_files: Iterable of upload objects exposing ``.name`` and
            ``.read()`` (what ``st.file_uploader`` returns).

    Returns:
        A list with the documents of every supported file, in upload order.
        Files with an unsupported extension are reported via ``st.error``
        and skipped.
    """
    documents = []
    for uploaded_file in uploaded_files:
        # Compare case-insensitively so "REPORT.PDF" is accepted too.
        lowered_name = uploaded_file.name.lower()
        if lowered_name.endswith('.pdf'):
            loader_cls, suffix = PyPDFLoader, '.pdf'
        elif lowered_name.endswith('.docx'):
            loader_cls, suffix = Docx2txtLoader, '.docx'
        elif lowered_name.endswith('.txt'):
            loader_cls, suffix = TextLoader, '.txt'
        else:
            # Decide before touching the disk: nothing is written for a file
            # that will not be parsed, so there is nothing left to clean up.
            st.error("Unsupported file type.")
            continue

        # delete=False because the loader reopens the file by path after the
        # handle is closed; removal is done explicitly in the finally below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_file_path = tmp_file.name

        try:
            documents.extend(loader_cls(tmp_file_path).load())
        finally:
            os.remove(tmp_file_path)
    return documents

if uploaded_files:
    # Streamlit re-executes this whole script on every widget interaction.
    # Embedding is slow and billed per call, so the vector store is kept in
    # session state and rebuilt only when the set of uploaded files changes.
    upload_signature = tuple((f.name, f.size) for f in uploaded_files)
    if st.session_state.get("index_signature") != upload_signature:
        documents = load_documents(uploaded_files)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        document_chunks = text_splitter.split_documents(documents)

        if document_chunks:
            embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
            # Use a fresh, non-persisted collection for each upload set.
            # Writing every rerun into one persisted default collection kept
            # appending the same chunks, so retrieval returned duplicates and
            # text from files uploaded in earlier runs.
            st.session_state["vector_db"] = Chroma.from_documents(
                document_chunks,
                embeddings,
                collection_name=f"rag-{uuid.uuid4().hex}",
            )
        else:
            # Nothing to index (e.g. scanned PDFs without a text layer).
            st.session_state["vector_db"] = None
        # Recorded last so that a failed build is retried on the next rerun.
        st.session_state["index_signature"] = upload_signature

    vector_db = st.session_state["vector_db"]
    if vector_db is None:
        st.warning("No text could be extracted from the uploaded files.")
    else:
        # Cheap objects: rebuilt each rerun so sidebar changes apply at once.
        retriever = vector_db.as_retriever(search_kwargs={"k": top_k})
        llm = ChatOpenAI(model=model_choice, temperature=temperature, openai_api_key=OPENAI_API_KEY)
        qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

        st.subheader("💬 Chat with your documents")
        chat_history = st.session_state.get("chat_history", [])

        # A form makes the question a one-shot submission. A bare text_input
        # keeps its value across reruns, so moving a slider re-asked the last
        # question and appended a duplicate entry to the history.
        with st.form("question_form", clear_on_submit=True):
            query = st.text_input("Ask a question:")
            submitted = st.form_submit_button("Ask")

        if submitted and query.strip():
            # invoke() replaces the deprecated Chain.run(); RetrievalQA takes
            # the question under "query" and returns the answer under "result".
            response = qa_chain.invoke({"query": query})["result"]
            chat_history.append((query, response))
            st.session_state["chat_history"] = chat_history

        for question, answer in chat_history:
            st.write(f"**🟢 You:** {question}")
            st.write(f"**🤖 AI:** {answer}")

# Run Streamlit in Colab
def run_streamlit():
    """Start ``streamlit run app.py`` as a child process without waiting for it.

    The server is launched headless on port 8501 with usage statistics
    disabled. The process handle is not kept and nothing is returned.
    NOTE(review): nothing in the visible source calls this helper.
    """
    from subprocess import Popen

    command = ["streamlit", "run", "app.py"]
    command += ["--server.port", "8501"]
    command += ["--server.headless", "true"]
    command += ["--browser.serverAddress", "0.0.0.0"]
    command += ["--browser.gatherUsageStats", "false"]
    Popen(command)




Overwriting app.py


In [None]:
# Start the Streamlit server for app.py on port 8501 in headless mode as a
# background shell job (trailing `&`), with its output sent to /dev/null so
# the cell returns immediately.
!streamlit run app.py --server.port 8501 --server.headless true --browser.serverAddress 0.0.0.0 --browser.gatherUsageStats false &>/dev/null &


In [None]:
!ngrok authtoken 2rwft64obU2hwUOr9vXTMN3srAw_4VodV5EpdGPzo9ipemV9C


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# Send SIGKILL (-9) to every process whose full command line (-f) matches
# "ngrok" - presumably to clear tunnels left over from earlier runs before a
# new one is opened below.
!pkill -9 -f ngrok


In [None]:
from pyngrok import ngrok

# Open a tunnel to access Streamlit publicly
public_url = ngrok.connect(8501, "http")
# connect() returns a tunnel object, not a string; printing it directly shows
# its repr (`NgrokTunnel: "https://..." -> "http://localhost:8501"`). Print the
# .public_url attribute so the message contains just the address to open.
print(f"🚀 Streamlit App is live at: {public_url.public_url}")


🚀 Streamlit App is live at: NgrokTunnel: "https://53b1-34-48-31-234.ngrok-free.app" -> "http://localhost:8501"
