In [None]:
!pip install streamlit
!pip install -r requirement.txt

In [None]:
import streamlit as st

In [None]:
from google.colab import userdata
import os
# Copy the OPENAI_API_KEY Colab secret into this process's environment
# variables, where processes started from this notebook can inherit it.
os.environ.update(OPENAI_API_KEY=userdata.get("OPENAI_API_KEY"))


In [None]:
%%writefile app.py
import os
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
if 'OPENAI_API_KEY' in os.environ:
    print("OPENAI_API_KEY is set.")
else:
    print("OPENAI_API_KEY is not set.")

# --- Streamlit UI ---
st.set_page_config(page_title="Chat with your Document", layout="wide")
st.title("📄 Chat with your Document")

uploaded_file = st.file_uploader("Upload a document", type=["pdf", "txt", "docx"])
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "qa_chain" not in st.session_state:
    st.session_state.qa_chain = None

if uploaded_file is not None and st.session_state.qa_chain is None:
    # --- Load Document ---
    file_type = uploaded_file.name.split(".")[-1].lower()
    with open("temp." + file_type, "wb") as f:
        f.write(uploaded_file.getbuffer())

    if file_type == "pdf":
        loader = PyPDFLoader("temp.pdf")
    elif file_type == "txt":
        loader = TextLoader("temp.txt")
    elif file_type == "docx":
        loader = Docx2txtLoader("temp.docx")
    else:
        st.error("Unsupported file type")
        st.stop()

    documents = loader.load()

    # --- Split & Embed ---
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(chunks, embeddings)

    # --- Conversational Chain ---
    llm = ChatOpenAI(temperature=0)
    st.session_state.qa_chain = ConversationalRetrievalChain.from_llm(
        llm, vectordb.as_retriever(), return_source_documents=True
    )
    st.success("✅ Document processed! You can start chatting below.")
    print('Documetnt Processed')
    print('vectordb : ', vectordb)
    print('qa_chain : ', st.session_state.qa_chain)

# --- Chat Interface ---
if st.session_state.qa_chain:
    print('Inside the qa_chain')
    query = st.text_input("Ask a question about your document:")
    if query:
        result = st.session_state.qa_chain({"question": query, "chat_history": st.session_state.chat_history})
        answer = result["answer"]
        st.session_state.chat_history.append((query, answer))

        # Display conversation
        for q, a in st.session_state.chat_history:
            st.markdown(f"**You:** {q}")
            st.markdown(f"**Bot:** {a}")


Overwriting app.py


In [None]:
# Fetch ipv4.icanhazip.com quietly and print the response to stdout; the
# output is this machine's public IPv4 address.
# NOTE(review): presumably needed as the localtunnel "tunnel password" when
# opening the public URL created in the next cell — confirm.
!wget -q -O - ipv4.icanhazip.com


34.55.221.252


In [None]:
# Start the Streamlit server for app.py in the background (`&`), then run
# localtunnel in the foreground to expose port 8501 (the port of Streamlit's
# local URL) through a public URL.
!streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.55.221.252:8501[0m
[0m
[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0Kyour url is: https://clever-clouds-hear.loca.lt
OPENAI_API_KEY is set.
OPENAI_API_KEY is set.
OPENAI_API_KEY is set.
Documetnt Processed
vectordb :  <langchain_community.vectorstores.faiss.FAISS object at 0x7de3f0b89970>
qa_chain :  verbose=False combine_docs_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_t

In [None]:
# Streamlit app (memory variant): upload a PDF, TXT or DOCX file and chat with
# its contents. The conversation is tracked by a ConversationBufferMemory
# attached to the chain, so no chat history is passed in by hand.
# NOTE(review): Streamlit widgets and session state only work under
# `streamlit run`; presumably this cell is meant to be saved to a script
# (e.g. with %%writefile) rather than executed in the kernel — confirm.
import os
import tempfile

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory  # memory

# Loader class to use for each supported file extension.
LOADERS = {"pdf": PyPDFLoader, "txt": TextLoader, "docx": Docx2txtLoader}


def load_documents(uploaded_file, file_type):
    """Parse an uploaded file into LangChain documents.

    The upload is written to a uniquely named temporary file (so concurrent
    sessions cannot overwrite each other's data), parsed with the loader
    registered for ``file_type``, and the temporary file is removed afterwards.

    Args:
        uploaded_file: the object returned by ``st.file_uploader``.
        file_type: lower-case extension without the dot; must be a key of LOADERS.

    Returns:
        The list of documents produced by the loader's ``load()``.
    """
    with tempfile.NamedTemporaryFile(suffix="." + file_type, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        tmp_path = tmp.name
    try:
        return LOADERS[file_type](tmp_path).load()
    finally:
        os.remove(tmp_path)


# --- Streamlit UI ---
st.set_page_config(page_title="Chat with your Document", layout="wide")
st.title("📄 Chat with your Document")

# --- API Key ---
# Read the key from the environment instead of assigning a literal here: a
# hardcoded value both leaks the credential and overwrites a key that was
# already exported (e.g. from Colab secrets earlier in the notebook).
if not os.environ.get("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY is not set.")
    st.stop()

uploaded_file = st.file_uploader("Upload a document", type=["pdf", "txt", "docx"])
if "qa_chain" not in st.session_state:
    st.session_state.qa_chain = None
if "doc_key" not in st.session_state:
    # Identity (name, size) of the document the current chain was built from.
    st.session_state.doc_key = None

if uploaded_file is not None:
    doc_key = (uploaded_file.name, uploaded_file.size)
    # Build the chain on first upload, and rebuild it (with fresh memory) when
    # a different file is uploaded; otherwise answers would keep coming from
    # the previous document.
    if st.session_state.qa_chain is None or doc_key != st.session_state.doc_key:
        # --- Load Document ---
        file_type = uploaded_file.name.rsplit(".", 1)[-1].lower()
        if file_type not in LOADERS:
            st.error("Unsupported file type")
            st.stop()

        documents = load_documents(uploaded_file, file_type)

        # --- Split & Embed ---
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(documents)
        if not chunks:
            # An index cannot be built from zero chunks (e.g. a scanned PDF
            # with no text layer), so stop with a readable message instead.
            st.error("No text could be extracted from this document.")
            st.stop()

        embeddings = OpenAIEmbeddings()
        vectordb = FAISS.from_documents(chunks, embeddings)

        # --- Conversational Chain with Memory ---
        llm = ChatOpenAI(temperature=0)
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            # The chain also returns source documents, so the memory must be
            # told which output key holds the text to store.
            output_key="answer",
        )

        st.session_state.qa_chain = ConversationalRetrievalChain.from_llm(
            llm,
            retriever=vectordb.as_retriever(),
            memory=memory,
            return_source_documents=True,
            output_key="answer",
        )
        st.session_state.doc_key = doc_key

        st.success("✅ Document processed! You can start chatting below.")

# --- Chat Interface ---
if st.session_state.qa_chain:
    # A form submits the question exactly once. A bare text_input keeps its
    # value across reruns, which re-asked the same question (and stored a
    # duplicate turn in memory) every time the script reran.
    with st.form("question_form", clear_on_submit=True):
        query = st.text_input("Ask a question about your document:")
        submitted = st.form_submit_button("Ask")

    if submitted and query.strip():
        # The chain saves the question and its answer into the attached
        # memory, so the reply is shown by the history loop below.
        st.session_state.qa_chain.invoke({"question": query})

    # Display chat history straight from the chain's memory so it stays
    # visible on every rerun, not only right after a question.
    for msg in st.session_state.qa_chain.memory.chat_memory.messages:
        role = "You" if msg.type == "human" else "Bot"
        st.markdown(f"**{role}:** {msg.content}")


In [None]:
# NOTE(review): bare shell escape with no command — looks like a leftover
# scratch cell; confirm it can be deleted.
!