<a href="https://colab.research.google.com/github/rownokstar/AI-Study-Assistant/blob/main/AI_Study_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Installation
# Installs the packages used by the cells below; `-q` keeps pip's output quiet.
# NOTE(review): python-dotenv is installed but not imported by any visible cell.
# NOTE(review): versions are unpinned — confirm that the `langchain.*` import
# paths used in app.py resolve with whatever release gets installed.
# NOTE(review): OpenAIEmbeddings may additionally need `tiktoken` — confirm.
!pip install streamlit python-dotenv langchain langchain-community openai pypdf faiss-cpu pyngrok -q

In [None]:
# Cell 2: Setup API Keys from Colab Secrets
import os
from google.colab import userdata

# Copy each Colab secret into the process environment under the same name.
for secret_name in ("OPENAI_API_KEY", "NGROK_AUTHTOKEN"):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
# Cell 3: Write the Final, Professional English Version of the App
%%writefile app.py

import streamlit as st
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.summarize import load_summarize_chain

# --- Core Functions ---

def get_pdf_documents(pdf_files):
    """Load the uploaded PDF files and split them into overlapping chunks.

    Each upload is written to a private temporary directory (PyPDFLoader is
    given a file path), loaded, and the directory is removed again before
    returning, so uploads neither pile up on disk nor collide with files of
    the same name uploaded in another session.

    Args:
        pdf_files: Iterable of uploaded file objects exposing ``name`` and
            ``getbuffer()`` (what ``st.file_uploader`` hands to the caller).

    Returns:
        The list of document chunks produced by ``CharacterTextSplitter``
        (newline separator, ``chunk_size=1000``, ``chunk_overlap=200``).
        Empty when no files were given.
    """
    import tempfile

    documents = []
    with tempfile.TemporaryDirectory(prefix="temp_pdf_") as temp_dir:
        for index, pdf_file in enumerate(pdf_files):
            # Keep only the final path component so a crafted upload name
            # cannot point outside the temporary directory; the index prefix
            # stops two uploads with the same name overwriting each other.
            safe_name = os.path.basename(pdf_file.name) or "upload.pdf"
            file_path = os.path.join(temp_dir, f"{index}_{safe_name}")
            with open(file_path, "wb") as f:
                f.write(pdf_file.getbuffer())

            # extend() consumes the loader's output while the file still
            # exists, so removing the directory afterwards is safe.
            documents.extend(PyPDFLoader(file_path).load())

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_documents(documents)

def get_vectorstore(document_chunks):
    """Embed the chunks with OpenAIEmbeddings and index them in FAISS."""
    return FAISS.from_documents(
        documents=document_chunks,
        embedding=OpenAIEmbeddings(),
    )

def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain that answers from ``vectorstore``.

    The chain is given a ChatOpenAI model, a buffer memory stored under the
    ``chat_history`` key, and the vector store's default retriever.
    """
    chain_kwargs = {
        "llm": ChatOpenAI(temperature=0.3, max_tokens=500),
        "memory": ConversationBufferMemory(memory_key='chat_history', return_messages=True),
        "retriever": vectorstore.as_retriever(),
    }
    return ConversationalRetrievalChain.from_llm(**chain_kwargs)

# --- Streamlit UI ---

def _init_session_state():
    """Create the session-state keys the app relies on, if they are missing."""
    defaults = {
        "conversation": None,
        "chat_history": [],
        "processed_documents": None,
        "summary": None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _process_documents(pdf_docs):
    """Chunk and index the uploaded PDFs, then start a fresh conversation."""
    if not pdf_docs:
        st.error("Please upload at least one PDF file.")
        return

    with st.spinner("Processing documents... This may take a moment."):
        doc_chunks = get_pdf_documents(pdf_docs)
        if not doc_chunks:
            # Nothing to embed (e.g. PDFs without extractable text); stop
            # before trying to build an index from an empty list.
            st.error("No readable text was found in the uploaded PDF files.")
            return

        st.session_state.processed_documents = doc_chunks
        vectorstore = get_vectorstore(doc_chunks)
        st.session_state.conversation = get_conversation_chain(vectorstore)

        # The new chain starts with empty memory, so drop the transcript and
        # summary that belonged to the previously processed documents.
        st.session_state.chat_history = []
        st.session_state.summary = None
        st.success("Processing complete! You can now ask questions.")


def _generate_summary():
    """Summarize the processed chunks and store the result in session state."""
    if not st.session_state.processed_documents:
        st.warning("Please process your documents first to generate a summary.")
        return

    with st.spinner("Generating summary... This can take several minutes for large documents."):
        llm = ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k")
        summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
        st.session_state.summary = summary_chain.run(st.session_state.processed_documents)


def _render_sidebar():
    """Draw the sidebar: upload/process controls and the summary feature."""
    with st.sidebar:
        st.header("Controls")
        st.subheader("1. Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDF files and click 'Process'", accept_multiple_files=True, type="pdf")

        if st.button("Process Documents"):
            _process_documents(pdf_docs)

        st.markdown("---")

        # --- Summarization Section ---
        st.subheader("2. Additional Features")
        if st.button("Generate Full Summary"):
            _generate_summary()

        if st.session_state.summary:
            with st.expander("View Document Summary"):
                st.write(st.session_state.summary)


def _render_chat_history():
    """Render the stored messages; even indexes are the user, odd the bot."""
    import html

    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            # This text is placed inside raw HTML, so escape it to keep
            # markup typed by the user from being injected into the page.
            st.markdown(
                f"<div style='text-align: right;'><b>You:</b> {html.escape(message.content)}</div>",
                unsafe_allow_html=True,
            )
        else:
            # Rendered without unsafe_allow_html, where HTML tags would show
            # up as literal text; use Markdown bold instead.
            st.markdown(f"**Bot:** {message.content}")


def main():
    """Build the Streamlit page: intro, sidebar controls and document chat."""
    # --- Page Configuration ---
    st.set_page_config(
        page_title="AI Study Assistant",
        page_icon="🤖",
        layout="wide"
    )

    # --- Session State Initialization ---
    _init_session_state()

    # --- Header and Introduction ---
    st.title("AI Study Assistant 🤖")
    st.write("Upload your textbooks or documents, and I'll help you study by answering questions and generating summaries.")

    with st.expander("ℹ️ How to Use"):
        st.markdown("""
        1.  **Upload:** Use the sidebar to upload one or more PDF documents.
        2.  **Process:** Click the 'Process Documents' button to let the AI read them.
        3.  **Ask:** Once processing is complete, ask any question about the content in the chat box below.
        4.  **Summarize:** You can also generate a full summary of the documents using the button in the sidebar.
        """)

    st.markdown("---")

    # --- Sidebar for Controls ---
    # Run the sidebar first so that state changed by its buttons (new chain,
    # cleared history) is already reflected when the chat is drawn below.
    _render_sidebar()

    # --- Chat Interface ---
    st.header("Chat with Your Documents")
    # A form submits the question exactly once. A bare text_input keeps its
    # value across reruns, so clicking any other button would send the
    # previous question to the model again.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input("Ask a question about the content of your documents:")
        submitted = st.form_submit_button("Ask")

    if submitted and user_question.strip():
        if st.session_state.conversation is not None:
            with st.spinner("Thinking..."):
                response = st.session_state.conversation({'question': user_question})
            st.session_state.chat_history = response['chat_history']
        else:
            st.warning("Please upload and process your documents before asking a question.")

    # Display chat history
    _render_chat_history()

# Entry point: build the UI when this file is executed as a script
# (the notebook launches it with `streamlit run app.py`).
if __name__ == '__main__':
    main()

Overwriting app.py


In [None]:
# Cell 4: Run the Streamlit app and expose it with ngrok
!streamlit run app.py &>/dev/null&

from pyngrok import ngrok
import time

# ngrok tunnel creation
public_url = ngrok.connect(8501)
print("✅ The app is live now:")
print(public_url)

✅ The app is live now:
NgrokTunnel: "https://135f8589acf5.ngrok-free.app" -> "http://localhost:8501"
