In [None]:
%pip install streamlit PyPDF2 transformers accelerate pyngrok python-dotenv torch

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m143.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m146.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2, pyngrok, 

In [None]:
# Hugging Face Token Setup
import os
from getpass import getpass

# Prompt without echoing the secret; strip whitespace/newlines picked up when pasting.
hf_token = getpass("Enter your Hugging Face token: ").strip()

if hf_token:
    # huggingface_hub looks the token up under HF_TOKEN (current name) and
    # HUGGING_FACE_HUB_TOKEN (legacy name). The original HUGGINGFACE_HUB_TOKEN
    # spelling is still exported so anything reading that name keeps working.
    # Environment variables set here are inherited by subprocesses started later
    # from this kernel.
    for env_name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
        os.environ[env_name] = hf_token

    print("✅ Hugging Face token set successfully!")
    print("🔑 Token will be used for model authentication")
else:
    # Do not claim success (or export an empty value) when nothing was entered.
    print("⚠️ No Hugging Face token entered; model authentication will not be available")


In [None]:
from pyngrok import ngrok
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get ngrok token from environment variable
NGROK_AUTH_TOKEN = os.getenv("NGROK_AUTH_TOKEN")

if NGROK_AUTH_TOKEN:
    # Register the token through pyngrok instead of a `!ngrok authtoken $VAR` shell line:
    # the secret is never interpolated into a shell command, and no `ngrok` binary
    # needs to be on PATH.
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("Ngrok token loaded from environment")
else:
    print(" NGROK_AUTH_TOKEN not found in .env file")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
%%writefile app.py
import html
import os

import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline

# Page-level settings: browser tab title and icon, and the wide layout.
st.set_page_config(page_title="PDF Chat", page_icon="📄", layout="wide")

# Clean CSS styling
# A single <style> block injected as raw HTML. It hides the Streamlit header, sets a
# black app background, and defines the classes used by the markup emitted further
# down: .main-container, .header, .chat-area, .message(.user/.bot) with
# .message-content bubbles, the pulsing .typing indicator, and the bottom-fixed
# .input-area. It also restyles the text input and the success/info boxes.
st.markdown("""
<style>
    /* Hide Streamlit header */
    header[data-testid="stHeader"] {
        display: none;
    }
    
    .stApp > div:first-child {
        padding-top: 0;
    }
    
    /* Main container */
    .main-container {
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        padding-bottom: 100px;
    }
    
    /* Header */
    .header {
        text-align: center;
        color: #ffffff;
        font-size: 24px;
        font-weight: 600;
        margin-bottom: 20px;
    }
    
    /* Chat area - no container */
    .chat-area {
        padding: 20px;
        margin-bottom: 20px;
    }
    
    /* Message styling */
    .message {
        margin-bottom: 15px;
        display: flex;
    }
    
    .message.user {
        justify-content: flex-end;
    }
    
    .message.bot {
        justify-content: flex-start;
    }
    
    .message-content {
        max-width: 70%;
        padding: 12px 16px;
        border-radius: 18px;
        font-size: 14px;
        line-height: 1.4;
        word-wrap: break-word;
    }
    
    .message.user .message-content {
        background: #007bff;
        color: white;
        border-bottom-right-radius: 4px;
    }
    
    .message.bot .message-content {
        background: #f8f9fa;
        color: #333;
        border: 1px solid #e9ecef;
        border-bottom-left-radius: 4px;
    }
    
    /* Typing indicator */
    .typing {
        background: #f8f9fa;
        color: #6c757d;
        padding: 12px 16px;
        border-radius: 18px;
        border-bottom-left-radius: 4px;
        font-style: italic;
        max-width: 70%;
        border: 1px solid #e9ecef;
        animation: pulse 1.5s infinite;
    }
    
    @keyframes pulse {
        0%, 100% { opacity: 0.6; }
        50% { opacity: 1; }
    }
    
    /* Input area - fixed at bottom */
    .input-area {
        position: fixed;
        bottom: 0;
        left: 0;
        right: 0;
        background: #000000;
        padding: 15px;
        box-shadow: 0 -2px 4px rgba(0,0,0,0.3);
        z-index: 1000;
    }
    
    /* Input field */
    .stTextInput > div > div > input {
        padding: 12px 20px;
        font-size: 14px;
        background: #333333;
        color: #ffffff;
        height: 48px;
        transition: border-color 0.15s ease-in-out;
    }
    
    .stTextInput > div > div > input:focus {
        border-color: #ffffff;
        box-shadow: 0 0 0 0.2rem rgba(255,255,255,0.25);
        outline: none;
    }
    
    .stTextInput > div > div > input::placeholder {
        color: #cccccc;
    }
    
    .stTextInput {
        width: 100%;
    }
    
    .stApp {
        background: #000000;
    }
    
    .stSuccess {
        background: #d4edda;
        border: 1px solid #c3e6cb;
        color: #155724;
        border-radius: 8px;
        padding: 12px;
        margin-bottom: 20px;
    }
    
    .stInfo {
        background: #d1ecf1;
        border: 1px solid #bee5eb;
        color: #0c5460;
        border-radius: 8px;
        padding: 12px;
        margin-bottom: 10px;
    }
</style>
""", unsafe_allow_html=True)

# Main app
# Emits the opening tag of the .main-container wrapper, then the page title.
# The matching closing </div> is emitted by the last statement of this script.
# NOTE(review): each st.markdown call is presumably rendered as its own element, so
# this opening tag may not actually wrap the widgets created later — confirm in the
# rendered page before relying on .main-container for layout.
st.markdown('<div class="main-container">', unsafe_allow_html=True)
st.markdown('<div class="header">📄PDF Chat</div>', unsafe_allow_html=True)

# Load model - Using Mistral for better performance
@st.cache_resource
def load_model():
    """Build the Mistral-7B-Instruct text-generation pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object is created once and
    reused, rather than being rebuilt on every script rerun.

    The model runs on GPU 0 in float16 when CUDA is available, otherwise on CPU in
    float32.

    Authentication: the Hugging Face token is read from the environment. Both the
    names recognised by ``huggingface_hub`` and the ``HUGGINGFACE_HUB_TOKEN`` name
    exported by this notebook's setup cell are checked. When none is set,
    ``token=True`` is passed, which keeps the original "a stored token is required"
    behaviour.

    Returns:
        The ``transformers`` text-generation pipeline.
    """
    use_gpu = torch.cuda.is_available()

    hf_token = (
        os.environ.get("HF_TOKEN")
        or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    )

    nlp = pipeline(
        "text-generation",
        model="mistralai/Mistral-7B-Instruct-v0.1",
        device=0 if use_gpu else -1,
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        # `token` replaces the deprecated `use_auth_token` argument.
        token=hf_token or True,
        # trust_remote_code is intentionally not enabled: Mistral is implemented in
        # transformers itself, so there is no need to execute code from the model repo.
    )
    return nlp

nlp = load_model()

# Prompt / generation budgets, named so they can be tuned in one place.
MAX_CONTEXT_CHARS = 4000  # leading slice of the combined PDF text placed in the prompt
MAX_NEW_TOKENS = 512      # cap on *generated* tokens; prompt tokens are not counted


def _render_message(role, text):
    """Render one chat bubble for ``role`` ("user", anything else is shown as bot).

    ``text`` comes from the user or the model, so it is HTML-escaped before being
    placed in markup rendered with ``unsafe_allow_html=True``. Newlines become
    ``<br>`` and the markup is emitted on a single line, so multi-line text cannot
    break out of the HTML block or be re-interpreted as Markdown.
    """
    css_role = "user" if role == "user" else "bot"
    safe_text = html.escape(text).replace("\n", "<br>")
    st.markdown(
        f'<div class="message {css_role}"><div class="message-content">{safe_text}</div></div>',
        unsafe_allow_html=True,
    )


# Upload multiple PDFs
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

if uploaded_files:
    # Process all uploaded files into a single unified document
    all_pdf_text = ""
    file_info = []

    # Process each file and combine into unified document
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        # extract_text() may yield nothing for a page; treat that as an empty string.
        file_text = "".join(page.extract_text() or "" for page in reader.pages)

        # Add file content to unified document with minimal separation
        all_pdf_text += f" {file_text}"
        file_info.append(f"{uploaded_file.name}: {len(file_text)} characters, {len(reader.pages)} pages")

    # Display unified document information
    st.success(f"{len(uploaded_files)} PDF(s) processed, {len(all_pdf_text)} characters!")
    for info in file_info:
        st.info(info)

    # Store unified text as single document
    pdf_text = all_pdf_text

    # Initialize conversation
    if "conversation" not in st.session_state:
        st.session_state.conversation = []

    if "processing" not in st.session_state:
        st.session_state.processing = False

    # Chat area - no container
    st.markdown('<div class="chat-area">', unsafe_allow_html=True)

    # Display messages
    for turn in st.session_state.conversation:
        _render_message(turn["role"], turn["text"])

    # Show typing indicator
    if st.session_state.processing:
        st.markdown('<div class="typing"> Bot is thinking...</div>', unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)

    # Input area
    st.markdown('<div class="input-area">', unsafe_allow_html=True)

    # Use dynamic key to clear input field
    if "input_key" not in st.session_state:
        st.session_state.input_key = 0

    # Reset key when input is cleared: a new key creates a fresh, empty widget.
    if st.session_state.get("input_cleared", False):
        st.session_state.input_key += 1
        st.session_state.input_cleared = False

    user_input = st.text_input(
        "Message",  # non-empty label for accessibility; hidden visually below
        placeholder="Type your message here...",
        key=f"user_input_{st.session_state.input_key}",
        disabled=st.session_state.processing,
        label_visibility="collapsed",
    )
    st.markdown('</div>', unsafe_allow_html=True)

    # Handle input submission - only when user types and presses Enter.
    # The widget key is rotated on every submission, so the box is empty on the next
    # run and the same text cannot be submitted twice by accident. No comparison
    # with the previous message is needed, which lets the user repeat a question.
    if user_input and not st.session_state.processing:
        st.session_state.conversation.append({"role": "user", "text": user_input})
        st.session_state.processing = True
        # Clear the input field by using a different key
        st.session_state.input_cleared = True
        st.rerun()

    # Process bot response
    if st.session_state.processing and len(st.session_state.conversation) > 0 and st.session_state.conversation[-1]["role"] == "user":
        # Enhanced prompt engineering for better, more grounded answers
        user_question = st.session_state.conversation[-1]['text']

        # Build conversation context from every turn before the current question
        history_lines = []
        for turn in st.session_state.conversation[:-1]:
            label = "Previous Question" if turn["role"] == "user" else "Previous Answer"
            history_lines.append(f"{label}: {turn['text']}\n")
        conversation_context = "".join(history_lines)

        # Advanced prompt with multiple techniques
        prompt = f"""<s>[INST] You are an expert AI assistant specialized in analyzing and answering questions about PDF documents. Your task is to provide accurate, comprehensive, and well-structured answers based solely on the document content provided.

DOCUMENT CONTENT:
{pdf_text[:MAX_CONTEXT_CHARS]}

CONVERSATION HISTORY:
{conversation_context}

CURRENT QUESTION: {user_question}

INSTRUCTIONS:
1. Analyze the document content carefully to find relevant information
2. Provide a comprehensive answer based on the document
3. Structure your answer clearly with key points
4. If the user asks for summary, summarize the document, if the user asks for a specific question, answer it.
5. Quote relevant sections from the document when appropriate
6. Be precise and avoid speculation
7. Only when there is no related information in the document, say "Based on the document, I cannot find specific information about this topic"

Please provide your answer: [/INST]"""

        try:
            # max_new_tokens bounds only the generated continuation. The previous
            # max_length=512 also counted the prompt, which already exceeds that
            # budget once the document excerpt is included.
            # return_full_text=False makes the pipeline return just the answer.
            response = nlp(
                prompt,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.2,
                top_p=0.85,
                repetition_penalty=1.1,
                pad_token_id=nlp.tokenizer.eos_token_id,
                return_full_text=False,
            )
            answer = response[0]["generated_text"]

            # Defensive clean-up in case instruction tags are echoed in the output
            if "[/INST]" in answer:
                answer = answer.split("[/INST]")[-1].strip()
            answer = answer.replace("<s>", "").replace("</s>", "").strip()
        except Exception as exc:
            # Without this, a failed generation would leave `processing` stuck at True
            # (input disabled, generation retried on every rerun).
            answer = f"Sorry, I could not generate an answer: {exc}"

        # st.rerun() must stay outside the try block: it interrupts the script run.
        st.session_state.conversation.append({"role": "bot", "text": answer})
        st.session_state.processing = False
        st.rerun()

st.markdown('</div>', unsafe_allow_html=True)

In [None]:
from pyngrok import ngrok
import time
import socket
import subprocess
import os

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 60  # upper bound on waiting for the Streamlit server to accept connections

# Kill any existing ngrok processes
ngrok.kill()

# Kill any existing Streamlit processes (best effort).
# pkill exits non-zero when nothing matched, which is fine; a missing pkill binary
# is the only failure worth tolerating explicitly.
try:
    subprocess.run(["pkill", "-f", "streamlit"], check=False)
    time.sleep(2)
except FileNotFoundError:
    print("pkill not available; skipping cleanup of old Streamlit processes")

# Start Streamlit in the background; keep the handle so an early crash can be detected
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", "app.py", f"--server.port={STREAMLIT_PORT}", "--server.headless=true"]
)

# Wait until the server actually accepts connections instead of sleeping a fixed time
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if streamlit_proc.poll() is not None:
        raise RuntimeError(f"Streamlit exited early with code {streamlit_proc.returncode}")
    try:
        with socket.create_connection(("127.0.0.1", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            raise TimeoutError(f"Streamlit did not start listening on port {STREAMLIT_PORT}")
        time.sleep(0.5)

# Start ngrok tunnel
public_url = ngrok.connect(STREAMLIT_PORT)
print("🌐 Streamlit URL:", public_url)
