In [1]:
# 1. Install Ollama
!sudo apt-get install zstd
!curl -fsSL https://ollama.com/install.sh | sh

# 2. Start Ollama in the background (using nohup)
import subprocess
import time

# Start the server independently
process = subprocess.Popen("nohup ollama serve > ollama.log 2>&1 &", shell=True)
print("Starting Ollama server...")
time.sleep(5)

# 3. Pull the Llama 3 model
# Note: This might take a few minutes to download the 4.7GB model
!ollama pull llama3:8b

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  zstd
0 upgraded, 1 newly installed, 0 to remove and 41 not upgraded.
Need to get 603 kB of archives.
After this operation, 1,695 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 zstd amd64 1.4.8+dfsg-3build1 [603 kB]
Fetched 603 kB in 1s (473 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package zstd.
(Reading database ... 121689 files and directories currently i

In [2]:
#installing dependencies
!pip install streamlit langchain langchain langchain_community langchain_ollama duckduckgo-search
!pip install -U ddgs

Collecting streamlit
  Downloading streamlit-1.53.1-py3-none-any.whl.metadata (10 kB)
Collecting langchain_community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain_ollama
  Downloading langchain_ollama-1.0.1-py3-none-any.whl.metadata (2.5 kB)
Collecting duckduckgo-search
  Downloading duckduckgo_search-8.1.1-py3-none-any.whl.metadata (16 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain_community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting requests<3,>=2.27 (from streamlit)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting ollama<1.0.0,>=0.6.0 (from langchain_ollama)
  Downloading ollama-0.6.1-py3-none-any.whl.

In [3]:
%%writefile app.py
import streamlit as st
from langchain_community.llms import Ollama
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# --- Page Config ---
# Browser-tab title/icon and the on-page heading. The icon and heading use the
# robot-face emoji (U+1F916); the literal must be a real emoji, not its
# mis-decoded byte sequence.
st.set_page_config(page_title="Real-Time AI Assistant", page_icon="🤖")
st.title("🤖 Real-Time Web RAG Assistant")

# --- Sidebar for Settings ---
# Each widget is created through `st.sidebar` directly, which places it in the
# sidebar just like a `with st.sidebar:` block would.
st.sidebar.header("Settings")
model_id = st.sidebar.selectbox("Select Model", ["llama3:8b", "mistral"])
st.sidebar.write(
    "This assistant searches the web in real-time to answer your questions."
)

# --- Initialize Session State (Memory) ---
# Create the empty chat transcript only when the key is not present yet, so an
# existing transcript is never overwritten.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# --- Initialize Tools & Model ---
# We cache resources to avoid reloading model on every interaction
@st.cache_resource
def get_engine(model_name="llama3:8b"):
    """Build the LLM client and the web-search tool.

    Args:
        model_name: Ollama model tag to use. Defaults to "llama3:8b" (the value
            that was previously hard-coded), so argument-less calls are unchanged.

    Returns:
        A ``(llm, search)`` tuple: an ``Ollama`` LLM bound to ``model_name`` and
        a ``DuckDuckGoSearchRun`` tool.
    """
    llm = Ollama(model=model_name)
    search = DuckDuckGoSearchRun()
    return llm, search

# Pass the sidebar selection through so the "Select Model" box actually takes
# effect; before, the choice was ignored and llama3:8b was always used.
# NOTE: the selected model must already be pulled into Ollama (the setup cell
# only pulls llama3:8b), otherwise invoking the chain will report an error.
llm, search = get_engine(model_id)

# --- Chains ---
# Prompt texts are held in named constants; each chain pipes
# prompt -> LLM -> StrOutputParser.

# 1. Query Rewriter (History Aware)
REWRITE_TEMPLATE = """
Given the conversation history and a follow-up question, rephrase the
follow-up question to be a standalone search query.
History: {history}
Question: {question}
Standalone Query:
"""

# 2. Answer Generator
ANSWER_TEMPLATE = """
Answer the question based *only* on the context below.
Context: {context}
Question: {question}
"""

rewrite_prompt = ChatPromptTemplate.from_template(REWRITE_TEMPLATE)
answer_prompt = ChatPromptTemplate.from_template(ANSWER_TEMPLATE)

rewrite_chain = rewrite_prompt | llm | StrOutputParser()
answer_chain = answer_prompt | llm | StrOutputParser()

# --- Chat Interface ---
# Replay every stored turn, each inside a chat bubble for its role.
for past_turn in st.session_state.messages:
    st.chat_message(past_turn["role"]).markdown(past_turn["content"])

# Handle new user input
if prompt := st.chat_input("Ask me anything..."):
    # Snapshot the previous turns BEFORE recording the new prompt. Building the
    # history afterwards would feed the current question to the rewriter twice
    # (once as "history", once as "question").
    prior_turns = st.session_state.messages[-4:]
    history_str = "\n".join(f"{m['role']}: {m['content']}" for m in prior_turns)

    # 1. Display User Message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # 2. Process Response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        message_placeholder.markdown("Thinking & Searching...")

        try:
            # Generate Search Query. Model output may carry surrounding
            # whitespace; fall back to the raw question if it comes back empty.
            search_query = rewrite_chain.invoke(
                {"history": history_str, "question": prompt}
            ).strip()
            if not search_query:
                search_query = prompt

            # Perform Search
            search_results = search.run(search_query)

            # Generate Answer
            full_response = answer_chain.invoke(
                {"context": search_results, "question": prompt}
            )

            # Display Response
            message_placeholder.markdown(full_response)

            # Save to history
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )

        except Exception as e:
            # Deliberately broad: any failure (LLM, search, network) is shown
            # in the assistant bubble instead of crashing the app.
            message_placeholder.markdown(f"Error: {e}")

Writing app.py


In [4]:
import subprocess
import time
import os

print("‚öôÔ∏è  Running System Diagnostics & Repair...")

# 1. Check if Ollama is installed
if os.system("which ollama > /dev/null") != 0:
    print("‚ùå Ollama not found. Re-installing...")
    os.system("curl -fsSL https://ollama.com/install.sh | sh")
else:
    print("‚úÖ Ollama is installed.")

# 2. Kill any old processes to free up ports
os.system("pkill ollama")
os.system("pkill streamlit")
os.system("pkill cloudflared")
time.sleep(2)

# 3. Start Ollama Server
print("üîÑ Starting AI Server...")
subprocess.Popen("nohup ollama serve > ollama.log 2>&1 &", shell=True)
time.sleep(5)  # Wait for it to boot

# 4. Check & Pull Model (This is usually why it hangs!)
print("üì• Verifying Llama 3 Model (This might take a minute)...")
# We try to pull. If it exists, it checks mostly instantly. If missing, it downloads.
os.system("ollama pull llama3:8b")

# 5. Start Streamlit App
print("üöÄ Launching App...")
if not os.path.exists("app.py"):
    print("‚ö†Ô∏è  app.py missing! Please re-run the cell that creates 'app.py'.")
else:
    subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501"])
    time.sleep(3)

    # 6. Start Cloudflare Tunnel (Stable Connection)
    print("\nüîó  OPEN THIS LINK TO CHAT:")
    # Ensure cloudflared is available
    if os.system("which cloudflared > /dev/null") != 0:
        os.system("wget -q -O cloudflared.deb https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb")
        os.system("dpkg -i cloudflared.deb")

    !cloudflared tunnel --url http://localhost:8501

⚙️  Running System Diagnostics & Repair...
✅ Ollama is installed.
🔄 Starting AI Server...
📥 Verifying Llama 3 Model (This might take a minute)...
🚀 Launching App...

🔗  OPEN THIS LINK TO CHAT:
[90m2026-01-31T13:45:57Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2026-01-31T13:45:57Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2026-01-31T13:46:02Z[0m [32mINF[0m +------------------------------