In [1]:
import streamlit as st
import ollama
import chromadb
from sentence_transformers import SentenceTransformer
import hashlib
import time
from bs4 import BeautifulSoup

# Load the embedding model once and reuse it. Streamlit re-executes this script
# from the top on every widget interaction, so an uncached SentenceTransformer(...)
# call would reload the model on each rerun.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"


@st.cache_resource
def _load_embedding_model(model_name):
    """Return the SentenceTransformer for ``model_name``, cached across reruns.

    Args:
        model_name: Model identifier passed to ``SentenceTransformer``.

    Returns:
        The loaded ``SentenceTransformer`` instance. ``st.cache_resource``
        returns the same object for later calls with the same ``model_name``.
    """
    return SentenceTransformer(model_name)


embedding_model = _load_embedding_model(EMBEDDING_MODEL_NAME)


In [None]:
# Open the persistent Chroma store and fetch (or create) the log collection
CHROMA_DB_PATH = "./log_db"
CHROMA_COLLECTION_NAME = "oracle_logs"

chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
chroma_collection = chroma_client.get_or_create_collection(name=CHROMA_COLLECTION_NAME)



In [3]:
# Strip markup from an HTML log and return its text
def extract_text_from_html(html_content):
    """Return the text of ``html_content`` with whitespace collapsed.

    ``script``, ``style``, ``meta``, ``head`` and ``noscript`` elements are
    removed from the parsed tree before the text is read. Every run of
    whitespace in the result is then reduced to a single space.
    """
    parsed = BeautifulSoup(html_content, "html.parser")
    unwanted = parsed(["script", "style", "meta", "head", "noscript"])
    for element in unwanted:
        element.extract()
    raw_text = parsed.get_text(separator=" ").strip()
    tokens = raw_text.split()
    return " ".join(tokens)

In [4]:
# Embed a single piece of text with the shared embedding model
def compute_embedding(text):
    """Return the embedding of ``text``, converted with ``.tolist()``.

    The text is wrapped in a one-element batch for ``embedding_model.encode``
    and the single resulting row is returned.
    """
    batch = [text]
    vectors = embedding_model.encode(batch)
    first_vector = vectors[0]
    return first_vector.tolist()

# Function to chunk text into smaller parts
def chunk_text(text, chunk_size=2000):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` words.

    Words are the tokens produced by ``str.split()``, so any run of whitespace
    is a separator and each chunk is re-joined with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. A negative step would
            otherwise make ``range`` empty and silently drop all of the text.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [" ".join(words[start:start + chunk_size]) for start in range(0, len(words), chunk_size)]

In [5]:
def clear_all_logs():
    """Delete every record in ``chroma_collection`` and reset the session state.

    All stored entries are fetched and deleted by ID. On success the Streamlit
    session state is cleared and a success message is shown. If the collection
    holds no IDs, a warning is shown instead. Any exception raised along the
    way is reported through ``st.error`` rather than propagated.
    """
    try:
        stored = chroma_collection.get()
        stored_ids = stored["ids"] if "ids" in stored else None
        if not stored_ids:
            st.warning("⚠️ No logs found to clear.")
            return
        # Remove every record by ID, then drop the UI state built from them.
        chroma_collection.delete(ids=stored_ids)
        st.session_state.clear()
        st.success("🗑 Cleared all logs and embeddings from ChromaDB.")
    except Exception as exc:
        st.error(f"⚠️ Error clearing logs: {exc}")

In [8]:
# Function to store logs in ChromaDB
def store_log_chunks(log_text):
    """Chunk ``log_text``, embed each chunk and write it to ``chroma_collection``.

    Each chunk's ID is the MD5 hex digest of its text, so identical chunks map
    to the same ID. Records are written with ``upsert`` rather than ``add`` so
    that a repeated chunk overwrites the existing record instead of being a
    duplicate-ID insert. This covers repeats within one log and chunks left
    over from an earlier run.

    Args:
        log_text: Full log text to index.

    Returns:
        A list with one chunk ID per chunk, in chunk order. A repeated chunk
        contributes its ID once per occurrence.
    """
    chunks = chunk_text(log_text)
    chunk_ids = []

    for chunk in chunks:
        # The content hash is the record ID, which makes the write idempotent.
        chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
        emb = compute_embedding(chunk)
        chroma_collection.upsert(ids=[chunk_hash], embeddings=[emb], documents=[chunk])
        chunk_ids.append(chunk_hash)

    st.success(f"✅ Indexed {len(chunks)} log chunks in ChromaDB")
    return chunk_ids

In [None]:
# Look up the stored chunks closest to a query
def retrieve_similar_logs(query_text, top_k=3):
    """Return the documents ``chroma_collection`` ranks nearest to ``query_text``.

    The query text is embedded with ``compute_embedding`` and up to ``top_k``
    results are requested. When the query yields no documents, a warning is
    shown and an empty list is returned.
    """
    hits = chroma_collection.query(
        query_embeddings=[compute_embedding(query_text)],
        n_results=top_k,
    )
    documents = hits["documents"]

    if not documents or len(documents[0]) == 0:
        st.warning("⚠️ No relevant logs found. Processing new logs.")
        return []

    # Only one query embedding was sent, so its matches are the first entry.
    return documents[0]
    

In [10]:
# **Prompts for different log types with user input**
def get_prompt(file_type, log_text, user_summary):
    """Build the LLM prompt for a log of the given type.

    Args:
        file_type: File extension without the dot. ``"awr"`` selects the AWR
            report prompt; ``"log"``, ``"trace"`` and ``"alert"`` select the
            log-file prompt; anything else (including ``""`` or ``None``)
            selects the generic troubleshooting prompt. Matching ignores case
            and surrounding whitespace.
        log_text: Text inserted at the end of the prompt as the data to analyze.
        user_summary: Optional issue description from the user. When it has
            non-whitespace content it is placed at the top of the prompt under
            a "User Provided Summary" heading; ``None`` is treated as empty.

    Returns:
        The prompt string.
    """
    # Extensions are taken from uploaded file names, so "AWR" or "Log" must
    # select the same prompt as their lower-case forms.
    file_type = (file_type or "").strip().lower()
    # Treat a missing summary like an empty one instead of failing on None.strip().
    user_summary = user_summary or ""
    user_summary_text = f"### **User Provided Summary:**\n{user_summary}\n\n" if user_summary.strip() else ""

    if file_type == "awr":
        return f"""
        {user_summary_text}
        You are an Oracle Database expert. Analyze the following AWR report and provide insights:

        ### **Key Metrics**
        - Highlight critical performance indicators (DB Time, Wait Events, Top SQLs, I/O, CPU Usage).
        - Identify unusual spikes or trends.

        ### **Top 3 Issues**
        - List the top 3 performance bottlenecks.
        - Explain the root cause of each issue.
        - Suggest actionable recommendations.

        ### **Overall Summary**
        - Summarize key findings in simple terms.
        - Mention any configuration inefficiencies.

        **Report Data:**
        {log_text}
        """

    elif file_type in ["log", "trace", "alert"]:
        return f"""
        {user_summary_text}
        You are an Oracle Database expert. Analyze the following log file:

        ### **Error Analysis**
        - Identify critical **ORA-XXXX** errors, warnings, or unusual messages.
        - Provide probable root causes.

        ### **Impact & Severity**
        - Explain how these errors affect database operations.
        - Indicate whether it's critical or minor.

        ### **Recommended Fixes**
        - Suggest solutions to resolve the issues.
        - Mention best practices or parameter tuning.

        **Log Data:**
        {log_text}
        """

    else:
        return f"""
        {user_summary_text}
        You are an Oracle expert troubleshooting database issues. Analyze the following logs:

        ### **Root Cause Analysis**
        - Identify the primary issue based on log patterns.
        - Correlate errors with performance or configuration problems.

        ### **Potential Causes**
        - List possible reasons (e.g., resource contention, memory pressure, storage bottlenecks).
        - Indicate if tuning or infrastructure fixes are needed.

        ### **Next Steps & Recommendations**
        - Suggest diagnostic steps (e.g., check V$ views, AWR, ADDM).
        - Provide immediate and long-term solutions.

        **Log Data:**
        {log_text}
        """

In [11]:
# Ask the selected LLM to summarize the retrieved log chunks
def get_summary(model, relevant_chunks, file_type, user_summary):
    """Return the model's summary of ``relevant_chunks``, or a message string.

    At most the first three chunks are joined with newlines and placed in the
    prompt built by ``get_prompt``. The prompt is sent as a single user message
    through ``ollama.chat``, and the elapsed time is reported with
    ``st.success``.

    Returns:
        The reply's message content; ``"No summary generated."`` when the
        reply has no ``"message"`` entry; a warning string when
        ``relevant_chunks`` is empty; or an error string when the chat call
        raises.
    """
    if not relevant_chunks:
        return "⚠️ No relevant logs found. Please upload a valid log file."

    context = "\n".join(relevant_chunks[:3])
    messages = [{"role": "user", "content": get_prompt(file_type, context, user_summary)}]

    try:
        started = time.time()
        reply = ollama.chat(model=model, messages=messages)
        elapsed = time.time() - started
        st.success(f"✅ Summary generated in {elapsed:.2f} seconds")

        if "message" in reply:
            return reply["message"]["content"]
        return "No summary generated."
    except Exception as exc:
        return f"⚠️ Error generating summary: {exc}"

In [12]:
# Streamlit UI
st.title("⚡ Oracle Log Summarizer with Issue Context")

# Model selection
model = st.selectbox("Choose Model:", ["mistral:latest", "command-r-plus:latest", "llama3:latest"], index=2)

# User's issue summary
user_summary = st.text_area("📝 Provide Your Issue Summary (Optional):", height=100)

# File upload or text input
uploaded_file = st.file_uploader("Upload Log File (.log, .txt, .html, .awr)", type=["log", "txt", "html", "awr"])
log_text = ""
file_type = ""

if uploaded_file:
    # Lower-case the extension so names such as "REPORT.HTML" or "db.AWR" take
    # the same branches as their lower-case forms.
    file_type = uploaded_file.name.split(".")[-1].lower()
    # Substitute undecodable bytes instead of raising UnicodeDecodeError when
    # the uploaded log is not valid UTF-8.
    content = uploaded_file.getvalue().decode("utf-8", errors="replace")

    if file_type == "html":
        log_text = extract_text_from_html(content)
        st.success("✅ Extracted and cleaned HTML log.")
    else:
        log_text = content
else:
    log_text = st.text_area("Paste Oracle Log Data:", height=250)

# Process log and store embeddings
if st.button("Process Log"):
    if log_text.strip():
        clear_all_logs()  # Ensure fresh analysis
        store_log_chunks(log_text)  # Store new logs in ChromaDB

        # Retrieve new logs for summarization
        similar_logs = retrieve_similar_logs(log_text)

        if not similar_logs:
            st.warning("⚠️ No relevant logs found after processing. Please upload a valid log file.")
        else:
            with st.spinner("Generating summary..."):
                st.session_state["summary_text"] = get_summary(model, similar_logs, file_type, user_summary)

# Display summary
if "summary_text" in st.session_state:
    st.subheader("📄 Log Summary:")
    st.text_area("Summary Output", st.session_state["summary_text"], height=200)

# Clear logs button
if st.button("🗑 Clear Logs"):
    clear_all_logs()
    st.rerun()

2025-03-01 08:26:34.667 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-03-01 08:26:34.668 Session state does not function when running a script without `streamlit run`
