# Llama3 RAG Chatbot

In [1]:
#!pip install --upgrade transformers

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

zsh:1: command not found: nvidia-smi


## 1. Install Packages

In [3]:
!pip install llama-index
!pip install llama_index.embeddings.huggingface
!pip install llama-index-llms-huggingface
!pip install llama-index-llms-groq
!pip install gradio
#!pip install logpai



In [4]:
#import nest_asyncio

#nest_asyncio.apply()

In [5]:
import os

# Detect Google Colab by looking for its module name in the active shell's repr.
RunningInCOLAB = str(get_ipython()).find('google.colab') != -1
if RunningInCOLAB:
    from google.colab import userdata
    print("Running on Google Colab")
    # Copy each credential from Colab's secret store into the process environment.
    for secret_name in ("GROQ_API_KEY", "HF_TOKEN"):
        os.environ[secret_name] = userdata.get(secret_name)

## 2. Load LLMs

In [6]:
from llama_index.core import PromptTemplate
from prompts import rag_system_prompt

# This will wrap the default prompts that are internal to llama-index.
# The template exposes a single `{query_str}` placeholder between the
# <|USER|> and <|ASSISTANT|> markers.
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")


## 3. Create a Custom Router

Each user query is classified into one of the following categories:
- summary
- retrieve
- generation

**Question Examples**

In [7]:
# Sample user questions; this list is passed to the "Frequent Queries"
# dropdown when the Gradio UI is built further down.
questions = [
    "Who are you?",
    "How are you doing today?",
    "What is a Brute Force Attack?",
    "What kind of log file is this?",
    "Summarize the log.",
    "What does each column in this log mean?", #"What does each column mean in this log?",
    "Are there any abnormal activities?",
    "Show the abnormal activities.",
    "Identify the main anomaly activities.",
    "What happened at 03:28:22?",
    "What happened at '03:28:22'?",
    "What is 'combo' in this log?",
    "How many times does 'ALERT' occur in the log file?",
    "How many times did the user login?",
    "Which users have tried to log on to the FTP server?",
    "What is the event Event5?",
    "Which users are trying to log on in the event of an Event7?",
    "Which IP addresses are trying to log in for an Event7?",
]

## 4. Create a Chatbot with Gradio

In [None]:
from types import resolve_bases
import re
import gradio as gr
from pathlib import Path
import threading
import time
from utils import get_query_engines, process_response, get_event_logs
from query import query_with_engines, generate_questions
from parser import retry_log_parsing
import json
from datetime import datetime


# Shared state for the Gradio callbacks below.
# Stays None until upload_file() assigns the result of get_query_engines();
# bot() and first_bot() refuse to run while it is None.
query_engines = None

# Initialize messages as a list with a system prompt
messages = [{"role": "system", "content": rag_system_prompt}]
# Set by retry/undo/clear/stop to interrupt streaming; bot() and first_bot()
# clear it when they start and check it before emitting each chunk.
stop_event = threading.Event()

def save_conversation(conversation, response_time, extra_info, path="conversation.json"):
    """Append one question/answer exchange to a JSON conversation log.

    Args:
        conversation: Indexable pair whose item 0 is the user input and item 1
            is the response. A falsy value (None, empty) makes this a no-op.
        response_time: Elapsed time in seconds; stored formatted to two decimals.
        extra_info: Additional text stored alongside the exchange.
        path: Log file to update. Defaults to "conversation.json" in the
            current working directory.

    Returns:
        None. Failures are reported with print() rather than raised, so a
        logging problem never interrupts the chat.
    """
    if not conversation:
        print("No conversation to save.")
        return
    print(f"Saving conversation... {conversation}")

    record = {
        "user_input": conversation[0],
        "response": conversation[1],
        "extra_info": extra_info,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "response_time": f"{response_time:.2f} seconds",
    }

    # Load what is already on disk. The file is only read here, so plain "r"
    # is enough; it is rewritten in full further down.
    try:
        with open(path, "r") as f:
            existing_data = json.load(f)
    except FileNotFoundError:
        existing_data = []
    except json.JSONDecodeError as e:
        # Empty or corrupt file: start a fresh log instead of failing.
        print(f"Error parsing JSON: {str(e)}")
        existing_data = []
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return

    # A valid JSON file whose root is not a list (e.g. a single object) would
    # break the append below; keep that content as the first entry instead.
    if not isinstance(existing_data, list):
        existing_data = [existing_data]

    existing_data.append(record)

    try:
        with open(path, "w") as f:
            json.dump(existing_data, f, indent=4)
    except Exception as e:
        print(f"Error saving conversation: {str(e)}")

def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag of a chatbot like/dislike event."""
    feedback = (x.index, x.value, x.liked)
    print(*feedback)

def add_message(message, history):
    """Queue a user message for the bot.

    A non-empty message is recorded in the global ``messages`` list and added
    to ``history`` as a ``(message, None)`` pair awaiting a response.

    Returns:
        A 3-tuple ``("", "", history)``: the two empty strings are the values
        sent back to the first two output components, followed by the
        (possibly extended) history.
    """
    global messages
    history = [] if history is None else history
    is_blank = message is None or message == ""
    if not is_blank:
        messages.append({"role": 'user', 'content': message})
        history.append((message, None))
    return "", "", history

def bot(history):
    """Stream the answer to the latest user message and append diagnostics.

    This is a generator used as a Gradio event handler: every ``yield`` pushes
    the current ``history`` to the chatbot component.

    Steps:
      1. Route and answer the last user message via ``query_with_engines``.
      2. Stream partial answers from ``process_response`` until finished or
         until ``stop_event`` is set.
      3. Append a collapsible "extra info" footer (route, keywords, events,
         timing, source nodes, context, and logs for every ``EventN`` id
         mentioned in the answer).
      4. Persist the exchange with ``save_conversation``.

    Args:
        history: Chat history whose last entry is ``[user_message, None]``.

    Yields:
        The updated history.
    """
    start_time = time.time()
    stop_event.clear()  # Clear any previous stop event

    # Inside a generator a `return value` is never delivered to the caller, so
    # the unchanged history is yielded explicitly before bailing out.
    if query_engines is None:
        gr.Warning("Warning: Please upload a log file first.")
        yield history
        return

    if not history:
        # Nothing to answer; stop here rather than indexing an empty history.
        gr.Warning("Warning: Please enter a message.")
        yield history
        return

    # Get the last message from the history
    text = history[-1][0]
    choice, response, context = query_with_engines(text, query_engines)
    for res in process_response(response):
        if stop_event.is_set():
            print("Stopping the bot response generation.")
            break
        history[-1][1] = res
        yield history

    # The answer is still None when generation was stopped before the first
    # chunk (or nothing was produced); treat that as an empty answer.
    answer = history[-1][1] or ""
    # Print the last message and response
    print(answer)

    #template_ids = re.findall(r'\b[0-9a-f]{8}\b', answer)
    template_ids = re.findall(r'\bEvent\d+\b', answer)

    if template_ids:
        # De-duplicate while keeping first-seen order.
        template_ids = list(dict.fromkeys(template_ids))
        print("Template IDs found:", template_ids)
    else:
        print("No template IDs found.")

    end_time = time.time()
    elapsed = end_time - start_time

    keywords = context.get('keywords') if context else None
    events = context.get('events') if context else None

    # Router + Time taken + Tokens/second
    # The rate is computed from the character length of the answer.
    rate = len(answer) / elapsed if elapsed > 0 else 0.0
    gap = "&nbsp;&nbsp;&nbsp;"

    extra_info = f"\n> <sub><sup> **Route**: {choice.title()} </sup></sub>"
    if keywords:
        extra_info += f"{gap}<sub><sup>🔑 **Keywords**: {keywords}</sup></sub>"
    if events:
        extra_info += f"{gap}<sub><sup>🔑 **Events**: {events}</sup></sub>"
    extra_info += f"{gap}<sub><sup>⏰ **Time Taken**: {elapsed:.2f} s</sup></sub>"
    extra_info += f"{gap}<sub><sup>✍ **Tokens/Second**: {rate:.2f}</sup></sub>"

    # Node info
    if hasattr(response, 'source_nodes') and len(response.source_nodes) > 0:
        print("Length of source_nodes:", len(response.source_nodes))
        extra_info += (
            f"{gap}<sub><sup>🔗 **Source Nodes**: {len(response.source_nodes)}</sup></sub>"
            "\n<details>"
            "\n<summary><sub><sup>📖 Show Nodes</sup></sub></summary>"
            "\n\n```js\n"
            + "\n".join([f"Node ID: {node.node_id}\n{node.get_text()}\n" for node in response.source_nodes])
            + "\n```\n</details>"
        )

    # Context
    if context and context.get('context'):
        extra_info += (
            f"{gap}<sub><sup>🔎 **Context**: {len(context['context'])}</sup></sub>"
            "\n<details>"
            "\n<summary><sub><sup>🔎 Show Context</sup></sub></summary>"
            "\n\n```txt\n"
            + "\n".join([f"{line.strip()}" for line in context['context']])
            + "\n```\n</details>"
        )

    # Look up the raw log lines for every event id mentioned in the answer.
    event_log_dict = get_event_logs(query_engines, template_ids)

    # Template info
    for template_id in template_ids:
        if template_id in event_log_dict:
            extra_info += (
                "\n<details>"
                f"\n<summary><sub><sup>📝 Show Logs for Event: {template_id}</sup></sub></summary>"
                "\n\n```js\n"
                + "\n".join([f"{log_line}" for log_line in event_log_dict[template_id]])
                + "\n```\n</details>"
            )

    # Append extra info to the last message
    history[-1][1] = answer + extra_info
    yield history
    response_time = elapsed
    print(f"Time taken : {response_time:.2f} seconds.")
    save_conversation(history[-1], response_time, extra_info)


def first_bot(history):
    """Greet the user after an upload with a summary and suggested questions.

    Generator used as a Gradio event handler. It parses the uploaded log with
    ``retry_log_parsing`` (output goes to ``<cwd>/result``), asks
    ``generate_questions`` for a summary plus candidate questions, streams the
    reply, and finally appends it to ``history`` and saves the exchange.

    Args:
        history: Current chat history; ``None`` is treated as empty.

    Yields:
        While streaming, a one-entry history holding only the partial reply;
        at the end, the full history with the reply appended.
    """
    start_time = time.time()
    stop_event.clear()  # Clear any previous stop event

    # Inside a generator a `return value` is never delivered to the caller, so
    # the unchanged history is yielded explicitly before bailing out.
    if query_engines is None:
        gr.Warning("Warning: Please upload a log file first.")
        yield history
        return

    if history is None:
        history = []

    log_path = query_engines["log_path"]
    print(f"log_path: {log_path}")

    # log parsing
    output_dir = os.path.join(os.getcwd(), 'result')
    retry_log_parsing(log_path, output_dir)

    prompt = "Generate a summary and some possible questions that the user might ask for this log file."
    response = generate_questions(prompt, query_engines)

    # Pre-bind `res` so the code after the loop still works when nothing is
    # streamed (empty response, or stop requested before the first chunk).
    res = ""
    for res in process_response(response):
        if stop_event.is_set():
            print("Stopping the bot response generation.")
            break
        yield [(None, res)]

    end_time = time.time()
    response_time = end_time - start_time
    # Print the last message and response
    history.append((None, res))
    print(history[-1][1])
    print(f"Time taken : {response_time:.2f} seconds.")
    yield history
    save_conversation([prompt, history[-1][1]], response_time, "")


def retry(history):
    """Cancel any running generation and hand the last user input back.

    Signals the stop event, waits one second, drops the newest entry from the
    global ``messages`` list (never the first one) and removes the newest
    history entry.

    Returns:
        ``(history, input_text)`` where ``input_text`` is the first element of
        the removed history entry, or ``""`` if the history was empty.
    """
    # Set the stop event
    stop_event.set()
    time.sleep(1)
    if len(messages) > 1:
        messages.pop()
    input_text = ""
    if len(history) > 0:
        last_turn = history.pop()
        input_text = last_turn[0]
    return history, input_text

def undo(history):
    """Cancel any running generation and remove the latest exchange.

    Signals the stop event, waits one second, then pops the newest entry from
    the global ``messages`` list (never the first one) and from ``history``.

    Returns:
        The shortened history.
    """
    # Set the stop event
    stop_event.set()
    time.sleep(1)
    # Both containers are trimmed independently of each other.
    if len(messages) > 1:
        messages.pop()
    if len(history) > 0:
        history.pop()
    return history

def clear(history):
    """Cancel any running generation and reset the conversation.

    Signals the stop event, waits one second, rebinds the global ``messages``
    to a list holding only the system prompt, and returns a new empty history
    (the list passed in is not modified).
    """
    global messages
    # Set the stop event
    stop_event.set()
    time.sleep(1)
    system_turn = {"role": "system", "content": rag_system_prompt}
    messages = [system_turn]
    return []

def stop():
    """Signal the streaming loops to stop, wait one second, and notify the user."""
    stop_event.set()
    time.sleep(1)
    print("Stopping the bot response generation.")
    gr.Warning("Warning: Stopped by user")

def upload_file(filepath):
    """Build query engines for an uploaded log and reveal the preview widgets.

    Stores the result of ``get_query_engines(filepath)`` in the global
    ``query_engines`` so the chat handlers can use it.

    Args:
        filepath: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        A list of three component updates, in the order the event listener
        declares its outputs: the file widget, the node-count HTML, and the
        file-content code viewer. All three are hidden when ``filepath`` is
        ``None``.
    """
    global query_engines

    if filepath is None:
        print("No file uploaded.")
        # Return one value per declared output; a single component here would
        # not match the three outputs wired to this handler.
        return [
            gr.File(value=None, visible=False),
            gr.HTML(value="", visible=False),
            gr.Code(value="", visible=False),
        ]

    start_time = time.time()
    name = Path(filepath).name
    print("File uploaded: ", name)
    query_engines = get_query_engines(filepath)
    gr.Info("File uploaded successfully, and you can now start chatting.")
    end_time = time.time()
    print(f"Time taken : {end_time - start_time:.2f} seconds.")

    node_text = f"Total Nodes <br>{len(query_engines['nodes'])} "
    html = f"<div style='text-align:center;width:100px;'>{node_text}</div>"

    # Close the handle deterministically, and substitute undecodable bytes so a
    # log with stray binary data can still be previewed.
    with open(filepath, "r", errors="replace") as log_file:
        content = log_file.read()

    return [
        gr.File(filepath, visible=True, min_width=800),
        gr.HTML(value=html, visible=True),
        gr.Code(value=content, label="File Content", visible=True),
    ]

def insert_fre_q(question):
    """Echo the selected frequent question to stdout and return it unchanged."""
    selected = question
    print(selected)
    return selected

# Build the Gradio app: a chat tab (chatbot, control buttons, text input,
# frequent-question dropdown) and an upload tab (file widget, node count,
# file preview), then wire the event handlers and launch it.
with gr.Blocks() as demo:
    gr.Markdown("## LLMLogAnalyser Chatbot")
    # tab 1 Chatbot
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot(
            [],
            label="LLMLogAnalyser Chatbot",
            avatar_images=("https://static.vecteezy.com/system/resources/previews/002/592/172/non_2x/smile-emoji-pop-art-line-style-icon-free-vector.jpg",
                        "https://images.emojiterra.com/google/noto-emoji/unicode-15/bw/512px/1f916.png"),
            bubble_full_width=False,
            height="63vh",
            layout="bubble",
            show_copy_button=True,
            placeholder="### Hi I am a LLMLogAnalyser Chatbot! \n### I can help you analyze log files. \n### Please upload a log file above to get started.",
        )

        # Conversation controls. "sm" is the small button size Gradio accepts.
        with gr.Row():
            retry_but = gr.Button("🔄 Retry", elem_id="retry", variant="secondary", size="sm")
            undo_but = gr.Button("↩️ Undo", elem_id="undo", variant="secondary", size="sm")
            clear_but = gr.Button("🗑️  Clear", elem_id="clear", variant="secondary", size="sm")
            stop_but = gr.Button("🛑 Stop", elem_id="stop", variant="secondary", size="sm")

        chat_input = gr.Textbox(lines=1, placeholder="Enter your question or select a frequent question below...", label="User Input")

        frequent_queries = gr.Dropdown(questions, label="Frequent Queries", info="These are the most frequently asked questions", allow_custom_value=True)

        retry_but.click(retry, inputs=chatbot, outputs=[chatbot, chat_input])
        undo_but.click(undo, inputs=chatbot, outputs=chatbot)
        clear_but.click(clear, inputs=chatbot, outputs=chatbot)
        stop_but.click(stop, inputs=None, outputs=None)

        # Both entry points (dropdown selection and textbox submit) first queue
        # the message with add_message, then stream the answer with bot.
        # NOTE(review): api_name="bot_response" is reused by three event chains
        # (two here, one in the upload tab) — confirm the resulting endpoint
        # names are what API clients expect.
        #frequent_queries.change(insert_fre_q, inputs=frequent_queries, outputs=chat_input)
        fre_msg = frequent_queries.select(add_message, [frequent_queries, chatbot], [chat_input, frequent_queries, chatbot])
        fre_bot_msg = fre_msg.then(bot, chatbot, chatbot, api_name="bot_response")


        chat_msg = chat_input.submit(add_message, [chat_input, chatbot], [chat_input, frequent_queries, chatbot])
        bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")

        chatbot.like(print_like_dislike, None, None)

    # tab 2
    with gr.Tab("Upload Log File"):
        with gr.Row():
            file_output = gr.File(label="Uploaded file", visible=False)
            node_text = gr.HTML(value="", visible=False)
            upload_button = gr.UploadButton("Upload a log file", file_count="single", file_types=["text"])
        with gr.Row():
            file_content = gr.Code(value="", label="File Content", visible=False)
        # Upload file, then let first_bot post the opening summary in the chat tab.
        upload_action = upload_button.upload(upload_file, upload_button, [file_output, node_text, file_content])
        #upload_action.then(add_message, [gr.Textbox(lines=1, value="This is a test", visible=False), chatbot], [chat_input, frequent_queries, chatbot])
        upload_action.then(first_bot, chatbot , chatbot, api_name="bot_response")

# Enable the request queue, then launch with a public share link and debug output.
demo.queue()
demo.launch(share=True, debug=True)


Running on local URL:  http://127.0.0.1:7860


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://f7defadd978cef29de.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


File uploaded:  Linux_20k.txt
[SentenceSplitter] Total number of nodes: 695
File saved to: /Users/peng/Documents/Curtin Uni/2024-Semester-1/COMP6016 Project in Computer Sci 2/workspace/LLMLog/notebooks/llama3_chatbot/modules/Linux_20k.txt
Old 'result' folder cleared.
Time taken : 39.56 seconds.
log_path: /Users/peng/Documents/Curtin Uni/2024-Semester-1/COMP6016 Project in Computer Sci 2/workspace/LLMLog/notebooks/llama3_chatbot/modules/Linux_20k.txt
Lines: 
Nov 20 01:41:58 combo kernel: Out of Memory: Killed process 15922 (httpd).
Sep 19 05:57:41 combo smartd[1788]: smartd has fork()ed into background mode. New PID=1788. 
Nov 22 02:15:35 combo kernel: Out of Memory: Killed process 30803 (httpd).
Dec  6 05:50:20 combo kernel: Out of Memory: Killed process 8960 (httpd).
Nov  5 06:41:28 combo ftpd[11288]: connection from 84.57.172.16 (dslb-084-057-172-016.pools.arcor-ip.net) at Sat Nov  5 06:41:28 2005 
Oct  8 22:31:30 combo ftpd[7492]: connection from 64.34.92.187 () at Sat Oct  8 22:31:

In [9]:

#import nest_asyncio

#nest_asyncio.apply()