In [1]:
import gradio as gr
from llm_chat import generate_response  # Import LLM function

2025-02-27 08:20:46.878914: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-27 08:20:46.878993: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-27 08:20:46.880983: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-27 08:20:46.893514: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Mistral Chatbot

In [5]:
MAX_HISTORY = 2  # Number of most recent exchanges kept verbatim once summarization kicks in
SUMMARIZE_AFTER = 3  # Summarization kicks in once the history holds at least this many exchanges
def summarize_conversation(history):
    """
    Ask the model for a brief summary of the given exchanges.

    Args:
        history: Iterable of (user_msg, bot_reply) pairs.

    Returns:
        The text returned by ``generate_response`` for the summary prompt,
        with leading and trailing whitespace removed.
    """
    # One "User: ... / Assistant: ..." pair of lines per exchange.
    transcript = "".join(
        f"User: {user_msg}\nAssistant: {bot_reply}\n"
        for user_msg, bot_reply in history
    )
    prompt = "Summarize this conversation briefly:\n\n" + transcript + "\nSummary:"

    # The summary comes from the same generate_response helper used for chat replies.
    return generate_response(prompt).strip()

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Produce the assistant's reply to ``message`` for the Gradio chat UI.

    The prompt passed to ``generate_response`` contains the system message, a
    summary of the older exchanges (once the history is long enough), the most
    recent exchanges verbatim, and finally the new user message.

    Args:
        message: The user's new message.
        history: Previous exchanges as (user_msg, bot_reply) pairs, or None.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Accepted to match the UI's additional inputs; not forwarded.
        temperature: Accepted to match the UI's additional inputs; not forwarded.
        top_p: Accepted to match the UI's additional inputs; not forwarded.

    Yields:
        The reply text returned by ``generate_response`` (a single yield).

    NOTE(review): ``generate_response`` is only ever called with one prompt
    string in this notebook, so the context is flattened into a plain-text
    transcript and the sampling settings cannot be passed on. Confirm against
    ``llm_chat`` whether it applies its own chat template or accepts them.

    NOTE(review): ``summarize_conversation`` is resolved when this function
    runs, and another cell of this notebook defines a function with the same
    name; whichever cell was executed last is the one that gets called.
    """
    # Work on a copy so the list owned by the caller is never modified.
    history = list(history or [])

    summary = ""
    recent = history
    if len(history) >= SUMMARIZE_AFTER:
        # Everything older than the last MAX_HISTORY exchanges is summarized,
        # so no exchange is dropped and none is both summarized and repeated.
        split = max(len(history) - MAX_HISTORY, 0)
        older, recent = history[:split], history[split:]
        if older:
            summary = summarize_conversation(older)

    # Flatten the context into a single prompt string.
    prompt_parts = []
    if system_message:
        prompt_parts.append(system_message)
    if summary:
        prompt_parts.append(f"Summary of the earlier conversation: {summary}")
    for user_msg, bot_reply in recent:
        prompt_parts.append(f"User: {user_msg}\nAssistant: {bot_reply}")
    prompt_parts.append(f"User: {message}\nAssistant:")

    # Generate response using Mistral-7B
    response = generate_response("\n".join(prompt_parts))

    # No history bookkeeping here: the function only yields the reply text,
    # and a value returned from a generator is not part of what it yields.
    yield response

# Chat UI wired to `respond` above. The four additional inputs are listed in
# the same order as respond's system_message, max_tokens, temperature and
# top_p parameters.
interface = gr.ChatInterface(
    fn=respond,
    title="Mistral-7B Chatbot 🤖",
    description="💬 Chat with a fine-tuned Mistral-7B model for interactive conversations.",
    theme="soft",
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="🛠 System Message", interactive=True),
        # NOTE(review): respond never forwards max_tokens, temperature or top_p
        # to generate_response, so these three sliders currently have no effect.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="📏 Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="🔥 Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="🎯 Top-p Sampling"),
    ],
)

# share=True also publishes a temporary public URL (see the cell output), so
# anyone holding the link can reach this chatbot while it is running.
interface.launch(share=True)




* Running on local URL:  http://127.0.0.1:7863
* Running on public URL: https://5d04053760fdd44350.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


# Zephyr Chatbot

In [4]:
import gradio as gr
from huggingface_hub import InferenceClient

# Client used by summarize_conversation and respond below for chat completions.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

MAX_HISTORY = 2  # Number of most recent exchanges kept verbatim once summarization kicks in
SUMMARIZE_AFTER = 3  # Summarization kicks in once the history holds at least this many exchanges

def summarize_conversation(history):
    """
    Ask the Zephyr-7B endpoint for a brief summary of the given exchanges.

    Args:
        history: Iterable of (user_msg, bot_reply) pairs.

    Returns:
        The content of the first returned choice, with leading and trailing
        whitespace removed.
    """
    # One "User: ... / Assistant: ..." pair of lines per exchange.
    transcript = "".join(
        f"User: {user_msg}\nAssistant: {bot_reply}\n"
        for user_msg, bot_reply in history
    )
    prompt = "Summarize this conversation briefly:\n\n" + transcript + "\nSummary:"

    # Single non-streaming request with a fixed 100-token cap and temperature 0.3.
    completion = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=100,
        temperature=0.3,
    )

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"].strip()

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Stream the assistant's reply to ``message`` for the Gradio chat UI.

    Once the history holds at least SUMMARIZE_AFTER exchanges, everything
    older than the last MAX_HISTORY exchanges is condensed by
    ``summarize_conversation`` and appended to the system message; the most
    recent exchanges are sent verbatim.

    Args:
        message: The user's new message.
        history: Previous exchanges as (user_msg, bot_reply) pairs, or None.
        system_message: Content of the system message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk that has choices.

    NOTE(review): ``summarize_conversation`` is resolved when this function
    runs, and another cell of this notebook defines a function with the same
    name; whichever cell was executed last is the one that gets called.
    """
    # Work on a copy so the list owned by the caller is never modified.
    history = list(history or [])

    system_parts = [system_message] if system_message else []
    if len(history) >= SUMMARIZE_AFTER:
        # Everything older than the last MAX_HISTORY exchanges is summarized,
        # so no exchange is dropped and none is both summarized and repeated.
        split = max(len(history) - MAX_HISTORY, 0)
        older, history = history[:split], history[split:]
        if older:
            summary = summarize_conversation(older)
            # The summary goes into the system message rather than a fake user
            # turn, which would need an empty assistant message after it.
            system_parts.append(f"Summary of the earlier conversation: {summary}")

    # Format conversation for Zephyr-7B
    formatted_history = [{"role": "system", "content": "\n\n".join(system_parts)}]
    for user_msg, bot_reply in history:
        formatted_history.append({"role": "user", "content": user_msg})
        formatted_history.append({"role": "assistant", "content": bot_reply})
    formatted_history.append({"role": "user", "content": message})

    # Get response from Zephyr-7B
    response = ""
    for chunk in client.chat_completion(
        formatted_history,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            # Nothing to read from a chunk without choices.
            continue
        # A chunk's delta may carry no text; treat that as an empty piece
        # instead of failing on `str + None`.
        response += chunk.choices[0].delta.content or ""
        yield response

# Create Gradio ChatInterface wired to `respond` above. The four additional
# inputs are listed in the same order as respond's system_message, max_tokens,
# temperature and top_p parameters.
demo = gr.ChatInterface(
    fn=respond,
    title="Zephyr Chatbot 🤖",
    description="💬 Chat with a fine-tuned Mistral-7B model for interactive conversations.",
    theme="soft",
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="🛠 System Message", interactive=True),
        # These three slider values are forwarded by respond to client.chat_completion.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="📏 Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="🔥 Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="🎯 Top-p Sampling"),
    ],
)

# share=True also publishes a temporary public URL (see the cell output), so
# anyone holding the link can reach this chatbot while it is running.
demo.launch(share=True)




* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://24bd81660fe7b01b5b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


