In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
简单的AI对话系统 - 单文件版本
使用本地Ollama部署的Gemma 3模型
"""

import asyncio
import json
import os
from typing import Any, Dict, List, Optional

import gradio as gr
import httpx
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

# ===================== Configuration =====================
OLLAMA_BASE_URL = "http://localhost:11434"  # Default address of a local Ollama server
# NOTE(review): the module docstring says "Gemma 3", but `gemma:3b` may not be a
# published Ollama tag (Gemma 3 appears to be listed as `gemma3:<size>`) --
# confirm against the output of `ollama list` before relying on this default.
MODEL_NAME = "gemma:3b"  # Model tag to request; adjust to match the actual deployment
DEFAULT_SYSTEM_PROMPT = """你是一个友好、智能的AI助手。
你的目标是提供有用、准确的回答，并与用户进行自然的对话。

请遵循以下原则：
1. 保持回答简洁明了
2. 如果不确定答案，坦诚承认
3. 避免有害、不适当或违法的内容
4. 尊重用户的隐私
5. 保持友好和专业的语气

当用户问你问题时，请尽可能地提供帮助。
"""


# ===================== 模型服务部分 =====================
class OllamaService:
    """
    Small async client for the HTTP API of a locally running Ollama server.

    The ``model``, ``base_url`` and ``temperature`` attributes are plain
    instance attributes and are read at call time, so callers may reassign
    them between requests.
    """

    def __init__(
            self,
            model: str = MODEL_NAME,
            base_url: str = OLLAMA_BASE_URL,
            temperature: float = 0.7,
    ):
        """
        Store the connection and sampling settings.

        Args:
            model: Name (tag) of the model to query.
            base_url: Base URL of the Ollama API.
            temperature: Sampling temperature sent with each request.
        """
        self.model = model
        self.base_url = base_url
        self.temperature = temperature

    async def check_model_availability(self) -> bool:
        """
        Report whether ``self.model`` is listed by ``GET /api/tags``.

        This is a best-effort probe: any failure (server down, bad status,
        unexpected body) is printed and reported as ``False``.

        Returns:
            bool: True if the model is listed by the server, False otherwise.
        """
        # The server lists models as "name:tag"; an untagged name refers to
        # the "latest" tag, so accept that spelling as well.
        wanted = {self.model}
        if ":" not in self.model:
            wanted.add(f"{self.model}:latest")

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(f"{self.base_url}/api/tags")
                response.raise_for_status()
                models = response.json().get("models", [])
                return any(m.get("name") in wanted for m in models)
        except Exception as e:
            print(f"检查模型可用性时出错: {e}")
            return False

    @staticmethod
    def _build_prompt(messages: List[Dict[str, str]]) -> tuple:
        """Flatten chat messages into a ``(system, prompt)`` pair of strings."""
        system_content = ""
        parts = []

        for msg in messages:
            role = msg["role"]
            if role == "system":
                # The last system message wins.
                system_content = msg["content"]
            elif role == "user":
                parts.append(f"用户: {msg['content']}\n")
            elif role == "assistant":
                parts.append(f"助手: {msg['content']}\n")

        # Trailing assistant prefix cues the model to answer as the assistant.
        parts.append("助手: ")
        return system_content, "".join(parts)

    # Only connection-level failures are retried. HTTP status errors (for
    # example an unknown model) and read timeouts are not: repeating them
    # would only delay the error message shown to the user.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(min=1, max=10),
        retry=retry_if_exception_type((httpx.NetworkError, httpx.ConnectTimeout)),
        reraise=True,
    )
    async def _post_generate(self, payload: Dict[str, Any]) -> str:
        """POST ``payload`` to ``/api/generate`` and return the generated text.

        Raises whatever httpx raises; the caller turns that into a message.
        """
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=60.0
            )
            response.raise_for_status()
            return response.json().get("response", "")

    async def generate_text(self, messages: List[Dict[str, str]]) -> str:
        """
        Generate a reply for a chat-style message list.

        Args:
            messages: Dicts with ``role`` ("system", "user" or "assistant")
                and ``content`` keys; other roles are ignored.

        Returns:
            str: The generated text, or an apology string containing the
            error text if the request ultimately failed. This method does
            not raise on request failure.
        """
        system_content, prompt = self._build_prompt(messages)

        payload = {
            "model": self.model,
            "prompt": prompt,
            "system": system_content,
            # Sampling parameters belong under "options"; a top-level
            # "temperature" key is not part of the generate request.
            "options": {"temperature": self.temperature},
            "stream": False  # Single JSON response instead of a stream
        }

        try:
            return await self._post_generate(payload)
        except Exception as e:
            print(f"生成文本时出错: {e}")
            return f"抱歉，生成回复时出现错误: {str(e)}"


# ===================== 聊天代理部分 =====================
class ChatAgent:
    """
    Chat agent that keeps the turn history and asks the LLM for replies.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Create the agent and its underlying Ollama service.

        Args:
            config: Optional settings. Recognised keys are ``model``,
                ``temperature`` and ``system_prompt``; missing keys (or a
                missing/empty config) fall back to the module defaults.
        """
        # Normalise first, then read only from the normalised dict so that
        # ChatAgent() with no config does not fail on None.get(...).
        self.config = config or {}
        self.history: List[Dict[str, str]] = []
        self.ollama_service = OllamaService(
            model=self.config.get("model", MODEL_NAME),
            temperature=self.config.get("temperature", 0.7)
        )
        self.system_prompt = self.config.get("system_prompt", DEFAULT_SYSTEM_PROMPT)

    async def process(self, user_input: str, **kwargs) -> str:
        """
        Send the conversation so far plus ``user_input`` to the model.

        The reply is appended to the history before it is returned.

        Args:
            user_input: The new user message.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            str: The text returned by the Ollama service.
        """
        # System prompt first, then every recorded turn in order.
        messages = [{"role": "system", "content": self.system_prompt}]

        for item in self.history:
            messages.append({"role": "user", "content": item["user"]})
            messages.append({"role": "assistant", "content": item["assistant"]})

        # The new user message goes last.
        messages.append({"role": "user", "content": user_input})

        response = await self.ollama_service.generate_text(messages)

        self.add_to_history(user_input, response)

        return response

    def add_to_history(self, user_input: str, response: str):
        """
        Record one completed turn.

        Args:
            user_input: The user message.
            response: The agent's reply to it.
        """
        self.history.append({"user": user_input, "assistant": response})

    def get_history(self) -> List[Dict[str, str]]:
        """
        Return the recorded turns.

        Returns:
            List[Dict[str, str]]: The internal history list itself (not a
            copy); each item has ``user`` and ``assistant`` keys.
        """
        return self.history

    def clear_history(self):
        """
        Forget all recorded turns.
        """
        self.history = []


# ===================== UI部分 =====================
def create_chat_interface():
    """
    Build the Gradio chat UI.

    Conversation memory is taken from each browser session's own Chatbot
    transcript and a fresh ChatAgent is created per request, so concurrent
    sessions cannot see or overwrite each other's history, model choice or
    temperature.

    Returns:
        gr.Blocks: The assembled Gradio app (not yet launched).
    """
    # Used only for the availability probe on page load; it always checks the
    # default model, which is also the dropdown's initial value.
    probe_service = OllamaService(model=MODEL_NAME, temperature=0.7)

    with gr.Blocks(title="Gemma 3 聊天助手") as demo:
        gr.Markdown("# Gemma 3 聊天助手")
        gr.Markdown("使用本地Ollama部署的Gemma 3模型")

        # Conversation transcript
        chatbot = gr.Chatbot(
            height=500,
            show_copy_button=True,
            elem_id="chatbot",
            label="对话历史"
        )

        # Input area
        with gr.Row():
            with gr.Column(scale=8):
                user_input = gr.Textbox(
                    placeholder="在这里输入您的问题...",
                    label="用户输入",
                    lines=2,
                    elem_id="user-input"
                )

            with gr.Column(scale=1):
                submit_btn = gr.Button("发送", variant="primary")
                clear_btn = gr.Button("清空")

        # Advanced options
        with gr.Accordion("高级选项", open=False):
            with gr.Row():
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="温度参数"
                )

                model_name = gr.Dropdown(
                    choices=["gemma:3b", "gemma:7b", "gemma:2b-instruct"],
                    value=MODEL_NAME,
                    label="模型选择"
                )

            system_prompt = gr.Textbox(
                value=DEFAULT_SYSTEM_PROMPT,
                label="系统提示词",
                lines=5
            )

        # Status line
        status = gr.Markdown("状态: 就绪")

        # Callbacks
        async def check_model():
            """Return a status line saying whether the default model is listed."""
            is_available = await probe_service.check_model_availability()
            if is_available:
                return "状态: 模型已加载，可以开始对话"
            else:
                return f"状态: 警告 - 模型 {probe_service.model} 可能未加载，请确保Ollama正在运行并已下载该模型"

        async def chat_response(
                user_input: str,
                history: List[List[str]],
                temp: float,
                model: str,
                sys_prompt: str
        ):
            """Yield (transcript, textbox value) updates for one user message."""
            # The Chatbot value may be None before the first message.
            history = list(history or [])

            if not user_input.strip():
                yield history, ""
                return

            # Completed turns of THIS session become the agent's memory.
            past_turns = [
                {"user": turn[0], "assistant": turn[1]}
                for turn in history
                if turn[0] is not None and turn[1] is not None
            ]

            # Show the user's message immediately with a pending reply.
            history.append([user_input, None])
            yield history, ""

            # A per-request agent keeps settings and history isolated from
            # other sessions that may be running concurrently.
            agent = ChatAgent(config={
                "model": model,
                "temperature": temp,
                "system_prompt": sys_prompt
            })
            agent.history = past_turns

            response = await agent.process(user_input)

            # Fill in the pending reply.
            history[-1][1] = response
            yield history, ""

        def clear_conversation():
            """Reset the transcript (which is the conversation memory) and the textbox."""
            return [], ""

        # Wire up events
        submit_btn.click(
            fn=chat_response,
            inputs=[user_input, chatbot, temperature, model_name, system_prompt],
            outputs=[chatbot, user_input]
        )

        clear_btn.click(
            fn=clear_conversation,
            inputs=[],
            outputs=[chatbot, user_input]
        )

        # Probe model availability when the page loads
        demo.load(
            fn=check_model,
            inputs=None,
            outputs=status
        )

    return demo


# ===================== 主函数 =====================
def main():
    """
    Build the chat UI and serve it.

    The port is read from the ``GRADIO_PORT`` environment variable (default
    7860); a public share link is requested only when ``GRADIO_SHARE`` equals
    "true" (case-insensitive).
    """
    app = create_chat_interface()

    # Launch settings, partly driven by the environment.
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": int(os.getenv("GRADIO_PORT", 7860)),
        "share": os.getenv("GRADIO_SHARE", "false").lower() == "true",
        "debug": True,
    }

    app.launch(**launch_options)


if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
  chatbot = gr.Chatbot(


* Running on local URL:  http://0.0.0.0:7860
* To create a public link, set `share=True` in `launch()`.
