<a href="https://colab.research.google.com/github/maojinfang/AI-Agent-Journey-2026/blob/main/Day01_Data_Cleaning_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json
import math

# === Simulation: raw, dirty data the Agent scraped from the web ===
# Scenario: the Agent found model prices, but the payload is inconsistent:
# one "cost" is a string ("200") and one "model" name has surrounding spaces.
raw_response = """
{
    "source": "Tavily_Search_API",
    "timestamp": "2026-01-26",
    "results": [
        {"model": "DeepSeek-V3", "cost": 10, "currency": "CNY"},
        {"model": "GPT-4o", "cost": "200", "currency": "USD"},
        {"model": "  Llama-3  ", "cost": 0, "currency": "OpenSource"}
    ]
}
"""

def _coerce_price(raw_cost):
    """Normalize one raw ``cost`` value; return ``None`` if a string cannot be parsed."""
    if not isinstance(raw_cost, str):
        # Non-string values (ints, floats, None for a missing key) pass through unchanged.
        return raw_cost
    text = raw_cost.strip()
    try:
        # Prefer int so whole-number strings such as "200" stay integers.
        return int(text)
    except ValueError:
        pass
    try:
        value = float(text)
    except ValueError:
        return None
    # "nan" and "inf" parse as floats but are not usable prices.
    return value if math.isfinite(value) else None


def parse_agent_data(json_str):
    """
    Parse a raw JSON search response into a list of cleaned price records.

    Args:
        json_str: JSON text whose top level is an object with an optional
            "source" field and a "results" list. Each result is expected to
            be an object with "model" and "cost" fields.

    Returns:
        A list of ``{"name": ..., "price": ...}`` dicts, one per result object.
        ``name`` is the whitespace-stripped model name ("Unknown" when the
        field is missing or null). ``price`` is an int or float when "cost" is
        a numeric string, the original value when "cost" is not a string, and
        ``None`` when the string cannot be parsed as a finite number.
        Returns ``[]`` when the input is not valid JSON or does not have the
        expected shape. Malformed input is reported via ``print`` and never
        raised to the caller.
    """
    # Only the decode step is guarded: a failure here invalidates the whole payload.
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError:
        print("❌ 严重错误：抓取到的不是合法的 JSON！")
        return []
    except (TypeError, ValueError, RecursionError) as e:
        # Non-text input, undecodable bytes, or pathologically deep nesting.
        print(f"⚠️ 发生未知错误: {e}")
        return []

    if not isinstance(data, dict):
        print(f"⚠️ 发生未知错误: JSON 顶层应为对象，实际为 {type(data).__name__}")
        return []

    print(f"✅ 数据来源: {data.get('source')}")

    results = data.get("results")
    if results is None:
        # A missing or null "results" field simply means there is nothing to clean.
        return []
    if not isinstance(results, list):
        print(f"⚠️ 发生未知错误: results 应为列表，实际为 {type(results).__name__}")
        return []

    clean_list = []
    for item in results:
        # Each record is handled on its own so one bad entry cannot discard the rest.
        if not isinstance(item, dict):
            print(f"⚠️ 跳过无效条目: {item!r}")
            continue

        raw_name = item.get("model")
        name = "Unknown" if raw_name is None else str(raw_name).strip()

        raw_cost = item.get("cost")
        cost = _coerce_price(raw_cost)
        if cost is None and raw_cost is not None:
            print(f"⚠️ 无法解析价格，已置为 None: {raw_cost!r}")

        clean_list.append({"name": name, "price": cost})

    return clean_list

# === Smoke test: clean the sample payload and display the result ===
output = parse_agent_data(raw_response)
# A single print call; sep="\n" puts the heading and the list on separate lines.
print("\n清洗后的最终情报:", output, sep="\n")

✅ 数据来源: Tavily_Search_API

清洗后的最终情报:
[{'name': 'DeepSeek-V3', 'price': 10}, {'name': 'GPT-4o', 'price': 200}, {'name': 'Llama-3', 'price': 0}]
