In [20]:
import sys
import os
sys.path.append(os.path.abspath("/home/work/disk1/LLM-ljw/agent/analysis-agent"))

In [21]:
import pandas as pd
import numpy as np
from typing import List, Optional
import json
from utils.utils import remove_think


In [22]:
def get_season(month: int) -> str:
    """Return the English season name for a calendar month number.

    Months 12, 1, 2 are "Winter"; 3-5 "Spring"; 6-8 "Summer"; 9-11 "Autumn".

    Raises:
        KeyError: if ``month`` is not one of the twelve month numbers.
    """
    season_by_month = {
        **dict.fromkeys((12, 1, 2), "Winter"),
        **dict.fromkeys((3, 4, 5), "Spring"),
        **dict.fromkeys((6, 7, 8), "Summer"),
        **dict.fromkeys((9, 10, 11), "Autumn"),
    }
    return season_by_month[month]

In [23]:
def get_wind_direction_label(degree: float) -> str:
    """Convert a wind-direction angle in degrees to its Chinese 8-point compass name.

    The angle is first wrapped into [0, 360). The circle is then split into
    eight 45-degree sectors centred on the compass points, starting with
    north at 0 degrees and proceeding through north-east, east, and so on.
    """
    labels = ("北", "东北", "东", "东南", "南", "西南", "西", "西北")
    wrapped = degree % 360
    # Shift by half a sector so each sector is centred on its compass point;
    # the final modulo folds the 337.5-360 range back onto north.
    sector = int((wrapped + 22.5) // 45) % 8
    return labels[sector]

In [9]:
# Load the test workbook (path is relative to the notebook's working directory)
# and show the resulting frame as the cell output.
df = pd.read_excel("../data/test.xlsx")
df

Unnamed: 0,城市,城市编号,区县,区县编号,站点,唯一编码,时间,风速(m/s),风向(°),气压(hPa),气温(℃),湿度(%),降水量(mm)
0,鄂尔多斯市,150600,达拉特旗,150621,双骏园林,150621001,2024-01-01,0,0,909.5,-8.2,75,0
1,鄂尔多斯市,150600,达拉特旗,150621,双骏园林,150621001,2024-01-02,0,0,910.2,-7.2,69,0
2,鄂尔多斯市,150600,达拉特旗,150621,双骏园林,150621001,2024-01-03,0.2,0,905.9,-7.8,69,0
3,鄂尔多斯市,150600,达拉特旗,150621,双骏园林,150621001,2024-01-04,0,0,904.3,-6.5,63,0
4,鄂尔多斯市,150600,达拉特旗,150621,双骏园林,150621001,2024-01-05,0.1,0,908.2,-7.3,54,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5485,鄂尔多斯市,150600,准格尔旗,150622,气象局,150622002,2024-12-27,2.1,150.2,892,-10.8,46,—
5486,鄂尔多斯市,150600,准格尔旗,150622,气象局,150622002,2024-12-28,0.2,83.5,885.3,-8.9,44,—
5487,鄂尔多斯市,150600,准格尔旗,150622,气象局,150622002,2024-12-29,0.4,94.6,878.9,-5.7,42,—
5488,鄂尔多斯市,150600,准格尔旗,150622,气象局,150622002,2024-12-30,0.7,152.7,882.1,-6.2,39,—


In [10]:
from model_provider.model_provider import MODEL_PROVIDER

In [11]:
from agents import Agent, Runner
from typing import Dict

def _parse_column_mapping(raw_text: str) -> dict:
    """Extract the mapping object from the raw text produced by the model."""
    # Imported locally so the helper works regardless of notebook cell order.
    import ast
    import json
    import re

    # Drop any <think>...</think> reasoning emitted before the actual answer.
    cleaned = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL)
    # Keep only the outermost {...}; this ignores markdown fences and stray prose.
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if start == -1 or end <= start:
        raise ValueError(f"No JSON object found in model output: {raw_text!r}")
    payload = cleaned[start:end + 1]
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        # The model may still answer with a Python literal (None, single quotes).
        try:
            parsed = ast.literal_eval(payload)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(f"Could not parse model output as a mapping: {payload!r}") from exc
    if not isinstance(parsed, dict):
        raise ValueError(f"Model output is not a JSON object: {payload!r}")
    return parsed


async def get_column_mapping(df: pd.DataFrame, user_keys: List[str]) -> Dict[str, Optional[str]]:
    """
    Map user-supplied variable names (e.g. '风速') to real column names of ``df``
    (e.g. '风速(m/s)') by asking an LLM agent.

    Args:
        df: Frame whose column names are offered to the model as candidates.
        user_keys: Variable names as the user wrote them.

    Returns:
        A dict with exactly one entry per item of ``user_keys``. The value is the
        matching column name, or ``None`` when the model gave no match or named
        something that is not an actual column of ``df``.

    Raises:
        ValueError: if the model's answer contains no parsable mapping object.
    """
    column_mapping_agent = Agent(
        name="列名映射agent",
        instructions="""
        请根据下列 DataFrame 列名，判断哪些列名最接近用户提供的变量意图。
        示例：
        【DataFrame列名】:
        ["风速(m/s)", "风向(16方位)", "PM2.5浓度"]

        【用户意图】:
        ["风速", "风向", "PM2.5", "季节"]

        请输出一个JSON对象，表示用户列名与真实列名的映射关系，如：
        {
        "风速": "风速(m/s)",
        "风向": "风向(16方位)",
        "PM2.5": "PM2.5浓度",
        "季节": null
        }
        请严格遵守以下规则：
        1. 请直接输出JSON对象，不要输出任何其他内容。
        2. 如果存在任何一个列名你认为无法匹配，请设置为null。
        """,
        model=MODEL_PROVIDER.get_model('qwen3:1.7b')
    )
    # Named `prompt` rather than `input` so the builtin is not shadowed.
    prompt = f'DataFrame列名：{df.columns.tolist()}\n用户意图：{user_keys}'
    column_map_result = await Runner.run(
        starting_agent=column_mapping_agent,
        input=prompt
    )

    parsed = _parse_column_mapping(str(column_map_result.final_output))
    # List membership (not a set) so an unhashable value from the model cannot raise.
    real_columns = df.columns.tolist()
    mapping: Dict[str, Optional[str]] = {}
    for key in user_keys:
        candidate = parsed.get(key)
        # Discard column names the model invented.
        mapping[key] = candidate if candidate in real_columns else None
    return mapping

In [None]:
from agents import Agent
from agents.mcp.server import MCPServerSse,MCPServerSseParams

# Connection parameters for an SSE-based MCP server on the local machine, port 8000.
mcp_param = MCPServerSseParams(
    url='http://127.0.0.1:8000/sse'
)
# Server handle named 'corr', with a 180-second client session timeout.
mcp = MCPServerSse(
    name='corr',
    params=mcp_param,
    client_session_timeout_seconds=180
)
# Top-level await is valid here because the cell runs under IPython.
# NOTE(review): this needs the server at the URL above to be reachable — confirm it is running first.
await mcp.connect()

read_timeout_seconds: 0:03:00


Error in sse_reader: peer closed connection without sending complete message body (incomplete chunked read)


In [29]:
from model_provider.model_provider import MODEL_PROVIDER
from agents import Runner
from agents.agent import StopAtTools
# Assistant agent that can use the tools exposed by the `mcp` server.
# The instructions ask for a helpful assistant that answers in Simplified Chinese.
# NOTE(review): StopAtTools presumably ends the run once 'correlation_analysis'
# has been called — confirm against the agents SDK.
# NOTE(review): name='asistant' looks like a typo for 'assistant'; left as-is
# because it is a runtime string.
agent = Agent(
    name='asistant',
    instructions='你是一个乐于助人的助手。语言：简体中文',
    model=MODEL_PROVIDER.get_model(None),
    mcp_servers=[mcp],
    tool_use_behavior=StopAtTools(stop_at_tool_names=['correlation_analysis'])
)

In [82]:
# Run the agent on one request. The prompt asks, for ./data/corr.csv and station
# 纳日松, for the correlation between ozone concentration and wind speed per
# season and per wind-direction sector (sector names, not angles).
res = await Runner.run(
    starting_agent=agent,
    # Alternative prompt kept for reference (a plain greeting):
    # input='你好',
    input='对./data/corr.csv计算[纳日松]站点的不同季节不同风向方位（不是角度）下的臭氧浓度和风速的相关性',
)

In [85]:
# Sorting the four season characters orders them by Unicode code point,
# which is not calendar order (see the output below).
sorted(list('春夏秋冬'))

['冬', '夏', '春', '秋']

In [84]:
# Inspect the run as a list of items: user message, tool call, tool output.
res.to_input_list()

[{'content': '对./data/corr.csv计算[纳日松]站点的不同季节不同风向方位（不是角度）下的臭氧浓度和风速的相关性',
  'role': 'user'},
 {'arguments': '{"correlation_vars":["臭氧浓度","风速"],"filters":{"站点":"纳日松"},"group_by":["季节","风向方位"],"read_data_param":{"read_data_method":"PANDAS","read_data_query":"./data/corr.csv"}}',
  'call_id': 'call_nmfq3gz9',
  'name': 'correlation_analysis',
  'type': 'function_call',
  'id': '__fake_id__'},
 {'call_id': 'call_nmfq3gz9',
  'output': '{"type":"text","text":"| 季节 \\\\ 风向方位 | 东 | 东北 | 东南 | 北 | 南 | 西 | 西北 | 西南 |\\n|---|---|---|---|---|---|---|---|---|\\n| 冬 | 0.473 | 0.096 | -0.54 | 0.497 | 数据不足 | 0.657 | 0.597 | 数据不足 |\\n| 夏 | -0.559 | -0.461 | -0.249 | 0.357 | -0.599 | -0.523 | -0.397 | -0.434 |\\n| 春 | -0.095 | -0.225 | -0.999 | 0.141 | 数据不足 | 0.565 | 0.451 | 无数据 |\\n| 秋 | 0.299 | 0.668 | 0.596 | 0.542 | 数据不足 | 0.421 | -0.003 | 无数据 |\\n","annotations":null}',
  'type': 'function_call_output'}]

In [77]:
# Show the final output of the most recent run stored in `res`.
res.final_output

'<think>\n好的，用户打招呼说“你好”，我需要友好回应。先确认是否有需要调用工具的情况，但看起来只是普通问候，直接回复即可。保持自然，用中文回应。\n\n现在分析用户意图，用户可能只是测试或者开始对话。我需要确保回应亲切，并询问如何帮助他们。暂时不需要调用任何函数，所以直接回复“你好！有什么可以帮助你的吗？”比较合适。\n</think>\n\n你好！有什么可以帮助你的吗？'

In [63]:
from agents import ToolCallOutputItem
# Check whether the last item produced by the run is a tool-call output.
isinstance(res.new_items[-1], ToolCallOutputItem)

False

In [44]:
# Parse final_output as JSON and show its 'text' field.
json.loads(res.final_output)['text']

'| 季节 \\ 风向(°) | 0.0 | 0.4 | 0.9 | 1.0 | 10.1 | 104.4 | 106.1 | 106.4 | 108.1 | 108.6 | 11.5 | 110.0 | 110.7 | 111.4 | 111.6 | 113.6 | 113.8 | 114.6 | 114.7 | 114.8 | 115.0 | 115.8 | 118.3 | 118.7 | 12.0 | 120.5 | 121.9 | 122.0 | 123.1 | 123.9 | 135.7 | 138.1 | 14.0 | 14.3 | 14.4 | 140.1 | 140.9 | 142.6 | 143.3 | 144.8 | 148.3 | 15.3 | 15.9 | 152.0 | 158.4 | 16.3 | 16.8 | 165.1 | 166.4 | 169.6 | 17.4 | 17.7 | 172.6 | 18.2 | 18.4 | 182.4 | 189.4 | 19.0 | 19.2 | 192.9 | 195.2 | 197.8 | 198.3 | 2.3 | 2.8 | 20.4 | 20.6 | 20.7 | 20.8 | 204.7 | 208.9 | 21.0 | 21.4 | 22.0 | 22.1 | 225.0 | 237.2 | 24.3 | 240.3 | 245.5 | 248.1 | 248.7 | 249.5 | 258.7 | 259.3 | 26.3 | 260.7 | 263.0 | 263.5 | 264.8 | 265.3 | 268.6 | 269.0 | 269.5 | 270.9 | 272.9 | 273.3 | 273.7 | 274.0 | 274.3 | 275.6 | 276.0 | 276.8 | 277.5 | 278.3 | 278.8 | 279.4 | 279.8 | 28.2 | 28.4 | 28.8 | 281.7 | 282.4 | 282.6 | 282.8 | 285.1 | 286.6 | 287.8 | 288.0 | 289.5 | 289.7 | 291.8 | 292.0 | 293.4 | 293.5 | 294.8 | 295.6 | 296.0 | 

In [17]:
from IPython.display import display, Code, Markdown
# final_output is decoded as JSON twice: its 'text' field is itself a JSON
# string, whose 'markdown' field is rendered as Markdown.
display(Markdown(json.loads(json.loads(res.final_output)['text'])['markdown']))

| 季节 \ 风向方位 | 东 | 东北 | 东南 | 北 | 南 | 西 | 西北 | 西南 |
|---|---|---|---|---|---|---|---|---|
| 冬 | 0.473 | 0.096 | -0.54 | 0.497 | 数据不足 | 0.657 | 0.597 | 数据不足 |
| 夏 | -0.559 | -0.461 | -0.249 | 0.357 | -0.599 | -0.523 | -0.397 | -0.434 |
| 春 | -0.095 | -0.225 | -0.999 | 0.141 | 数据不足 | 0.565 | 0.451 | - |
| 秋 | 0.299 | 0.668 | 0.596 | 0.542 | 数据不足 | 0.421 | -0.003 | - |


In [42]:
import pandas as pd
# Load the 区控 workbook.
# NOTE(review): this absolute path ties the notebook to one machine; the earlier
# load of test.xlsx uses a path relative to the notebook — consider aligning.
tt = pd.read_excel('/home/work/disk1/LLM-ljw/agent/analysis-agent/data/区控.xlsx')


In [43]:
# Keep only the rows for station 达旗白塔公园.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not trigger pandas' SettingWithCopyWarning.
tt = tt[tt['站点'] == '达旗白塔公园'].copy()

In [44]:
# List the column names of the filtered frame.
tt.columns

Index(['城市', '区县', '站点', '唯一编码', '时间', '风速(m/s)', '风向(°)', '气压(hPa)', '气温(℃)',
       '湿度(%)', '降水量(mm)', '能见度(km)', 'SO2', 'NO2', 'PM10', 'CO', 'O3',
       'PM2.5', 'AQI', '首要污染物', '污染级别', '颜色', '等级'],
      dtype='object')

In [45]:
# Parse the '时间' column into datetimes so the .dt accessor can be used on it.
tt['时间'] = pd.to_datetime(tt['时间'])

In [46]:
# Derive the grouping columns: season name from the month of '时间', and the
# compass-sector label from the wind-direction angle in '风向(°)'.
tt['season'] = tt['时间'].dt.month.map(get_season)
tt['wind_direction'] = tt['风向(°)'].apply(get_wind_direction_label)

In [47]:
# Group rows by (season, wind direction).
# NOTE(review): `g` is not used by any later visible cell; the correlation loop
# builds its own `grouped` from the same keys.
g = tt.groupby(['season','wind_direction'])

In [48]:
from scipy.stats import pearsonr

In [49]:
# Pearson correlation between O3 and wind speed for every
# (season, wind direction) group of `tt`.
var1 = 'O3'
var2 = '风速(m/s)'
grouped = tt.groupby(['season','wind_direction'])
result = {}
for keys, group in grouped:
    # Group keys arrive as a tuple; join them into one readable label.
    key_str = " | ".join(map(str, keys)) if isinstance(keys, tuple) else str(keys)
    # Only rows where both variables are present take part in the correlation.
    sub = group[[var1, var2]].dropna()
    # Fewer than 15 complete rows: store the sentinel -100 instead of a coefficient.
    corr = -100 if sub.shape[0] < 15 else pearsonr(sub[var1], sub[var2])[0]
    result[key_str] = corr

In [51]:
# Show the per-group correlations; -100 marks groups with fewer than 15 complete rows.
result

{'Autumn | 东南': -100,
 'Autumn | 北': 0.14280889411236317,
 'Autumn | 南': -100,
 'Autumn | 西': -100,
 'Autumn | 西北': -100,
 'Autumn | 西南': -100,
 'Spring | 东北': -100,
 'Spring | 北': 0.07780597262534006,
 'Spring | 西': -100,
 'Spring | 西北': -100,
 'Spring | 西南': -100,
 'Summer | 东': 0.17295097068796428,
 'Summer | 东北': -100,
 'Summer | 东南': -0.06369248835381733,
 'Summer | 北': 0.2561486371475464,
 'Summer | 南': -100,
 'Summer | 西': 0.45726847545218297,
 'Summer | 西北': 0.5467668554455424,
 'Summer | 西南': -100,
 'Winter | 北': 0.305119315907697}