# 定义模型

In [22]:
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from dotenv import load_dotenv, find_dotenv
import os

_ = load_dotenv(find_dotenv())

model = AzureChatOpenAI(
    openai_api_version="2024-02-01",
    azure_deployment=os.getenv('AZURE_OPENAI_DEPLOYMENT'),
    temperature=0,
)
model_35 = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

In [23]:
model_35.invoke('介绍下NanoDet模型')

AIMessage(content='NanoDet是一种轻量级目标检测模型，专门设计用于在资源受限的环境下进行实时目标检测。该模型基于MobileNetV3网络结构，通过精心设计的网络结构和优化策略，实现了高效的目标检测性能。\n\nNanoDet模型具有以下特点：\n1. 轻量级：NanoDet模型采用了轻量级的MobileNetV3网络结构，减少了模型参数和计算量，适合在资源受限的设备上部署。\n2. 高效性能：尽管是轻量级模型，NanoDet在目标检测任务上表现出色，具有较高的准确率和检测速度。\n3. 实时性能：NanoDet模型设计用于实时目标检测，能够在较短的时间内完成目标检测任务。\n4. 易部署：NanoDet模型可以轻松部署在各种嵌入式设备和移动设备上，为各种应用场景提供目标检测功能。\n\n总的来说，NanoDet是一种高效、轻量级的目标检测模型，适合在资源受限的环境下进行实时目标检测任务。', response_metadata={'token_usage': {'completion_tokens': 349, 'prompt_tokens': 15, 'total_tokens': 364}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d3155bb5-215a-4370-912d-5d1bc8c62939-0')

## Function calling

In [33]:
from langchain_core.tools import tool

@tool
def add(a: int, b: int) -> int:
    """Adds a and b."""
    return a + b

@tool
def multiply(a: int, b: int) -> int:
    """Multiplies a and b."""
    return a * b


@tool
def send_email(address: str, content: str) -> str:
    """Send an email with certain content to address provided"""
    print(f"The {content} is sent to {address}")
    return "success"

tools = [add, multiply, send_email]
model_with_tools = model_35.bind_tools(tools)

model_with_tools

RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1129ba7a0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1129bbeb0>, model_name='gpt-3.5-turbo-0125', temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), kwargs={'tools': [{'type': 'function', 'function': {'name': 'add', 'description': 'add(a: int, b: int) -> int - Adds a and b.', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'integer'}, 'b': {'type': 'integer'}}, 'required': ['a', 'b']}}}, {'type': 'function', 'function': {'name': 'multiply', 'description': 'multiply(a: int, b: int) -> int - Multiplies a and b.', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'integer'}, 'b': {'type': 'integer'}}, 'required': ['a', 'b']}}}, {'type': 'function', 'function': {'name': 'send_email', 'description': 'send_email(address: str, content: str) -> str - Send an email with certain content to address provided', 'para

In [34]:
query = "What is 3 * 12? Also, what is 11 + 49? Please send the result to my friend with address nathan@gmail.com"

model_with_tools.invoke(query).tool_calls

[{'name': 'multiply',
  'args': {'a': 3, 'b': 12},
  'id': 'call_MGRwnMadov5EtQ5h0jfmCmtq'},
 {'name': 'add',
  'args': {'a': 11, 'b': 49},
  'id': 'call_aEGpmzSxFmX4zPutMf8XdmwD'},
 {'name': 'send_email',
  'args': {'address': 'nathan@gmail.com',
   'content': 'The result of 3 * 12 is 36. The result of 11 + 49 is 60.'},
  'id': 'call_oadjEP1qfE9nWrH0sAiKzmUZ'}]

In [35]:
from langchain_core.messages import HumanMessage, ToolMessage

messages = [HumanMessage(query)]
ai_msg = model_with_tools.invoke(messages)
messages.append(ai_msg)
for tool_call in ai_msg.tool_calls:
    selected_tool = {"add": add, "multiply": multiply, "send_email": send_email}[tool_call["name"].lower()]
    tool_output = selected_tool.invoke(tool_call["args"])
    print(tool_call)
    messages.append(ToolMessage(tool_output, tool_call_id=tool_call["id"]))
messages

{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_pnkNRgUN0K1oqwoEiJ4zAAIa'}
{'name': 'add', 'args': {'a': 11, 'b': 49}, 'id': 'call_QEcUfBPDIwqyJjYzr60FemLC'}
The The result of 3 * 12 is 36. The result of 11 + 49 is 60. is sent to nathan@gmail.com
{'name': 'send_email', 'args': {'address': 'nathan@gmail.com', 'content': 'The result of 3 * 12 is 36. The result of 11 + 49 is 60.'}, 'id': 'call_86TWbLZlHQZWBveMqslG0bWv'}


[HumanMessage(content='What is 3 * 12? Also, what is 11 + 49? Please send the result to my friend with address nathan@gmail.com'),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_pnkNRgUN0K1oqwoEiJ4zAAIa', 'function': {'arguments': '{"a": 3, "b": 12}', 'name': 'multiply'}, 'type': 'function'}, {'id': 'call_QEcUfBPDIwqyJjYzr60FemLC', 'function': {'arguments': '{"a": 11, "b": 49}', 'name': 'add'}, 'type': 'function'}, {'id': 'call_86TWbLZlHQZWBveMqslG0bWv', 'function': {'arguments': '{"address": "nathan@gmail.com", "content": "The result of 3 * 12 is 36. The result of 11 + 49 is 60."}', 'name': 'send_email'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 167, 'total_tokens': 260}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-e47590b6-5a95-4cb6-a97e-177e6eb74a52-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_p

In [36]:
model_with_tools.invoke(messages)

AIMessage(content='I have calculated the results for you:\n- 3 * 12 = 36\n- 11 + 49 = 60\n\nI have also sent the results to your friend at the email address nathan@gmail.com.', response_metadata={'token_usage': {'completion_tokens': 47, 'prompt_tokens': 279, 'total_tokens': 326}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-12ff43fe-82b5-4a1b-bfb6-b710e64ec8f0-0')

## Prompt and Parser

In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

prompt = ChatPromptTemplate.from_messages(
    [
        ('system', '你是一个专业的软件安全测试工程师，善于发现HTML网页中的漏洞'),
        ('user', '请指出这段网页HTLM代码中的问题并做详细解释，结合HTML实际代码给出解决建议:\n{html}')
    ]
)

with open('Aidd.html', "r", encoding="utf-8") as f:
    html = f.read()

output_parser = StrOutputParser()

chain = prompt | model | output_parser

response = chain.invoke({"html": html})
print(response)

这段HTML代码中存在的问题主要有以下几点：

1. 缺少了对于HTML注入攻击的防护。在HTML代码中，如果用户输入被直接插入到页面中，可能会导致HTML注入攻击。比如，用户可以在输入框中输入一段脚本，这段脚本会被插入到HTML中并执行。这可能会导致各种安全问题，比如窃取用户数据、在用户的浏览器中执行恶意操作等。

2. 缺少了对于跨站脚本攻击（XSS）的防护。在这段HTML代码中，没有对用户输入进行适当的过滤或转义，可能会导致XSS攻击。比如，攻击者可以在URL参数中插入一段脚本，当其他用户访问这个URL时，这段脚本会在他们的浏览器中执行。

3. 缺少了对于跨站请求伪造（CSRF）的防护。在这段HTML代码中，没有使用CSRF令牌来防止CSRF攻击。比如，攻击者可以构造一个请求，诱导用户去点击，从而在用户不知情的情况下以用户的身份执行操作。

解决建议：

1. 对用户输入进行适当的过滤或转义，防止HTML注入攻击。可以使用一些库，比如OWASP的Java Encoder库，来对用户输入进行转义。

2. 使用内容安全策略（CSP）来防止XSS攻击。CSP可以限制浏览器只加载和执行来自特定来源的脚本，从而防止XSS攻击。

3. 使用CSRF令牌来防止CSRF攻击。在每个需要防止CSRF攻击的表单中，都应该包含一个CSRF令牌。服务器应该验证这个令牌，只有当令牌验证通过时，才执行相应的操作。


In [8]:
from langchain_openai import AzureOpenAI
from langchain_core.messages import HumanMessage
llm = AzureOpenAI(
    openai_api_version="2024-02-01", 
    azure_deployment=os.getenv('AZURE_OPENAI_DEPLOYMENT')
)
# print(llm.invoke('请告诉我网页一般存在哪些漏洞？'))

# print(llm.invoke([HumanMessage(content='请告诉我网页一般存在哪些漏洞？')]))

print(model.invoke([HumanMessage(content='请告诉我网页一般存在哪些漏洞？')]))

content='1. SQL注入漏洞：攻击者通过输入恶意的SQL语句，使得应用程序执行非预期的SQL命令。\n\n2. 跨站脚本攻击（XSS）：攻击者通过在网页中插入恶意脚本，当其他用户浏览该网页时，这些脚本会被执行，从而达到攻击的目的。\n\n3. 跨站请求伪造（CSRF）：攻击者诱导用户点击链接，以用户的身份发送恶意请求。\n\n4. 文件上传漏洞：由于服务器对上传文件的处理不当，攻击者可以上传恶意文件，进而控制服务器。\n\n5. 命令注入漏洞：攻击者通过输入恶意的命令，使得应用程序执行非预期的系统命令。\n\n6. 信息泄露和错误处理：应用程序在处理错误时，可能会泄露系统的敏感信息，为攻击者提供攻击线索。\n\n7. 服务器配置错误：如目录列表、默认页面、未授权的HTTP方法等，可能会被攻击者利用。\n\n8. 无效的输入验证：如果应用程序没有对用户的输入进行严格的验证和过滤，可能会被攻击者利用。\n\n9. 会话管理漏洞：如会话劫持、会话固定等，攻击者可以利用这些漏洞，冒充其他用户。\n\n10. 不安全的直接对象引用：攻击者可以通过修改参数，访问到不应该访问的对象。' response_metadata={'token_usage': {'completion_tokens': 428, 'prompt_tokens': 26, 'total_tokens': 454}, 'model_name': 'gpt-4-32k', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_

In [7]:
prompt = ChatPromptTemplate.from_messages(
    [
        ('system', '你是一个专业的{role}，善于发现HTML网页中的漏洞'),
        ('user', '请指出这段网页HTLM代码中的问题并做详细解释，结合HTML实际代码给出解决建议:\n{html}')
    ]
)

msgs = prompt.format_messages(role='软件安全测试工程师', html=html)
print(msgs)

[SystemMessage(content='你是一个专业的软件安全测试工程师，善于发现HTML网页中的漏洞'), HumanMessage(content='请指出这段网页HTLM代码中的问题并做详细解释，结合HTML实际代码给出解决建议:\n\n<!DOCTYPE html>\n<html lang="zh">\n    <head>\n        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">\n        <title>AiDD软件研发数字峰会-深圳站</title>\n        <meta name="description" content="">\n        <meta name="keywords" content="AiDD,数字峰会,大会嘉宾,深圳站">\n        \n        \n        <meta name="viewport" content="width=device-width">\n        <script>\n            (function() {\n                window.os = (function() {\n                    var ua = navigator.userAgent;\n                    var isWindowsPhone = /(?:Windows Phone)/.test(ua);\n                    var isSymbian = /(?:SymbianOS)/.test(ua) || isWindowsPhone;\n                    var isAndroid = /(?:Android)/.test(ua);\n                    var isFireFox = /(?:Firefox)/.test(ua);\n                    var isChrome = /(?:Chrome|CriOS)/.test(ua);\n                    var isTablet = /(?:iPad

In [27]:
from langchain_core.prompts import PromptTemplate

template = PromptTemplate.from_template('你作为一个测试人员{next}')
template.format(next='任重道远')

'你作为一个测试人员任重道远'

In [9]:
for chunk in model.stream(msgs):
    print(chunk.content, end='', flush=True)

这段HTML代码中存在的问题主要有以下几点：

1. 代码中存在大量的内联样式，这会导致代码的可读性和可维护性降低。建议将样式提取到外部CSS文件中，通过class或id进行引用。

2. 代码中存在大量的JavaScript代码，这同样会影响代码的可读性和可维护性。建议将JavaScript代码提取到外部JS文件中。

3. 代码中存在大量的空行和无用注释，这会增加文件的大小，影响网页的加载速度。建议删除无用的空行和注释。

4. 代码中存在大量的绝对定位，这会导致页面的布局在不同设备和浏览器上显示不一致。建议使用相对定位或者flex布局进行页面布局。

5. 代码中的图片资源使用了绝对路径，这会导致在不同环境下资源无法加载。建议使用相对路径或者将资源上传到CDN。

6. 代码中的meta标签中的description和keywords为空，这会影响搜索引擎的SEO。建议填写合适的description和keywords。

7. 代码中的链接没有添加rel="noopener noreferrer"，这可能会导致页面被恶意利用。建议在所有的_blank打开的链接中添加rel="noopener noreferrer"。

8. 代码中的JavaScript代码没有进行错误处理，这可能会导致JavaScript错误导致页面无法正常工作。建议添加适当的错误处理代码。

9. 代码中的图片没有添加alt属性，这会影响搜索引擎的SEO和无障碍访问。建议为所有的图片添加合适的alt属性。

10. 代码中的script标签没有添加async或defer属性，这会导致页面阻塞。建议添加async或defer属性，使得JavaScript代码异步加载。

以上就是我对这段HTML代码的分析和建议，希望对你有所帮助。

In [11]:
import json
from langchain_core.utils.function_calling import convert_to_openai_tool

def divide(a: int, b: int):
    return a/b

print(json.dumps(convert_to_openai_tool(divide), indent=2))

{
  "type": "function",
  "function": {
    "name": "divide",
    "description": "",
    "parameters": {
      "type": "object",
      "properties": {
        "a": {
          "type": "integer"
        },
        "b": {
          "type": "integer"
        }
      },
      "required": [
        "a",
        "b"
      ]
    }
  }
}


In [13]:
model_with_div = model.bind_tools([divide])
resp = model_with_div.invoke("16 / 4")
print(resp.content)




In [33]:
import os

os.environ['LANGCHAIN_TRACING_V2']='false'
os.environ['LANGCHAIN_API_KEY']='langchain_key'

In [34]:
for chunk in model.stream(msgs):
    print(chunk.content, end='', flush=True)

这段HTML代码存在以下问题：

1. 缺少输入验证：在表单提交时，没有对用户输入进行验证，容易导致安全漏洞，如SQL注入、跨站脚本攻击等。

2. 缺少HTTPS协议：该网页没有使用HTTPS协议进行加密传输，容易被黑客窃取用户信息。

3. 缺少CSRF防护：该网页没有对CSRF攻击进行防护，容易导致用户信息被窃取或篡改。

4. 缺少XSS防护：该网页没有对XSS攻击进行防护，容易导致用户信息被窃取或篡改。

解决建议：

1. 在表单提交时，对用户输入进行验证，防止安全漏洞的发生。

2. 使用HTTPS协议进行加密传输，保障用户信息的安全。

3. 对CSRF攻击进行防护，如使用Token验证等方式。

4. 对XSS攻击进行防护，如对用户输入进行过滤、转义等方式。

In [11]:
from langchain_community.vectorstores import Chroma
Sa

loader = WebBaseLoader("https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt")
web_attack_data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(web_attack_data)
print(len(splits))

77


In [4]:
from langchain_community.vectorstores import Chroma
from chromadb.utils import embedding_functions
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("auto-dev.pdf")
pages = loader.load_and_split()
print('load_and_split done ')

# embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
embedding_model_default = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# embedding_model = AzureOpenAIEmbeddings(
#         openai_api_version="2024-02-01",
#         azure_deployment="embedding-small"
#     )
db = Chroma.from_documents(documents=pages, embedding=embedding_model_default)
print(db)
retriever= db.as_retriever(search_kwargs={"k":5})
docs = retriever.get_relevant_documents("What is auto-dev")
print(len(docs))
for doc in docs:
    print("=========New==========")
    print(doc.page_content)

load_and_split done 
<langchain_community.vectorstores.chroma.Chroma object at 0x0000019DA5D88A30>
5
AutoDev offers the following key features: (i) the ability to
track and manage user and AI agents conversations through
aConversation Manager , (ii) a library of customized Tools
to accomplish a variety of code and SE related objectives,
(iii) the ability to schedule various AI agents to work col-
laboratively towards a common objective through an Agent
Scheduler , and (iv) the ability to execute code and run tests
through an Evaluation Environment .arXiv:2403.08299v1  [cs.SE]  13 Mar 2024
Michele Tufano, et al.
Figure 1 illustrates a high-level example of the AutoDev
workflow. The user defines an objective (e.g., testing a spe-
cific method). The AI Agent writes tests in a new file and
initiates the test execution command, all within a secure
Evaluation Environment. The output of the test execution,
including failure logs, is then incorporated into the conver-
sation. The AI agent anal

  warn_deprecated(


In [39]:
import logging
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

from langchain.retrievers.multi_query import MultiQueryRetriever

retriever_multi = MultiQueryRetriever.from_llm(retriever, llm=model)

In [42]:
find = retriever_multi.get_relevant_documents(query="What is auto-dev")
print(len(find))
for f in find:
    print("=========New==========")
    print(f.page_content)
    print("\t========")
    print(f.metadata)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information on auto-dev technology?', '2. How does auto-dev work and what are its benefits?', '3. What are the key features and applications of auto-dev?']


3
Michele Tufano, et al.
Figure 1 illustrates a high-level example of the AutoDev
workflow. The user defines an objective (e.g., testing a spe-
cific method). The AI Agent writes tests in a new file and
initiates the test execution command, all within a secure
Evaluation Environment. The output of the test execution,
including failure logs, is then incorporated into the conver-
sation. The AI agent analyzes this output, triggers a retrieval
command, incorporates the retrieved information by edit-
ing the file, and re-invokes the test execution. Finally, the
environment provides feedback on the success of the test
execution and completion of the user’s objective.
The entire process is orchestrated by AutoDev autonomously,
requiring no developer intervention beyond setting the ini-
tial objective. In contrast, with existing AI coding assistants
integrated into IDEs, developers would have to manually
execute tests (e.g., run pytest), provide failure logs to the AI
chat interface, possibly