In [None]:
import importlib
import business
# Re-import `business` so this kernel picks up the module's current source
# without a kernel restart.
importlib.reload(business)

In [None]:
# 第一轮规则审核代码 
import pandas as pd
import os
from datetime import datetime
import re
from typing import Tuple, Dict, List
from business import validate_business
from tqdm import tqdm

def first_round_rule_audit(input_file: str, save_output: bool = True) -> pd.DataFrame:
    """
    Run the first-round (rule-based) audit over every SMS in an Excel workbook.

    Each row is passed to ``validate_business``; the verdict is stored in the
    ``总体操作类型`` column ('放行', '失败' or '处理错误') and the reason text returned
    by the rule engine in ``业务操作类型``. Summary statistics are printed, and when
    the workbook has an ``操作类型`` column the rule verdict is compared against it.

    Args:
        input_file: Path of the input Excel file. The code reads the columns
            ``产品类型``, ``短信内容`` and ``短信签名``; ``账户类型`` is optional.
        save_output: When True (default, the original behaviour) the annotated
            DataFrame is written to ``123规则审核_<timestamp>.xlsx`` in the
            current working directory.

    Returns:
        The DataFrame read from ``input_file`` with the two result columns added.
    """
    # Load the workbook
    print(f"读取文件: {input_file}")
    df = pd.read_excel(input_file)
    print(f"成功读取文件，共 {len(df)} 条记录")

    total = len(df)

    def pct(count: int) -> float:
        # Percentage of `total`; an empty workbook yields 0 instead of ZeroDivisionError.
        return count / total * 100 if total else 0.0

    # Verdicts are collected in lists and assigned once after the loop; this avoids
    # one DataFrame write per row and does not depend on index labels being unique.
    overall_ops = []
    business_ops = []

    # Audit counters
    pass_count = 0
    fail_count = 0
    error_count = 0

    print("=============== 开始执行第一轮：规则审核==============")

    # Apply the business rules to every SMS
    for index, row in tqdm(df.iterrows(), total=total, desc="规则审核进度", ncols=100):
        try:
            business_passed, business_reason = validate_business(
                row['产品类型'],
                row['短信内容'],
                row['短信签名'],
                row.get('账户类型')
            )
        except Exception as e:
            # Best effort: a failing row is recorded as an error and the loop continues.
            print(f"处理行 {index} 时出错: {str(e)}")
            overall_ops.append('处理错误')
            business_ops.append(f"处理错误: {str(e)}")
            error_count += 1
            continue

        if business_passed:
            overall_ops.append('放行')
            pass_count += 1
        else:
            overall_ops.append('失败')
            fail_count += 1
        business_ops.append(business_reason)

    # dtype=object matches the columns the original code built from None placeholders.
    df['总体操作类型'] = pd.Series(overall_ops, index=df.index, dtype=object)
    df['业务操作类型'] = pd.Series(business_ops, index=df.index, dtype=object)

    # Summary
    print(f"\n规则审核完成:")
    print(f"- 总记录数: {total}")
    print(f"- 放行数量: {pass_count} ({pct(pass_count):.2f}%)")
    print(f"- 失败数量: {fail_count} ({pct(fail_count):.2f}%)")
    if error_count > 0:
        print(f"- 错误数量: {error_count} ({pct(error_count):.2f}%)")

    # Optionally persist the annotated frame
    if save_output:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"123规则审核_{timestamp}.xlsx"
        df.to_excel(output_file, index=False)
        print(f"完整审核结果已保存至: {output_file}")

    # Compare against the existing `操作类型` column when the workbook has one.
    # Rows marked '处理错误' never count as a match.
    if '操作类型' in df.columns:
        code_pass_human_fail = len(df[(df['总体操作类型'] == '放行') & (df['操作类型'] == '失败')])
        code_fail_human_pass = len(df[(df['总体操作类型'] == '失败') & (df['操作类型'] == '放行')])
        matched = len(df[df['总体操作类型'] == df['操作类型']])
        match_rate = pct(matched)

        print('\n审核结果统计:')
        print(f'总样本数: {total}')
        print(f'代码放行但人工失败数量: {code_pass_human_fail}')
        print(f'代码失败但人工放行数量: {code_fail_human_pass}')
        print(f'匹配数量: {matched}')
        print(f'匹配率: {match_rate:.2f}%')
    else:
        print("\n未找到'操作类型'列，跳过与人工审核结果的对比")

    return df

# Example usage: audit the workbook and keep the annotated DataFrame in the
# notebook-global `df_with_results`, which a later cell passes to test_batch_audit.
df_with_results = first_round_rule_audit("3月审核记录.xlsx")



In [None]:
import importlib
import business
import ai_check 
# Re-import both project modules so this kernel picks up their current source
# without a kernel restart.
importlib.reload(business)
importlib.reload(ai_check )

In [156]:
import psutil
import os

def show_memory_usage():
    """Print the RSS memory figure psutil reports for this process, divided by 1024*1024."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    rss_mb = rss_bytes / 1024 / 1024
    print(f"当前内存使用: {rss_mb:.2f} MB")

In [None]:

import time
from business import validate_business
from typing import Tuple, Dict
import pandas as pd
import os
from datetime import datetime
import re
from collections import Counter
from ai_check import AIAuditor
import json

# 2. 准备AI审核数据
def prepare_ai_data(sample_df):
    """Turn rule-audited rows into the list of dicts handed to the AI auditor.

    For every row a dict with the keys ``signature``, ``content``,
    ``business_type``, ``rule_score`` and ``rule_reason`` is produced.
    ``rule_score`` is the number following ``总分: `` in the row's ``业务操作类型``
    text; it stays at 100.0 when no such number is found or when extracting it
    raises (the error is printed). A row whose dict cannot be built (for example
    a missing column) is reported and left out of the result.
    """
    score_pattern = re.compile(r'总分: (\d+\.?\d*)')
    default_score = 100.0
    payloads = []

    for _, record in sample_df.iterrows():
        try:
            rule_score = default_score
            try:
                reason_text = record['业务操作类型']
                found = score_pattern.search(reason_text)
                if found is not None:
                    rule_score = float(found.group(1))
                elif "直接放行" in reason_text:
                    # A direct pass keeps the top score (same value as the default).
                    rule_score = default_score
            except Exception as e:
                print(f"提取分数时出错: {str(e)}")

            payload = {
                "signature": record['短信签名'],
                "content": record['短信内容'],
                "business_type": record['产品类型'],
                "rule_score": rule_score,
                "rule_reason": record['业务操作类型'],
            }
            payloads.append(payload)
        except Exception as e:
            print(f"准备AI审核数据时出错: {str(e)}")

    return payloads


# 4. 测试小批量审核
def test_batch_audit(df, sample_size: int = 10, random_state=None):
    """Run the second-round (AI) audit on a random sample of rule-approved SMS.

    Args:
        df: DataFrame produced by the rule audit; rows whose ``总体操作类型`` equals
            '放行' are the sampling pool.
        sample_size: Maximum number of rows to sample (default 10, the value that
            was previously hard-coded).
        random_state: Forwarded to ``DataFrame.sample``; pass an int to make the
            sample reproducible. ``None`` keeps the original unseeded behaviour.

    Returns:
        The list returned by ``AIAuditor.batch_audit``, or an empty list when no
        row is available for sampling.
    """
    passed_df = df[df['总体操作类型'] == '放行']
    sample_df = passed_df.sample(min(sample_size, len(passed_df)), random_state=random_state)
    print(f"已随机抽取 {len(sample_df)} 条放行规则审核的短信用于AI二次审核")

    # Nothing to audit: stop before the per-item average divides by zero.
    if sample_df.empty:
        print("没有可用于AI二次审核的放行短信，已跳过AI审核")
        return []

    print("============= 开始第二轮：AI审核 =============")
    # Build the auditor payloads
    ai_audit_list = prepare_ai_data(sample_df)

    # Batch audit
    auditor = AIAuditor()
    start_time = time.time()
    results = auditor.batch_audit(ai_audit_list)
    elapsed = time.time() - start_time

    # Tally
    pass_count = sum(1 for result in results if result['passed'])
    reject_count = len(results) - pass_count
    # prepare_ai_data may drop rows, so the result list can still be empty here.
    average = elapsed / len(results) if len(results) > 0 else 0.0

    print(f"审核完成: 放行 {pass_count} 条, 失败 {reject_count} 条")
    print(f"总耗时: {elapsed:.2f}秒, 平均每条: {average:.2f}秒")

    # Per-message details
    print("\n详细结果:")
    for i, result in enumerate(results):
        sms = result['sms']
        content = sms['content']
        print(f"\n短信 {i+1}:")
        print(f"签名: {sms['signature']}")
        # Long bodies are truncated to 50 characters for display.
        print(f"内容: {content[:50]}..." if len(content) > 50 else f"内容: {content}")
        print(f"业务类型: {sms['business_type']}")
        print(f"审核结果: {'放行' if result['passed'] else '失败'}")
        if not result['passed'] and 'reasons' in result['details']:
            print(f"失败原因: {', '.join(result['details']['reasons'])}")

    return results

# Run the batch test on the rule-audit output (`df_with_results` is defined by the
# cell that calls first_round_rule_audit, so that cell must have run first).
# test_single_audit()
test_batch_audit(df_with_results)

In [None]:
# 3. 测试单条短信审核
import time
from business import validate_business
from typing import Tuple, Dict
import pandas as pd
import os
from datetime import datetime
import re
from collections import Counter
from ai_check import AIAuditor
import json

def test_single_audit():
    """Run the rule audit on one hard-coded sample SMS and print input and verdict."""
    # Sample message under test
    sample = dict(
        signature=" 饿了么 ",
        content="【饿了么】尊敬的超级吃货卡用户，我们将在2025-04-02放行支付宝扣除你对应下一周期的卡费，扣款次日即可领取4个红包，扣除的卡费及卡种类请见https://to.ele.me/WPT1ZT9 ",
        business_type="行业-物流",
        account_type="直客",
    )
    account_type = sample.get("account_type")

    print("=============== 短信信息 =============")
    print(f"签名: {sample['signature']}")
    print(f"内容: {sample['content']}")
    print(f"业务类型: {sample['business_type']}")
    # The account type line is only shown when a non-empty value is present.
    if account_type:
        print(f"账户类型: {account_type}")
    print("\n")

    print("=============== 第一轮：规则审核 =============")
    # Rule-based audit
    rule_ok, rule_reason = validate_business(
        sample["business_type"],
        sample["content"],
        sample["signature"],
        account_type,
    )

    verdict = '放行' if rule_ok else '失败'
    print(f"规则审核结果: {verdict}")
    print(f"规则审核原因: {rule_reason}")

# Run the single-message rule-audit test
test_single_audit()

In [2]:
#测试DeepSeek API连接
import os
import time
from typing import Dict, Any

import requests

# API configuration
API_URL = "https://api.deepseek.com/chat/completions"
# SECURITY: the key is read from the DEEPSEEK_API_KEY environment variable instead
# of being stored in the notebook. The key that used to be hard-coded here has been
# exposed in this file and should be revoked. When the variable is unset the key is
# an empty string, so requests go out without a usable key.
API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
TIMEOUT = 10  # request timeout in seconds

def test_api():
    """Send one minimal chat request to API_URL and report whether it answered 200.

    Returns:
        A ``(ok, seconds)`` tuple: ``ok`` is True only when the HTTP status code
        is 200; ``seconds`` is the measured request duration, or 0 when the
        request raised.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    body = {
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": "Hello"}],
    }

    try:
        print(f"测试API连接，超时: {TIMEOUT}秒...")
        began = time.time()
        reply = requests.post(API_URL, headers=request_headers, json=body, timeout=TIMEOUT)
        duration = time.time() - began

        print(f"API响应: 状态码={reply.status_code}, 耗时={duration:.2f}秒")
        ok = reply.status_code == 200
        return ok, duration
    except Exception as err:
        # Any failure (timeout, DNS, TLS, ...) is reported as "not reachable".
        print(f"API连接错误: {str(err)}")
        return False, 0

# Run the connectivity check when this code executes as the main module
# (true for a notebook cell as well as for `python file.py`).
if __name__ == "__main__":
    success, time_used = test_api()  # time_used is not read again in this cell
    print(f"API测试结果: {'成功' if success else '失败'}")

测试API连接，超时: 10秒...
API响应: 状态码=200, 耗时=4.97秒
API测试结果: 成功


In [6]:
import importlib
import ai_check 
# Re-import `ai_check` so this kernel picks up the module's current source
# without a kernel restart.
importlib.reload(ai_check )

<module 'ai_check' from '/Users/a92461/Documents/硕汉/短信审核/ai_check.py'>

In [10]:
import time
import json
from ai_check import AIAuditor

def test_single_sms():
    """Audit one hard-coded SMS with the AI auditor only and print the verdict."""
    signature = "广东商城"
    content = "【广东商城】(温馨提示)尊敬的用户，您的电话号码卡已经办理成功，请插卡到手机使用，流量等优惠于48小时内到账，可关注微信公众号\"gdkf10000\"查询，感谢您的支持！"
    business_type = "会销-普通"

    print("=============== 短信信息 =============")
    print(f"签名: {signature}")
    print(f"内容: {content}")
    print(f"业务类型: {business_type}")
    print("\n")

    print("=============== AI审核 =============")
    auditor = AIAuditor()
    began = time.time()

    # AI audit only; the rule audit is not run in this test.
    passed, details = auditor.audit_sms(signature, content, business_type)

    elapsed = time.time() - began
    ai_verdict = '放行' if passed else '失败'
    details_json = json.dumps(details, ensure_ascii=False, indent=2)
    print(f"AI审核结果: {ai_verdict}")
    print(f"AI审核详情: {details_json}")
    print(f"AI审核耗时: {elapsed:.2f}秒")

    # Final verdict
    print("\n=============== 最终审核结果 =============")
    final_verdict = '放行' if passed else 'AI审核未放行'
    print(f"审核结果: {final_verdict}")
    if not passed:
        reasons = details.get('reasons')
        # Reasons are only listed for a rejected message that carries any.
        if reasons:
            print(f"失败原因: {', '.join(reasons)}")

# Run the single-message AI audit when this code executes as the main module
# (true for a notebook cell as well as for `python file.py`).
if __name__ == "__main__":
    test_single_sms()


签名: 广东商城
内容: 【广东商城】(温馨提示)尊敬的用户，您的电话号码卡已经办理成功，请插卡到手机使用，流量等优惠于48小时内到账，可关注微信公众号"gdkf10000"查询，感谢您的支持！
业务类型: 会销-普通




2025-04-22 10:50:24,451 - INFO - Token使用: 输入=781, 输出=58, 总计=839


AI审核结果: 放行
AI审核详情: {
  "should_pass": true,
  "risk_areas": [
    "微信、公众号内容敏感"
  ],
  "reasons": [
    "短信内容包含微信公众号信息，这在会销-普通业务类型中属于敏感内容，但未达到硬性违规标准"
  ],
  "token_usage": {
    "input_tokens": 781,
    "output_tokens": 58,
    "total_tokens": 839
  }
}
AI审核耗时: 8.33秒

审核结果: 放行


In [9]:
import importlib
import ai_check 
# Re-import `ai_check` so this kernel picks up the module's current source
# without a kernel restart.
importlib.reload(ai_check)

<module 'ai_check' from '/Users/a92461/Documents/硕汉/短信审核/ai_check.py'>

In [12]:
import time
import pandas as pd
from datetime import datetime
from ai_check import AIAuditor

def test_Excel_batch(input_file: str = "3月审核记录.xlsx", nrows: int = 10,
                     show_details: bool = False):
    """Batch-audit rows of an Excel workbook with the AI auditor and save the result.

    The rows are sent to ``AIAuditor.batch_audit``; pass/reject statistics are
    printed and, when the workbook has an ``操作类型`` column, the AI verdicts are
    compared against it. The annotated frame is written to
    ``AI审核结果_<timestamp>.xlsx`` in the current working directory.

    Args:
        input_file: Workbook to read. It must provide the columns ``短信签名``,
            ``短信内容`` and ``产品类型``. Defaults to the previously hard-coded file.
        nrows: Number of rows to read (default 10, as before); ``None`` reads
            the whole sheet.
        show_details: When True, print signature, content preview, business type,
            verdict and rejection reasons for every message.

    Raises:
        ValueError: If the auditor returns a different number of results than
            rows were submitted, because results are matched to rows by position.
    """
    # Load the workbook
    df = pd.read_excel(input_file, nrows=nrows)
    print(f"读取Excel文件成功，共{len(df)}条记录")

    # Build the auditor payloads
    ai_audit_list = [
        {
            "signature": row['短信签名'],
            "content": row['短信内容'],
            "business_type": row['产品类型'],
        }
        for _, row in df.iterrows()
    ]

    # Batch audit
    print("=============== 开始AI批量审核 =============")
    auditor = AIAuditor()
    start_time = time.time()
    results = auditor.batch_audit(ai_audit_list)
    elapsed = time.time() - start_time

    total = len(results)
    # Fail early with a clear message rather than with pandas' length-mismatch error.
    if total != len(df):
        raise ValueError(f"AI审核结果数量({total})与输入记录数({len(df)})不一致")

    # Basic statistics; every ratio is guarded so an empty workbook does not divide by zero.
    passed_flags = [bool(result['passed']) for result in results]
    pass_count = sum(passed_flags)
    reject_count = total - pass_count
    reject_rate = (reject_count / total) * 100 if total > 0 else 0
    avg_seconds = elapsed / total if total > 0 else 0.0

    print(f"审核完成: 放行 {pass_count} 条, 失败 {reject_count} 条")
    print(f"拦截率: {reject_rate:.2f}%")
    print(f"总耗时: {elapsed:.2f}秒, 平均每条: {avg_seconds:.2f}秒")

    # Optional per-message dump (was commented-out code).
    if show_details:
        print("\n详细结果:")
        for i, result in enumerate(results):
            sms = result['sms']
            content = sms['content']
            print(f"\n短信 {i+1}:")
            print(f"签名: {sms['signature']}")
            print(f"内容: {content[:50]}..." if len(content) > 50 else f"内容: {content}")
            print(f"业务类型: {sms['business_type']}")
            print(f"审核结果: {'放行' if result['passed'] else '失败'}")
            if not result['passed'] and 'reasons' in result['details']:
                print(f"失败原因: {', '.join(result['details']['reasons'])}")

    # Result columns (AI verdict, then rejection reasons; empty string when passed).
    df['AI审核结果'] = ['放行' if passed else '失败' for passed in passed_flags]
    df['失败原因'] = [
        ', '.join(result['details']['reasons'])
        if not result['passed'] and 'reasons' in result['details'] else ''
        for result in results
    ]

    # Comparison with the existing `操作类型` column, when present.
    if '操作类型' in df.columns:
        ai_pass = pd.Series(passed_flags, index=df.index, dtype=bool)
        op_pass = df['操作类型'] == '放行'
        op_reject = df['操作类型'] == '失败'

        ai_pass_op_pass = int((ai_pass & op_pass).sum())
        ai_reject_op_pass = int((~ai_pass & op_pass).sum())
        ai_pass_op_reject = int((ai_pass & op_reject).sum())
        ai_reject_op_reject = int((~ai_pass & op_reject).sum())

        total_op_pass = int(op_pass.sum())
        total_op_reject = int(op_reject.sum())

        match_count = ai_pass_op_pass + ai_reject_op_reject
        match_rate = (match_count / total) * 100 if total > 0 else 0

        print("\n=============== AI与人工操作对比 =============")
        print(f"总记录数: {total}")
        print(f"AI放行 & 人工放行: {ai_pass_op_pass} 条")
        print(f"AI失败 & 人工放行: {ai_reject_op_pass} 条")
        print(f"AI放行 & 人工失败: {ai_pass_op_reject} 条")
        print(f"AI失败 & 人工失败: {ai_reject_op_reject} 条")
        print(f"匹配率: {match_rate:.2f}%")

        # Breakdown per `操作类型` value
        if total_op_pass > 0:
            correct_pass_rate = (ai_pass_op_pass / total_op_pass) * 100
            print(f"人工放行中AI正确判断率: {correct_pass_rate:.2f}%")

        if total_op_reject > 0:
            correct_reject_rate = (ai_reject_op_reject / total_op_reject) * 100
            print(f"人工失败中AI正确判断率: {correct_reject_rate:.2f}%")

            # NOTE(review): the denominator is the total record count, not the number
            # of rows with 操作类型 == '失败' — kept as in the original; confirm intent.
            ai_wrong_rate = (ai_pass_op_reject / total) * 100
            print(f"AI漏杀率: {ai_wrong_rate:.2f}%")

        # 1 when the AI verdict equals `操作类型`, otherwise 0.
        df['AI与人工是否匹配'] = ((ai_pass & op_pass) | (~ai_pass & op_reject)).astype(int)

    # Save the annotated frame
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"AI审核结果_{timestamp}.xlsx"
    df.to_excel(output_file, index=False)
    print(f"\n审核结果已保存至: {output_file}")

# Run the Excel batch audit when this code executes as the main module
# (true for a notebook cell as well as for `python file.py`).
if __name__ == "__main__":

    test_Excel_batch()
 

2025-04-22 10:54:00,308 - INFO - 开始批量审核 10 条短信
2025-04-22 10:54:00,308 - INFO - 正在处理: 1/10


读取Excel文件成功，共10条记录


2025-04-22 10:54:10,017 - INFO - Token使用: 输入=813, 输出=109, 总计=922
2025-04-22 10:54:10,019 - INFO - 正在处理: 2/10
2025-04-22 10:54:20,581 - INFO - Token使用: 输入=788, 输出=81, 总计=869
2025-04-22 10:54:20,584 - INFO - 正在处理: 3/10
2025-04-22 10:54:30,768 - INFO - Token使用: 输入=729, 输出=114, 总计=843
2025-04-22 10:54:30,770 - INFO - 正在处理: 4/10
2025-04-22 10:54:39,739 - INFO - Token使用: 输入=791, 输出=93, 总计=884
2025-04-22 10:54:39,741 - INFO - 正在处理: 5/10
2025-04-22 10:54:49,160 - INFO - Token使用: 输入=788, 输出=94, 总计=882
2025-04-22 10:54:49,163 - INFO - 正在处理: 6/10
2025-04-22 10:54:58,090 - INFO - Token使用: 输入=809, 输出=95, 总计=904
2025-04-22 10:54:58,093 - INFO - 正在处理: 7/10
2025-04-22 10:55:08,309 - INFO - Token使用: 输入=808, 输出=120, 总计=928
2025-04-22 10:55:08,312 - INFO - 正在处理: 8/10
2025-04-22 10:55:17,257 - INFO - Token使用: 输入=781, 输出=87, 总计=868
2025-04-22 10:55:17,260 - INFO - 正在处理: 9/10
2025-04-22 10:55:27,742 - INFO - Token使用: 输入=788, 输出=112, 总计=900
2025-04-22 10:55:27,744 - INFO - 正在处理: 10/10
2025-04-22 10:55:39,771

审核完成: 放行 6 条, 失败 4 条
拦截率: 40.00%
总耗时: 99.47秒, 平均每条: 9.95秒

总记录数: 10
AI放行 & 人工放行: 4 条
AI失败 & 人工放行: 2 条
AI放行 & 人工失败: 2 条
AI失败 & 人工失败: 2 条
匹配率: 60.00%
人工放行中AI正确判断率: 66.67%
人工失败中AI正确判断率: 50.00%
AI漏杀率: 20.00%

审核结果已保存至: AI审核结果_20250422_105539.xlsx
