# 用户故事角色歧义检测分析

本notebook用于分析大模型在检测用户故事角色歧义方面的性能。

## 角色歧义定义
角色歧义是指用户故事中的角色（actor）不明确或不清晰，导致对功能的执行者、受益者或相关方产生理解偏差。例如：
- 角色定义模糊或不具体
- 存在多个可能的执行者
- 角色权限和职责不明确
- 缺乏对角色的具体描述

In [None]:
import json
import os
import re
import time
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from openai import OpenAI
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# API configuration.
# Each setting is read from the environment first so that credentials do not
# have to live in the notebook. The literal values are kept only as a
# backward-compatible fallback.
# NOTE(review): the fallback API key is committed in plain text; it should be
# rotated and removed once ZHIZENGZENG_API_KEY is provisioned everywhere.
CONFIG = {
    "base_url": os.environ.get("ZHIZENGZENG_BASE_URL")
    or "https://api.zhizengzeng.com/v1/",
    "api_key": os.environ.get("ZHIZENGZENG_API_KEY")
    or "sk-zk20f741becece1c055c848225093b2e458662329a0f1016",
}

# Models to evaluate; each name is passed as the `model` argument of the
# chat-completions request.
MODELS = [
    "gpt-3.5-turbo",
    "claude-sonnet-4-20250514",
    "gemini-2.5-flash",
    "grok-3-mini",
    "deepseek-chat",
    "qwen3-coder-plus",
]

In [None]:
# Load the labelled user-story dataset from the 'User_Stories' sheet.
print("Loading user story ambiguity dataset...")
df = pd.read_excel(
    r"data/User Story Ambiguity Dataset_A Comprehensive Research Resource/Cornelius_2025_user_story_ambiguity_dataset.xlsx",
    sheet_name='User_Stories',
)

# Quick overview: table size and label distribution.
print(f"数据集形状: {df.shape}")
print(f"角色歧义统计: {df['ActorAmbiguity'].value_counts()}")

# Preview the first three stories labelled as actor-ambiguous.
print("\n角色歧义示例:")
actor_examples = df.loc[df['ActorAmbiguity'] == True, ['StoryText']].head(3)
for number, text in enumerate(actor_examples['StoryText'], start=1):
    print(f"{number}. {text}")

# Preview the first three stories labelled as not actor-ambiguous.
print("\n无角色歧义示例:")
non_actor_examples = df.loc[df['ActorAmbiguity'] == False, ['StoryText']].head(3)
for number, text in enumerate(non_actor_examples['StoryText'], start=1):
    print(f"{number}. {text}")

In [None]:
def get_actor_ambiguity_prompt(story_text: str) -> str:
    """
    Build the prompt that asks a model whether a user story has actor ambiguity.

    Args:
        story_text: The user story to analyse; inserted verbatim into the prompt.

    Returns:
        The full prompt text: background, role, task, the list of ambiguity
        characteristics, the story itself, and the JSON output format the
        model is asked to follow.
    """
    # Doubled braces in the template render as literal braces of the JSON example.
    return f"""
**背景**: 角色歧义是指用户故事中的角色（actor）不明确或不清晰，导致对功能的执行者、受益者或相关方产生理解偏差。

**角色**: 你是一名专业的自然语言处理专家，专门检测软件需求规格中的角色歧义。

**任务**: 分析以下用户故事，判断其中是否存在角色歧义。

**角色歧义的特征包括**:
1. 角色定义模糊或不具体（如"用户"、"相关人员"等过于宽泛的表述）
2. 存在多个可能的执行者或受益者
3. 角色权限和职责不明确
4. 缺乏对角色的具体描述或特征
5. 角色与功能的关系不清晰
6. 使用代词指代角色但不明确具体指谁
7. 角色身份在业务流程中不明确
8. 多个角色之间的权限边界模糊

**用户故事**: {story_text}

**输出要求**:
请按照以下JSON格式输出你的分析结果：
{{
    "has_actor_ambiguity": true/false,
    "ambiguity_explanation": "如果存在角色歧义，请解释歧义的具体内容和角色不明确的地方；如果不存在角色歧义，请说明为什么用户故事的角色是清晰的",
    "identified_actors": "列出用户故事中识别出的角色，如果存在歧义请说明哪些角色不明确",
    "role_clarification_needed": "如果存在歧义，请说明需要澄清的角色信息；如果不存在歧义，请说明角色定义已经清晰的部分",
    "suggested_improvement": "如果存在角色歧义，请提出改进建议；如果不存在角色歧义，请填写'无角色歧义，无需改进'"
}}

**注意事项**:
- has_actor_ambiguity的值只能是true或false
- ambiguity_explanation应详细说明角色歧义的原因
- identified_actors应明确指出识别的角色
- role_clarification_needed应指出需要澄清的具体角色信息
- suggested_improvement应提供具体的改进建议
"""

In [None]:
def _fallback_prediction(reason: str) -> Dict:
    """Build the placeholder result returned when no usable model answer exists."""
    return {
        # Failures keep counting as "ambiguous", exactly as before.
        "has_actor_ambiguity": True,
        "ambiguity_explanation": reason,
        "identified_actors": reason,
        "role_clarification_needed": reason,
        "suggested_improvement": reason,
    }


def call_llm(prompt: str, model: str) -> Dict:
    """
    Send ``prompt`` to ``model`` and return the JSON object found in its reply.

    The request goes through the OpenAI-compatible endpoint configured in
    ``CONFIG``. This function never raises for request or parsing problems;
    it returns a placeholder dict instead so a batch run can continue.

    Args:
        prompt: Full prompt text, sent as a single user message.
        model: Model name passed to the chat-completions request.

    Returns:
        The dict decoded from the first ``{`` to the last ``}`` of the reply.
        If the request fails, a placeholder whose text fields read
        "API调用失败: <error>". If the reply is empty, holds no JSON object,
        or the JSON is invalid, a placeholder whose text fields read
        "无法解析模型响应". Both placeholders set ``has_actor_ambiguity`` to True.
    """
    # Only the network round-trip is guarded here, so that parsing problems
    # are not misreported as API failures.
    try:
        client = OpenAI(api_key=CONFIG["api_key"], base_url=CONFIG["base_url"])
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        content = response.choices[0].message.content
    except Exception as e:
        print(f"API调用失败 ({model}): {str(e)}")
        return _fallback_prediction(f"API调用失败: {str(e)}")

    # The reply text can be missing; treat that like an unparsable answer.
    # The greedy pattern spans from the first "{" to the last "}" so that
    # surrounding prose or code fences are ignored.
    match = re.search(r'\{.*\}', content or "", re.DOTALL)
    if match:
        try:
            parsed_result = json.loads(match.group())
        except json.JSONDecodeError:
            parsed_result = None
        if isinstance(parsed_result, dict):
            return parsed_result

    print(f"无法解析模型响应 ({model})")
    return _fallback_prediction("无法解析模型响应")

In [None]:
def evaluate_actor_detection(y_true: List[bool], y_pred: List[bool]) -> Dict:
    """
    Compute binary classification metrics for actor-ambiguity detection.

    Labels are paired positionally and judged by truthiness; "ambiguous"
    (truthy) is the positive class.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    Returns:
        A dict with "precision", "recall", "f1_score", "accuracy" and the raw
        confusion counts "tp", "fp", "fn", "tn". Any ratio whose denominator
        is zero (including accuracy on empty input) is reported as 0.
    """
    def _ratio(numerator, denominator):
        # Shared zero-denominator guard so all four metrics behave the same.
        return numerator / denominator if denominator > 0 else 0

    tp = fp = fn = tn = 0
    for actual, predicted in zip(y_true, y_pred):
        if actual and predicted:
            tp += 1
        elif predicted:
            fp += 1
        elif actual:
            fn += 1
        else:
            tn += 1

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1_score = _ratio(2 * precision * recall, precision + recall)
    # Previously unguarded: raised ZeroDivisionError when no samples were given.
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)

    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "accuracy": accuracy,
        "tp": tp,
        "fp": fp,
        "fn": fn,
        "tn": tn,
    }

In [None]:
def _normalize_ambiguity_flag(value) -> bool:
    """Convert a model-supplied ambiguity flag to a real ``bool``."""
    if isinstance(value, str):
        # A non-empty string is always truthy, so textual negatives such as
        # "false" must be recognised explicitly; other text stays positive.
        return value.strip().lower() not in {"false", "no", "0", ""}
    return bool(value)


def process_actor_dataset(df_subset: pd.DataFrame, model: str) -> Dict:
    """
    Run one model over every story in ``df_subset`` and score its predictions.

    For each row a prompt is built from 'StoryText', sent through ``call_llm``,
    and the answer is recorded next to the 'ActorAmbiguity' ground truth.
    Metrics are then computed with ``evaluate_actor_detection`` and printed.

    Args:
        df_subset: Rows to evaluate; the 'StoryID', 'StoryText' and
            'ActorAmbiguity' columns are read.
        model: Model name forwarded to ``call_llm``.

    Returns:
        A dict with "model", "predictions" (one record per story) and
        "metrics" (the result of ``evaluate_actor_detection``).
    """
    # Placeholders for fields the model left out of its answer.
    missing_field_defaults = {
        "has_actor_ambiguity": True,
        "ambiguity_explanation": "模型未提供解释",
        "identified_actors": "模型未识别角色",
        "role_clarification_needed": "模型未提供角色澄清信息",
        "suggested_improvement": "模型未提供改进建议",
    }

    results = {
        "model": model,
        "predictions": [],
        "metrics": {},
    }

    print(f"Processing model {model}...")

    total = len(df_subset)
    # Count positions with enumerate: the DataFrame index labels are not
    # guaranteed to be 0..n-1 (or even numeric).
    for position, (_, row) in enumerate(df_subset.iterrows(), start=1):
        story_text = row['StoryText']
        true_has_ambiguity = row['ActorAmbiguity']

        prompt = get_actor_ambiguity_prompt(story_text)
        prediction = call_llm(prompt, model)

        # Make sure every expected field is present.
        for field, placeholder in missing_field_defaults.items():
            prediction.setdefault(field, placeholder)

        results["predictions"].append({
            "story_id": row['StoryID'],
            "story_text": story_text,
            "true_has_ambiguity": true_has_ambiguity,
            # Models sometimes answer with "true"/"false" as strings.
            "pred_has_ambiguity": _normalize_ambiguity_flag(
                prediction["has_actor_ambiguity"]
            ),
            "ambiguity_explanation": prediction["ambiguity_explanation"],
            "identified_actors": prediction["identified_actors"],
            "role_clarification_needed": prediction["role_clarification_needed"],
            "suggested_improvement": prediction["suggested_improvement"],
        })

        # Progress report every 10 stories.
        if position % 10 == 0:
            print(f"  Processed {position}/{total} samples")

    # Compute the metrics.
    true_labels = [item["true_has_ambiguity"] for item in results["predictions"]]
    pred_labels = [item["pred_has_ambiguity"] for item in results["predictions"]]

    results["metrics"] = evaluate_actor_detection(true_labels, pred_labels)

    metrics = results["metrics"]
    print(f"  Results - Precision: {metrics['precision']:.3f}, Recall: {metrics['recall']:.3f}, F1: {metrics['f1_score']:.3f}, Accuracy: {metrics['accuracy']:.3f}")

    return results

In [None]:
def create_actor_visualization(results: List[Dict]):
    """
    Plot a 2x2 comparison figure of the per-model detection metrics.

    Panels: grouped precision/recall bars, F1 bars, accuracy bars, and a polar
    ("radar") chart of all four metrics. The figure is saved to
    ``actor_ambiguity_detection_results.png`` and then shown.

    Args:
        results: One dict per model, each with a "model" name and a "metrics"
            dict holding "precision", "recall", "f1_score" and "accuracy".
    """
    models = [r["model"] for r in results]

    # Extract the metric series, one value per model.
    precisions = [r["metrics"]["precision"] for r in results]
    recalls = [r["metrics"]["recall"] for r in results]
    f1_scores = [r["metrics"]["f1_score"] for r in results]
    accuracies = [r["metrics"]["accuracy"] for r in results]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    x = np.arange(len(models))
    width = 0.35

    # Precision and recall as side-by-side bars.
    pr_ax = axes[0, 0]
    pr_ax.bar(x - width/2, precisions, width, label='Precision', alpha=0.8, color='plum')
    pr_ax.bar(x + width/2, recalls, width, label='Recall', alpha=0.8, color='powderblue')
    pr_ax.set_xlabel('Model')
    pr_ax.set_ylabel('Score')
    pr_ax.set_title('角色歧义检测 - 精确率与召回率')
    pr_ax.set_xticks(x)
    pr_ax.set_xticklabels(models, rotation=45, ha='right')
    pr_ax.legend()
    pr_ax.grid(axis='y', linestyle='--', alpha=0.7)
    pr_ax.set_ylim(0, 1)

    # F1 and accuracy share the same single-series bar layout.
    single_metric_panels = [
        (axes[0, 1], f1_scores, 'F1 Score', '角色歧义检测 - F1分数', 'rosybrown'),
        (axes[1, 0], accuracies, 'Accuracy', '角色歧义检测 - 准确率', 'lightsteelblue'),
    ]
    for ax, scores, ylabel, title, color in single_metric_panels:
        ax.bar(models, scores, alpha=0.8, color=color)
        ax.set_xlabel('Model')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        ax.set_ylim(0, 1)

    # Radar chart: four axes, with the first point repeated to close the polygon.
    angles = np.linspace(0, 2 * np.pi, 4, endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))

    # The bottom-right slot already holds a cartesian Axes from plt.subplots.
    # Remove it explicitly before adding the polar one: recent Matplotlib no
    # longer drops overlapping Axes automatically, which would leave an empty
    # frame behind the radar chart.
    axes[1, 1].remove()
    ax_radar = fig.add_subplot(2, 2, 4, projection='polar')
    colors = plt.cm.Set3(np.linspace(0, 1, len(models)))

    for i, model in enumerate(models):
        values = [precisions[i], recalls[i], f1_scores[i], accuracies[i]]
        values = np.concatenate((values, [values[0]]))
        ax_radar.plot(angles, values, 'o-', linewidth=2, label=model, color=colors[i])
        ax_radar.fill(angles, values, alpha=0.25, color=colors[i])

    ax_radar.set_xticks(angles[:-1])
    ax_radar.set_xticklabels(['Precision', 'Recall', 'F1', 'Accuracy'])
    ax_radar.set_ylim(0, 1)
    ax_radar.set_title('角色歧义检测 - 综合性能对比')
    ax_radar.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))

    plt.tight_layout()
    plt.savefig('actor_ambiguity_detection_results.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
def main():
    """
    Entry point: run the actor-ambiguity detection benchmark end to end.

    Samples up to 25 ambiguous and 25 non-ambiguous stories from the global
    ``df``, evaluates every model in ``MODELS`` on them, writes all results to
    ``actor_ambiguity_results.json``, draws the comparison figure, and prints
    the metrics of each model.
    """
    print("开始角色歧义检测分析...")

    # Build the test set: a fixed-seed sample from each class.
    ambiguous_rows = df[df['ActorAmbiguity'] == True]
    clear_rows = df[df['ActorAmbiguity'] == False]
    actor_df = ambiguous_rows.sample(n=min(25, ambiguous_rows.shape[0]), random_state=42)
    non_actor_df = clear_rows.sample(n=min(25, clear_rows.shape[0]), random_state=42)
    test_df = pd.concat([actor_df, non_actor_df]).reset_index(drop=True)

    print(f"测试数据集大小: {test_df.shape}")
    print(f"角色歧义样本数: {actor_df.shape[0]}")
    print(f"无角色歧义样本数: {non_actor_df.shape[0]}")

    # Evaluate the models one after another.
    all_results = []
    for model_name in MODELS:
        all_results.append(process_actor_dataset(test_df, model_name))

        # Pause between models to stay clear of API rate limits.
        time.sleep(1)

    # Save the results.
    with open('actor_ambiguity_results.json', 'w', encoding='utf-8') as out_file:
        json.dump(all_results, out_file, ensure_ascii=False, indent=2, default=str)

    print("\n角色歧义检测分析完成，结果已保存到 actor_ambiguity_results.json")

    # Draw the comparison charts.
    create_actor_visualization(all_results)

    # Print the detailed per-model results.
    print("\n详细评估结果:")
    for entry in all_results:
        scores = entry["metrics"]

        print(f"\n模型: {entry['model']}")
        print(f"  精确率: {scores['precision']:.3f}")
        print(f"  召回率: {scores['recall']:.3f}")
        print(f"  F1分数: {scores['f1_score']:.3f}")
        print(f"  准确率: {scores['accuracy']:.3f}")
        print(f"  真正例: {scores['tp']}, 假正例: {scores['fp']}, 假负例: {scores['fn']}, 真负例: {scores['tn']}")


# Run the analysis when executed as a script (or as a notebook cell).
if __name__ == "__main__":
    main()