# agent_document_summarize

In [45]:
from typing import List, Dict, Any
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import json
import time
from tenacity import retry, stop_after_attempt, wait_exponential
from module.document_processing import load_documents
from module.get_model_and_embeding import get_llm
import config

In [46]:

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    # Re-raise the last error once the attempts are exhausted.  Returning an
    # empty dict here would make a permanently failing document look like a
    # successful one with no roles, hiding the failure from the caller.
    reraise=True,
)
def process_single_document(
    doc_content: str,
    current_roles: List[str],
    llm: Any,
    output_parser: Any,
    request_delay: float = 1.0
) -> Dict[str, List[str]]:
    """Extract a role -> tasks mapping from one document via the LLM.

    The call is wrapped by a tenacity retry: up to 3 attempts with exponential
    back-off (4-10 seconds).  Any exception triggers a retry; after the last
    attempt the final exception is re-raised to the caller.

    Args:
        doc_content: Text of the document to analyse.
        current_roles: Role names of the team, joined into the prompt.
        llm: Language model runnable placed in the chain.
        output_parser: Parser runnable turning the model output into a string.
        request_delay: Seconds to sleep before each request (rate limiting).

    Returns:
        The parsed JSON object returned by the model (role -> list of tasks).

    Raises:
        json.JSONDecodeError: If the model output is not valid JSON.
        ValueError: If the model output is valid JSON but not an object.
        Exception: Any error raised while invoking the chain.
    """

    try:
        prompt = ChatPromptTemplate.from_template(
            template="""
            <system>
            You are a helpful assistant specializing in data extraction from documents.
            </system>

            <user>
            Your task is to extract all roles mentioned in the given documents and their associated tasks.
            Provide your answer as a JSON string where keys are roles and values are lists of tasks.
            Only return the JSON string without any additional explanation or formatting.

            Example format:
            {{"Role1": ["Task1", "Task2", "Task3"], "Role2": ["Task1", "Task2"]}}

            Ensure your output is a valid JSON string that can be parsed directly.
            </user>

            <query>
            Role in my team: {myteam}
            Extract all roles and their associated tasks from the following document:
            {document_content}
            </query>
            """
        )

        chain = prompt | llm | output_parser

        # Add delay before making the API request (applies to every attempt)
        time.sleep(request_delay)

        response = chain.invoke({
            "myteam": ", ".join(current_roles),
            "document_content": doc_content
        })

        parsed = json.loads(response)

        # The caller iterates over .items(); reject lists/strings/numbers here
        # so that a malformed answer is retried instead of failing later.
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object of roles, got {type(parsed).__name__}"
            )

        return parsed

    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}")
        raise

    except Exception as e:
        print(f"Error during document processing: {e}")
        raise

In [47]:

def process_documents(
    document_paths: List[str],
    current_roles: List[str],
    model_name: str = "llama-3.1-70b-versatile",
    temperature: float = 0.3,
    request_delay: float = 1.0,
    max_retries: int = 3
) -> Dict[str, List[str]]:
    """
    Process documents to extract roles and tasks with retry logic and rate limiting.

    Args:
        document_paths: Paths handed to ``load_documents``.
        current_roles: Role names of the team, forwarded to the prompt.
        model_name: Model identifier passed to ``get_llm``.
        temperature: Sampling temperature passed to ``get_llm``.
        request_delay: Seconds to wait before each LLM request.
        max_retries: Maximum number of attempts per document (at least 1).

    Returns:
        A dict mapping each role to a sorted list of its unique tasks, plus a
        ``'_processing_summary'`` entry holding document counts and the
        settings used.  If the pipeline itself fails (e.g. loading the
        documents), only ``'_processing_summary'`` is returned and it carries
        an ``'error'`` message.
    """
    try:
        # Initialize components
        llm = get_llm(model_name, temperature)
        output_parser = StrOutputParser()

        # Apply the caller's retry budget.  tenacity-decorated functions
        # expose ``retry_with`` to override decorator arguments per call site;
        # fall back to the plain function if it is not decorated.
        retry_with = getattr(process_single_document, "retry_with", None)
        if retry_with is not None:
            process_one = retry_with(stop=stop_after_attempt(max(1, max_retries)))
        else:
            process_one = process_single_document

        print(f"Loading documents from {len(document_paths)} paths...")
        loaded_docs = load_documents(document_paths)
        print(f"Loaded {len(loaded_docs)} documents successfully.")

        # Process each document and combine results
        roles_tasks_summary = {}
        failed_docs = []

        for i, doc in enumerate(loaded_docs, 1):
            print(f"\nProcessing document {i}/{len(loaded_docs)}...")
            try:
                doc_results = process_one(
                    doc_content=doc.page_content,
                    current_roles=current_roles,
                    llm=llm,
                    output_parser=output_parser,
                    request_delay=request_delay
                )

                # Stage this document's tasks first so that a malformed entry
                # cannot leave the summary half-updated for a document that is
                # then reported as failed.
                staged = {}
                for role, tasks in doc_results.items():
                    if isinstance(tasks, str):
                        # A bare string would otherwise be split into characters.
                        tasks = [tasks]
                    staged[role] = set(tasks)

                print(f"Successfully processed document {i}")
                # Update summary with new results
                for role, tasks in staged.items():
                    roles_tasks_summary.setdefault(role, set()).update(tasks)

            except Exception as e:
                print(f"Failed to process document {i} after retries: {e}")
                failed_docs.append(doc)
                continue

        # Report any failed documents
        if failed_docs:
            print(f"\nFailed to process {len(failed_docs)} documents:")
            for doc in failed_docs:
                print(f"- Document: {getattr(doc, 'metadata', {}).get('source', 'Unknown source')}")

        # Convert sets back to lists for JSON serialization; sorting makes the
        # output reproducible (set iteration order is arbitrary).
        result = {
            role: sorted(tasks, key=str)
            for role, tasks in roles_tasks_summary.items()
        }

        # Add processing summary
        result['_processing_summary'] = {
            'total_documents': len(loaded_docs),
            'successful_documents': len(loaded_docs) - len(failed_docs),
            'failed_documents': len(failed_docs),
            'settings': {
                'model': model_name,
                'temperature': temperature,
                'request_delay': request_delay,
                'max_retries': max_retries
            }
        }

        return result

    except Exception as e:
        # Top-level boundary: report the failure in the summary instead of raising.
        print(f"Critical error in document processing pipeline: {e}")
        return {
            '_processing_summary': {
                'error': str(e),
                'total_documents': len(document_paths),
                'successful_documents': 0,
                'failed_documents': len(document_paths),
                'settings': {
                    'model': model_name,
                    'temperature': temperature,
                    'request_delay': request_delay,
                    'max_retries': max_retries
                }
            }
        }

In [48]:
def example():
    """Run the extraction pipeline on one sample PDF and print the JSON result.

    Abridged sample of the printed result (each task list is longer in a
    real run):

    {
      "Software Developer": [
        "Actual coding",
        "Implement functional cohesion",
        "implement and integrate the services to the final prototype",
        ...
      ],
      "UX Designer": [
        "understand customer requirements",
        "create prototypes to get user feedback",
        "design a software prototype",
        ...
      ],
      "Project Manager": [
        "Use design review for verification and validation",
        "Requirement Gathering and analysis",
        "Planning and organizing the project",
        ...
      ],
      "_processing_summary": {
        "total_documents": 27,
        "successful_documents": 27,
        "failed_documents": 0,
        "settings": {
          "model": "llama-3.1-70b-versatile",
          "temperature": 0.3,
          "request_delay": 1.0,
          "max_retries": 3
        }
      }
    }
    """
    # Inputs for the demo run
    sample_paths = [r"D:\Mindforge\AIService\test_doc\doc1.pdf"]
    team_roles = ["Software Developer", "UX Designer", "Project Manager"]

    print("Starting document processing...")
    # request_delay=1.0 -> one second pause before each LLM request
    summary = process_documents(
        document_paths=sample_paths,
        current_roles=team_roles,
        request_delay=1.0,
    )

    print("\nProcessing Results:")
    rendered = json.dumps(summary, indent=2)
    print(rendered)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    example()

Starting document processing...
Loading documents from 1 paths...
Loading documents...
Loaded 27 documents.
Loaded 27 documents successfully.

Processing document 1/27...
Successfully processed document 1

Processing document 2/27...
Successfully processed document 2

Processing document 3/27...
Successfully processed document 3

Processing document 4/27...
Successfully processed document 4

Processing document 5/27...
Successfully processed document 5

Processing document 6/27...
Successfully processed document 6

Processing document 7/27...
Successfully processed document 7

Processing document 8/27...
Successfully processed document 8

Processing document 9/27...
Successfully processed document 9

Processing document 10/27...
Successfully processed document 10

Processing document 11/27...
Successfully processed document 11

Processing document 12/27...
Successfully processed document 12

Processing document 13/27...
Successfully processed document 13

Processing document 14/27...
S

# agent_human_management.py

In [1]:
import json
from typing import Dict, List, Any
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers.string import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from module.get_model_and_embeding import get_llm
import logging
from typing import Optional
import config

Load Env





In [2]:

def create_role_analysis_chain(llm: Any):
    """
    Create a LangChain chain for role analysis.

    The chain must be invoked with a dict providing the two prompt variables,
    ``{"member_name": ..., "member_details": ...}``, and returns the model's
    raw string answer (expected to be a JSON list of role names).

    Args:
        llm (Any): Language model instance

    Returns:
        Chain: Composed LangChain chain (prompt | llm | string output parser)
    """
    template = """
    You are an AI assistant specializing in human resource management and team organization.
    
    Based on the provided team member details, identify all possible roles this person could perform effectively.
    Consider their current roles, skills, project experiences, and the description provided.
    Your answer should be a list of role names, including but not limited to their current roles.

    Team Member: {member_name}
    Details: {member_details}

    Provide your answer as a JSON string containing a list of strings, e.g., ["Role1", "Role2", "Role3"]. 
    Only return the JSON string, without any additional explanation.
    """

    prompt = ChatPromptTemplate.from_template(template)
    output_parser = StrOutputParser()

    # Feed the input dict straight into the prompt.  Mapping both variables to
    # RunnablePassthrough() would hand each of them the *entire* input dict,
    # so "Team Member" and "Details" would both render as the whole payload
    # instead of their own fields.
    chain = prompt | llm | output_parser

    return chain


In [3]:
def parse_roles_response(response: str) -> List[str]:
    """
    Parse the LLM response into a list of roles.

    Surrounding whitespace and an optional Markdown code fence
    (``` or ```json) around the JSON payload are removed before parsing,
    since models frequently wrap JSON answers that way.

    Args:
        response (str): JSON string response from LLM

    Returns:
        List[str]: List of roles

    Raises:
        json.JSONDecodeError: If the response is not valid JSON
            (a subclass of ValueError)
        ValueError: If the parsed JSON is not a list of strings
    """
    cleaned_response = response.strip()

    if cleaned_response.startswith("```"):
        # Drop the opening and (if present) closing fence markers.
        cleaned_response = cleaned_response[3:]
        if cleaned_response.endswith("```"):
            cleaned_response = cleaned_response[:-3]
        # A language tag such as "json" may follow the opening fence on the
        # same line; discard it when that first line is purely alphabetic.
        first_line, newline, remainder = cleaned_response.partition("\n")
        if newline and first_line.strip().isalpha():
            cleaned_response = remainder
        cleaned_response = cleaned_response.strip()

    roles = json.loads(cleaned_response)

    if not isinstance(roles, list) or not all(isinstance(role, str) for role in roles):
        raise ValueError("Response must be a list of strings")

    return roles

In [10]:
def analyze_team_roles(
    team_details: Dict[str, Any], 
    llm: Any,
    max_retries: int = 3
) -> Dict[str, List[str]]:
    """
    Analyze possible roles for each team member using LangChain chain.

    Each member is attempted up to ``max_retries`` times (at least once); a
    failed attempt -- an LLM call error or an unparsable response -- is
    logged and retried.

    Args:
        team_details (Dict[str, Any]): Dictionary containing team member details
        llm (Any): Language model instance
        max_retries (int, optional): Maximum number of attempts per member. Defaults to 3.

    Returns:
        Dict[str, List[str]]: Dictionary mapping team members to their potential roles

    Raises:
        Exception: The error from the final attempt (e.g. ``ValueError`` /
            ``json.JSONDecodeError`` from response parsing, or whatever the
            chain raised) if a member still fails after all attempts.
    """
    chain = create_role_analysis_chain(llm)
    roles_by_member: Dict[str, List[str]] = {}
    # Guarantee at least one attempt so a member is never silently skipped.
    attempts = max(1, max_retries)

    for member_name, member_details in team_details.items():
        print(f"Analyzing roles for team member: {member_name}")

        for attempt in range(1, attempts + 1):
            try:
                response = chain.invoke({
                    "member_name": member_name,
                    "member_details": str(member_details)
                })
                roles = parse_roles_response(response)
            except Exception as exc:
                print(f"Attempt {attempt}/{attempts} failed for {member_name}: {exc}")
                if attempt == attempts:
                    # Out of attempts: surface the original error unchanged.
                    raise
                continue

            roles_by_member[member_name] = roles
            print(f"Successfully analyzed roles for {member_name}")
            break

    return roles_by_member

In [11]:

def validate_team_details(team_details: Dict[str, Any]) -> None:
    """
    Check that the team details mapping is well formed.

    A valid input is a non-empty mapping of member name -> dict, where every
    dict contains the keys 'current_role', 'skills' and 'experience' and
    'skills' is a list.

    Args:
        team_details (Dict[str, Any]): Team details to validate

    Raises:
        ValueError: On the first violation found
    """
    mandatory = {'current_role', 'skills', 'experience'}

    # Guard clause: nothing to validate
    if not team_details:
        raise ValueError("Team details cannot be empty")

    for member_name, info in team_details.items():
        is_mapping = isinstance(info, dict)
        if not is_mapping:
            raise ValueError(f"Details for {member_name} must be a dictionary")

        absent = mandatory - set(info)
        if absent:
            message = f"Missing required fields for {member_name}: {absent}"
            raise ValueError(message)

        skills = info['skills']
        if not isinstance(skills, list):
            raise ValueError(f"Skills for {member_name} must be a list")

async def analyze_team_roles_async(
    team_details: Dict[str, Any], 
    llm: Any,
    max_retries: int = 3
) -> Dict[str, List[str]]:
    """
    Asynchronous version of analyze_team_roles using LangChain chain.

    Members are processed one after another; each is attempted up to
    ``max_retries`` times (at least once), and a failed attempt -- an LLM
    call error or an unparsable response -- is logged and retried.

    Args:
        team_details (Dict[str, Any]): Dictionary containing team member details
        llm (Any): Language model instance
        max_retries (int, optional): Maximum number of attempts per member. Defaults to 3.

    Returns:
        Dict[str, List[str]]: Dictionary mapping team members to their potential roles

    Raises:
        Exception: The error from the final attempt if a member still fails
            after all attempts.
    """
    chain = create_role_analysis_chain(llm)
    roles_by_member: Dict[str, List[str]] = {}
    # Guarantee at least one attempt so a member is never silently skipped.
    attempts = max(1, max_retries)

    for member_name, member_details in team_details.items():
        print(f"Analyzing roles for team member: {member_name}")

        for attempt in range(1, attempts + 1):
            try:
                response = await chain.ainvoke({
                    "member_name": member_name,
                    "member_details": str(member_details)
                })
                roles = parse_roles_response(response)
            except Exception as exc:
                print(f"Attempt {attempt}/{attempts} failed for {member_name}: {exc}")
                if attempt == attempts:
                    # Out of attempts: surface the original error unchanged.
                    raise
                continue

            roles_by_member[member_name] = roles
            print(f"Successfully analyzed roles for {member_name}")
            break

    return roles_by_member

In [12]:

# Sample input for the role analysis: member name -> details dict.
# Each entry carries the three keys that validate_team_details requires
# ('current_role', 'skills', 'experience'), with 'skills' given as a list.
team_details = {
    "John Doe": {
        "current_role": "Software Developer",
        "skills": ["Python", "JavaScript", "Docker"],
        "experience": "5 years in web development"
    },
    "Jane Smith": {
        "current_role": "UX Designer",
        "skills": ["UI/UX Design", "Figma", "User Research"],
        "experience": "3 years in product design"
    },
    "Mike Johnson": {
        "current_role": "Project Manager",
        "skills": ["Agile Methodologies", "Risk Management", "Stakeholder Communication"],
        "experience": "7 years in IT project management"
    }
}

In [13]:
# Fail fast with ValueError if the sample data is malformed.
validate_team_details(team_details)
# NOTE(review): get_llm is called without arguments here, so it presumably
# has default model settings -- confirm against module.get_model_and_embeding.
llm = get_llm()

# Synchronous execution
result = analyze_team_roles(team_details, llm)
print("Synchronous Results:")
# Pretty-print the member -> roles mapping as JSON.
print(json.dumps(result, indent=2))

Analyzing roles for team member: John Doe
Successfully analyzed roles for John Doe
Analyzing roles for team member: Jane Smith
Successfully analyzed roles for Jane Smith
Analyzing roles for team member: Mike Johnson
Successfully analyzed roles for Mike Johnson
Synchronous Results:
{
  "John Doe": [
    "Software Developer",
    "DevOps Engineer",
    "Full Stack Developer",
    "Backend Developer",
    "Cloud Engineer",
    "Technical Lead",
    "Web Developer",
    "Quality Assurance Engineer",
    "IT Project Manager"
  ],
  "Jane Smith": [
    "UX Designer",
    "UI Designer",
    "User Experience Researcher",
    "Interaction Designer",
    "Visual Designer",
    "Product Designer",
    "Design Consultant",
    "Human-Centered Design Specialist"
  ],
  "Mike Johnson": [
    "Project Manager",
    "Scrum Master",
    "Risk Manager",
    "IT Manager",
    "Operations Manager",
    "Business Analyst",
    "Team Lead",
    "Program Manager",
    "Portfolio Manager"
  ]
}
