In [0]:

# Install the notebook's dependencies into the notebook environment.
# Only numpy is pinned to an exact version; every other package is unpinned, so the
# versions that get installed depend on when this cell is run.
%pip install numpy==1.24.4 --no-cache-dir
# --force-reinstall --no-deps: reinstall these three packages without touching their dependencies.
%pip install --force-reinstall --no-deps langchain langchain-community langchain-openai
%pip install databricks-sdk
%pip install -U duckduckgo-search
%pip install langchain-core
%pip install langchain-text-splitters
%pip install aiohttp
dbutils.library.restartPython()  # Restart the Python process so the newly installed packages are the ones imported


In [0]:
# Upgrade langchain to the latest available release (unpinned), then restart Python
# so the upgraded package is the one that gets imported.
%pip install -U langchain
dbutils.library.restartPython() 

In [0]:
# openpyxl is installed for the .xlsx export done later via DataFrame.to_excel
# (presumably as the pandas Excel writer engine — confirm); restart Python afterwards.
%pip install openpyxl
dbutils.library.restartPython() 

In [0]:
import re
import json
import pandas as pd
from langchain.chat_models import ChatDatabricks
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.prompts import PromptTemplate

# -------------------------------------
# ✅ CONFIG
# -------------------------------------
# The transcript and both output files live in the same workspace folder. The folder is
# kept in one constant so that relocating the notebook needs a single edit instead of
# three; the resulting file paths are unchanged.
BASE_DIR = "/Workspace/Users/rajesh.ghosh@xebia.com/GenAi/GenAI codes/Agentic Agile Maturity Assesment/Maturity Assessment"

# Input: plain-text transcript that every dimension is assessed against.
TRANSCRIPT_FILE = f"{BASE_DIR}/transcripts.txt"
# Outputs: the same assessment records, written as a spreadsheet and as JSON.
OUTPUT_EXCEL_FILE = f"{BASE_DIR}/agile_maturity_assessment.xlsx"
OUTPUT_JSON_FILE = f"{BASE_DIR}/agile_maturity_assessment.json"
# Name of the Databricks model-serving endpoint used for the chat model.
DATABRICKS_LLM_ENDPOINT = "databricks-llama-4-maverick"

# -------------------------------------
# ✅ LLM Setup
# -------------------------------------
# Chat model behind the configured Databricks endpoint. The same instance drives the
# agent and answers the per-dimension assessment prompt.
llm = ChatDatabricks(endpoint=DATABRICKS_LLM_ENDPOINT, temperature=0.3, max_tokens=1000)

# -------------------------------------
# ✅ Agile Maturity Dimensions
# -------------------------------------
# One agent tool is built and one assessment is produced per entry, in this order.
dimensions = [
    "Sprint Planning", "Team Collaboration", "CI/CD & DevOps",
    "Stakeholder Engagement", "Retrospectives",
]

# -------------------------------------
# ✅ Prompt Template
# -------------------------------------
# Instruction text sent to the model for one dimension. "{dimension}" and "{transcript}"
# are placeholders filled in at call time; the requested "Score:" / "Justification:"
# layout is what the output parser further down looks for.
_ASSESSMENT_PROMPT_TEXT = """
You are an Agile consultant.

Your task: evaluate the Agile maturity for the following dimension: "{dimension}" based strictly on this transcript:
{transcript}

⚠️ Follow this exact format:
Score: <number from 1 to 5>
Justification: <one short paragraph>

Do not return Thought, Action, Observation, or Markdown.
Do not explain your steps — only return the final score and justification in plain text.
"""

prompt_template = PromptTemplate.from_template(_ASSESSMENT_PROMPT_TEXT)

# -------------------------------------
# ✅ Read Transcript File
# -------------------------------------
def read_transcript(path):
    """Return the entire contents of the transcript file as a string.

    The file is decoded as UTF-8 explicitly. Without an ``encoding`` argument
    ``open`` falls back to the platform's locale encoding, which can garble or
    reject non-ASCII characters (accents, dashes, emoji) on some machines.

    Args:
        path: Filesystem path of the transcript text file.

    Returns:
        The file's full text.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as file:
        return file.read()

# Read the transcript once when this cell runs; the same text is reused for every dimension.
transcript = read_transcript(TRANSCRIPT_FILE)

# -------------------------------------
# ✅ Tool Function per Dimension
# -------------------------------------
def assess_agile_dimension(input: str) -> str:
    """Ask the LLM to score one Agile maturity dimension against a transcript.

    Args:
        input: A single string of the form ``"<dimension>||<transcript>"``.
            Only the first ``"||"`` is treated as the delimiter, so the
            transcript part may itself contain ``"||"``.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``input`` contains no ``"||"`` delimiter.
    """
    # partition() splits on the first delimiter only. A plain split("||") produced more
    # than two pieces, and therefore an unpacking error, whenever the transcript text
    # happened to contain "||".
    dim, delimiter, txt = input.partition("||")
    if not delimiter:
        raise ValueError('Expected input of the form "<dimension>||<transcript>"')
    prompt = prompt_template.format(dimension=dim.strip(), transcript=txt.strip())
    return llm.invoke(prompt).content

def _build_dimension_tool(dimension_name):
    """Create the agent tool that assesses a single maturity dimension."""

    def _run(input):
        # Prefix the dimension name so the shared assessor knows what to score;
        # whatever the agent passes in is used as the transcript part.
        return assess_agile_dimension(f"{dimension_name}||{input}")

    return Tool(
        name=dimension_name,
        func=_run,
        description=f"Assess Agile Maturity for {dimension_name}",
    )


# One tool per dimension, in the same order as the dimension list.
tools = [_build_dimension_tool(dimension_name) for dimension_name in dimensions]

# -------------------------------------
# ✅ Agent Execution
# -------------------------------------
# Zero-shot ReAct agent that chooses among the per-dimension tools.
# handle_parsing_errors=True asks the executor to recover from malformed model output
# rather than raise; verbose=True prints the agent's intermediate steps.
agent = initialize_agent(
    tools=tools, llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True, verbose=True,
)

# Raw (unparsed) agent answer for each dimension, keyed by dimension name.
results = {}
for dimension_name in dimensions:
    print(f"\n🔍 Evaluating: {dimension_name}")
    results[dimension_name] = agent.run(
        f"Evaluate {dimension_name} based on this transcript: {transcript}"
    )

# -------------------------------------
# ✅ Improved Final Output Parsing
# -------------------------------------
def extract_final_answer_v2(text):
    """Parse an agent answer into a ``(score, justification)`` pair.

    The expected layout is ``Score: <1-5>`` followed by ``Justification: <text>``,
    but several looser phrasings are tolerated (see the inline comments).

    Args:
        text: Raw answer string produced by the agent. May be empty or ``None``.

    Returns:
        A tuple ``(score, justification)``:

        * ``score`` is a ``float`` between 1 and 5, or ``None`` when no valid
          score could be determined.
        * ``justification`` is a single-line string. When the agent failed it is
          the fixed failure message; when the score is missing or out of range
          it is prefixed with ``"Invalid or missing score: "``.
    """
    failure = (None, "❌ Agent failed or timed out.")

    if not text or "Agent stopped" in text:
        return failure

    mentions_error = "error" in text.lower()

    # Normalize text: drop markdown emphasis/heading symbols such as ** or #.
    text = re.sub(r'[\*\#]+', '', text)

    # 1. Extract the score, trying progressively looser formats.
    # Primary: explicit label such as "Score: 4", "score - 3.5" or "Score 2".
    labelled_match = re.search(r'score\s*[:\-]?\s*(\d(\.\d)?)\b', text, re.IGNORECASE)
    score_match = labelled_match
    score = float(labelled_match.group(1)) if labelled_match else None

    # Fallback 1: phrasing such as "is 3", "level of 4" or "rated a 2".
    if not score_match:
        score_match = re.search(r'\b(is|level\s+of|rated\s+a)\s*(\d(\.\d)?)\b', text, re.IGNORECASE)
        if score_match:
            score = float(score_match.group(2))

    # Fallback 2: a standalone number that appears before "justification"/"reasoning".
    # DOTALL lets the lookahead cross line breaks; without it the label was only
    # found when it sat on the same line as the number.
    if not score_match:
        score_match = re.search(
            r'\b(\d(\.\d)?)\b(?=.*?(?:justification|reasoning))',
            text,
            re.IGNORECASE | re.DOTALL,
        )
        if score_match:
            score = float(score_match.group(1))

    # Validate: only scores between 1 and 5 are accepted.
    if score is not None and not 1 <= score <= 5:
        score = None

    # The word "error" alone is not proof of failure: a valid answer may mention
    # "errors" in its justification. Treat it as a failure only when the answer
    # does not also carry a valid, explicitly labelled score.
    if mentions_error and not (labelled_match and score is not None):
        return failure

    # 2. Extract the justification.
    # Capture everything after the label up to the next score statement ("score"
    # followed by a number) or the end of the text. Stopping at any "score"
    # substring cut justifications short at words like "scores".
    justification = "Justification not found"
    justification_match = re.search(
        r'(?:justification|reasoning)\s*[:\-]?\s*(.+?)(?=\s*(?:\bscore\s*[:\-]?\s*\d|$))',
        text,
        re.IGNORECASE | re.DOTALL,
    )

    if justification_match:
        justification = justification_match.group(1).strip()
    elif score_match:
        # No label: use whatever follows the score, if anything.
        post_score = text[score_match.end():].strip()
        if post_score:
            justification = post_score
    else:
        # No label and no score: use the last non-empty line.
        paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
        justification = paragraphs[-1] if paragraphs else justification

    # Clean the justification: remove a literal repeat of the score (e.g. "4.0").
    # The value is escaped because an unescaped "." is a regex wildcard, which made
    # a score of 2.0 also delete unrelated numbers such as "200".
    if score is not None:
        justification = re.sub(rf'\b{re.escape(str(score))}\b', '', justification).strip()
    justification = re.sub(r'\s+', ' ', justification.strip())

    # Special case: the answer states the transcript has nothing relevant for this
    # dimension (e.g. CI/CD & DevOps) -> assign the lowest score.
    if score is None and "does not contain relevant information" in justification.lower():
        score = 1.0
        justification = justification.replace("Invalid or missing score: ", "")

    # No valid score: flag the problem in the justification itself.
    if score is None and "failed or timed out" not in justification.lower():
        justification = f"Invalid or missing score: {justification}"

    # A justification that reports a failure/timeout never carries a score.
    if "failed or timed out" in justification.lower():
        score = None

    return score, justification

# -------------------------------------
# ✅ Format & Save to JSON
# -------------------------------------
# Parse each raw agent answer into a (score, justification) pair, then shape one flat
# record per dimension, preserving the order in which the dimensions were evaluated.
parsed_results = {name: extract_final_answer_v2(raw) for name, raw in results.items()}
output_data = [
    {"Dimension": name, "Score": parsed_score, "Justification": parsed_text}
    for name, (parsed_score, parsed_text) in parsed_results.items()
]

# Write the records to disk as indented JSON.
with open(OUTPUT_JSON_FILE, "w") as json_file:
    json.dump(output_data, json_file, indent=4)

print(f"\n✅ JSON saved at: {OUTPUT_JSON_FILE}")

# -------------------------------------
# ✅ Optional: Save to Excel
# -------------------------------------
# The same records as a spreadsheet: one row per dimension, no index column.
df = pd.DataFrame(output_data)
df.to_excel(OUTPUT_EXCEL_FILE, index=False)
print(f"\n✅ Excel saved at: {OUTPUT_EXCEL_FILE}")

In [0]:
# -------------------------------------
# ✅ Convert JSON to Excel
# -------------------------------------
# Reload the assessment from the JSON file on disk and write it to the Excel path,
# replacing any workbook already stored there.
df_from_json = pd.read_json(path_or_buf=OUTPUT_JSON_FILE)
df_from_json.to_excel(excel_writer=OUTPUT_EXCEL_FILE, index=False)

print(f"✅ Excel also saved at: {OUTPUT_EXCEL_FILE}")