## Agent 1 - Orchestration: identify the query's intent and choose the matching data file

In [1]:
# Intent catalogue: each key is an intent name and each value lists sample
# questions for that intent. get_intent() in the next cell passes only the
# keys to the classifier as candidate labels; the sample questions are not
# read there.
intent_examples = {
    "client_program": [
        "Which learners are enrolled in client X's program?",
        "List programs mapped to each learner",
    ],
    "attendance": [
        "What is the attendance of learner John?",
        "Show me attendance data for March",
    ],
    "content_progress": [
        "Which learners have completed module 2?",
        "How many learners are in progress with the content?",
    ],
    "progress_report": [
        "Give me the progress status of each learner",
        "Who hasn't completed the required modules yet?",
    ],
    "score_report": [
        "What are the scores for learners in the final test?",
        "Show me the average score per learner",
    ]
}


In [2]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Module-level zero-shot classifier; get_intent() below reads this global on every call.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def get_intent(query):
    """Return the intent name the zero-shot classifier ranks highest for `query`.

    The candidate labels are the keys of the module-level `intent_examples`;
    the module-level `classifier` does the ranking.
    """
    candidate_intents = [name for name in intent_examples.keys()]
    ranked_labels = classifier(query, candidate_intents)['labels']
    # The first entry of the returned label list is taken as the winner.
    return ranked_labels[0]


  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


In [3]:
def route_to_file(intent):
    """Translate an intent name into the relative path of its CSV file.

    Raises:
        KeyError: if `intent` is not one of the five known intent names.
    """
    csv_by_intent = {
        "client_program": "client_program_learner.csv",
        "attendance": "learner_attendence.csv",
        "content_progress": "progress_content_deploy.csv",
        "progress_report": "progress_report.csv",
        "score_report": "score_report.csv",
    }
    csv_name = csv_by_intent[intent]
    return "..//preparing_data/{}".format(csv_name)


In [4]:
def handle_query(query):
    """Resolve `query` to its intent and to the data file serving that intent.

    Returns a dict with the keys "intent" and "file_path".
    """
    outcome = {"intent": get_intent(query)}
    outcome["file_path"] = route_to_file(outcome["intent"])
    return outcome


In [5]:
# Client and program context that is embedded in the prompt below.
Client_name = 'Fractal Analytics'
Program_code = 'data-science-hackathon-march-25'

query = "How many learners are enrolled?"

# The whole multi-line prompt (context lines included), not just `query`,
# is what handle_query() receives and classifies.
prompt = f"""
    Client: {Client_name}
    Program: {Program_code}
    Query: {query}
    """

print(handle_query(prompt))

{'intent': 'client_program', 'file_path': '..//preparing_data/client_program_learner.csv'}


In [25]:
import json
import os
from transformers import pipeline

def handle_query(query, base_dir=None):
    """Classify a query into an intent and return the CSV file that serves it.

    The intent names are the keys of ``intent.json`` inside ``base_dir``; a
    zero-shot classifier ranks them against the question.

    Args:
        query: A bare question, or the multi-line prompt used in this notebook
            (``Client: ... / Program: ... / Query: ...``). When a ``Query:``
            line is present only the text after it is classified, so the
            client/program context lines do not influence the intent.
        base_dir: Directory holding ``intent.json`` and the CSV files.
            Defaults to the original hard-coded project data directory.

    Returns:
        dict with "intent" (the top-ranked intent name) and "file_path"
        (the mapped CSV under ``base_dir``; ``default_file.csv`` when the
        intent has no mapping).
    """
    if base_dir is None:
        # Original project location, kept as the default for compatibility.
        base_dir = r"C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data"

    # Load the intent catalogue; explicit encoding avoids platform-dependent decoding.
    with open(os.path.join(base_dir, "intent.json"), 'r', encoding="utf-8") as file:
        intent_examples = json.load(file)

    def extract_question(text):
        """Return the text following the first ``Query:`` line marker, else `text`."""
        lines = text.splitlines()
        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Query:"):
                question = "\n".join([stripped[len("Query:"):]] + lines[index + 1:]).strip()
                if question:
                    return question
        return text

    def get_classifier():
        """Build the zero-shot pipeline on first use and reuse it afterwards."""
        # Cached on the function object so repeated calls do not reload the model.
        cached = getattr(handle_query, "_classifier", None)
        if cached is None:
            cached = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
            handle_query._classifier = cached
        return cached

    def get_intent(text):
        """Return the intent name ranked first by the classifier."""
        labels = list(intent_examples.keys())  # Labels (intents) from the loaded JSON
        result = get_classifier()(text, labels)
        return result['labels'][0]  # Most likely intent

    def route_to_file(intent):
        """Map an intent name to its CSV path under ``base_dir``."""
        file_mapping = {
            "client_program": "client_program_learner.csv",
            "attendance": "learner_attendence.csv",
            "content_progress": "progress_content_deploy.csv",
            "progress_report": "progress_report.csv",
            "score_report": "score_report.csv"
        }
        return os.path.join(base_dir, file_mapping.get(intent, 'default_file.csv'))

    intent = get_intent(extract_question(query))
    return {
        "intent": intent,
        "file_path": route_to_file(intent)
    }


In [26]:
# Example usage: route a bare question (no client/program context lines)
# and print the resulting intent/file-path dict.
query = "What is the status of content progress?"
response = handle_query(query)
print(response)

Device set to use cpu


{'intent': 'content_progress', 'file_path': 'C:\\Users\\shuchismita_mallick.Shuchismita\\GenAI-Projects\\Operation AI Agent\\data\\progress_content_deploy.csv'}


In [None]:
import json
import os
from transformers import pipeline

def handle_query(query, base_dir=None):
    """Classify a query into an intent and return the CSV file that serves it.

    Every example question below is offered to a zero-shot classifier as a
    candidate label; the intent owning the top-ranked example is returned.

    Args:
        query: A bare question, or the multi-line prompt used in this notebook
            (``Client: ... / Program: ... / Query: ...``). When a ``Query:``
            line is present only the text after it is classified, so the
            client/program context lines do not influence the intent.
        base_dir: Directory holding the CSV files. Defaults to the original
            hard-coded project data directory.

    Returns:
        dict with "intent" (an intent name, or "Unknown" when the top label
        matches no example) and "file_path" (the mapped CSV under
        ``base_dir``; ``default_file.csv`` for an unmapped intent).
    """
    # Inline intent catalogue: intent name -> example questions.
    intent_examples = {
        "client_program": [
            "Which learners are enrolled in client X's program?",
            "List programs mapped to each learner",
        ],
        "attendance": [
            "What is the attendance of learner John?",
            "Show me attendance data for March",
        ],
        "content_progress": [
            "Which learners have completed module 2?",
            "How many learners are in progress with the content?",
        ],
        "progress_report": [
            "Give me the progress status of each learner",
            "Who hasn't completed the required modules yet?",
        ],
        "score_report": [
            "What are the scores for learners in the final test?",
            "Show me the average score per learner",
        ]
    }

    file_mapping = {
        "client_program": "client_program_learner.csv",
        "attendance": "learner_attendence.csv",
        "content_progress": "progress_content_deploy.csv",
        "progress_report": "progress_report.csv",
        "score_report": "score_report.csv"
    }

    if base_dir is None:
        # Original project location, kept as the default for compatibility.
        base_dir = r"C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data"

    def extract_question(text):
        """Return the text following the first ``Query:`` line marker, else `text`."""
        lines = text.splitlines()
        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Query:"):
                question = "\n".join([stripped[len("Query:"):]] + lines[index + 1:]).strip()
                if question:
                    return question
        return text

    def get_classifier():
        """Build the zero-shot pipeline on first use and reuse it afterwards."""
        # Cached on the function object so repeated calls do not reload the model.
        cached = getattr(handle_query, "_classifier", None)
        if cached is None:
            cached = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
            handle_query._classifier = cached
        return cached

    def get_intent(text):
        """Return the intent whose example question the classifier ranks first."""
        # Reverse lookup; setdefault keeps the first intent if an example repeats.
        label_to_intent = {}
        for intent, examples in intent_examples.items():
            for example in examples:
                label_to_intent.setdefault(example, intent)

        result = get_classifier()(text, list(label_to_intent))
        return label_to_intent.get(result['labels'][0], "Unknown")

    def route_to_file(intent):
        """Map an intent name to its CSV path under ``base_dir``."""
        return os.path.join(base_dir, file_mapping.get(intent, 'default_file.csv'))

    intent = get_intent(extract_question(query))
    return {
        "intent": intent,
        "file_path": route_to_file(intent)
    }


In [51]:
import json
import os
from transformers import pipeline

def handle_query(query, base_dir=None):
    """Classify a query into an intent and return the CSV file that serves it.

    Every example question below is offered to a zero-shot classifier as a
    candidate label; the intent owning the top-ranked example is returned.

    Args:
        query: A bare question, or the multi-line prompt used in this notebook
            (``Client: ... / Program: ... / Query: ...``). When a ``Query:``
            line is present only the text after it is classified, so the
            client/program context lines do not influence the intent.
        base_dir: Directory holding the CSV files. Defaults to the original
            hard-coded project data directory.

    Returns:
        dict with "intent" (an intent name, or "Unknown" when the top label
        matches no example) and "file_path" (the mapped CSV under
        ``base_dir``; ``default_file.csv`` for an unmapped intent).
    """
    # Inline intent catalogue: intent name -> example questions.
    intent_examples = {
        "client_program": [
            "Which learners are enrolled in client X's program?",
            "List programs mapped to each learner",
        ],
        "attendance": [
            "What is the attendance of learner John?",
            "Show me attendance data for March",
        ],
        "content_progress": [
            "Which learners have completed module 2?",
            "How many learners are in progress with the content?",
        ],
        "progress_report": [
            "Give me the progress status of each learner",
            "Who hasn't completed the required modules yet?",
        ],
        "score_report": [
            "What are the scores for learners in the final test?",
            "Show me the average score per learner",
        ]
    }

    file_mapping = {
        "client_program": "client_program_learner.csv",
        "attendance": "learner_attendence.csv",
        "content_progress": "progress_content_deploy.csv",
        "progress_report": "progress_report.csv",
        "score_report": "score_report.csv"
    }

    if base_dir is None:
        # Original project location, kept as the default for compatibility.
        base_dir = r"C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data"

    def extract_question(text):
        """Return the text following the first ``Query:`` line marker, else `text`."""
        lines = text.splitlines()
        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Query:"):
                question = "\n".join([stripped[len("Query:"):]] + lines[index + 1:]).strip()
                if question:
                    return question
        return text

    def get_classifier():
        """Build the zero-shot pipeline on first use and reuse it afterwards."""
        # Cached on the function object so repeated calls do not reload the model.
        cached = getattr(handle_query, "_classifier", None)
        if cached is None:
            cached = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
            handle_query._classifier = cached
        return cached

    def get_intent(text):
        """Return the intent whose example question the classifier ranks first."""
        # Reverse lookup; setdefault keeps the first intent if an example repeats.
        label_to_intent = {}
        for intent, examples in intent_examples.items():
            for example in examples:
                label_to_intent.setdefault(example, intent)

        result = get_classifier()(text, list(label_to_intent))
        return label_to_intent.get(result['labels'][0], "Unknown")

    def route_to_file(intent):
        """Map an intent name to its CSV path under ``base_dir``."""
        return os.path.join(base_dir, file_mapping.get(intent, 'default_file.csv'))

    intent = get_intent(extract_question(query))
    return {
        "intent": intent,
        "file_path": route_to_file(intent)
    }


In [52]:
# Client and program context that is embedded in the prompt below.
Client_name = 'Fractal Analytics'
Program_code = 'data-science-hackathon-march-25'

# This question is identical to one of the score_report example questions.
query = "Show me the average score per learner"

# handle_query() is given the full multi-line prompt, context lines included.
prompt = f"""
    Client: {Client_name}
    Program: {Program_code}
    Query: {query}
    """

print(handle_query(prompt))

Device set to use cpu


{'intent': 'score_report', 'file_path': 'C:\\Users\\shuchismita_mallick.Shuchismita\\GenAI-Projects\\Operation AI Agent\\data\\score_report.csv'}


## Agent 2 - Query Agent: run the query against the selected file and return the raw response

In [6]:
import os

In [34]:
import os
#from dotenv import load_dotenv
from langchain_experimental.agents import create_csv_agent
from langchain_anthropic import ChatAnthropic

#load_dotenv()

In [None]:
# Initialize the LLM that is passed as `llm=` to every create_csv_agent() call below.
# NOTE(review): no API key is passed here - presumably it is read from the
# environment (e.g. ANTHROPIC_API_KEY); confirm before running on a fresh machine.
llm = ChatAnthropic(model="claude-3-5-sonnet-20241022")

In [18]:
import pandas as pd
# Loads progress_content_deploy.csv into `df`.
# NOTE(review): no later cell visible in this notebook reads `df` (the CSV agents
# load their own file from a path) - presumably left over from manual inspection; confirm.
df = pd.read_csv(r'C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data\progress_content_deploy.csv')

In [37]:
def agent(prompt):
    """Route `prompt` to its CSV file and let a CSV agent answer it.

    Args:
        prompt: The text handed both to handle_query() for routing and to the
            CSV agent as the question to answer.

    Returns:
        The agent's final answer text, or a fixed apology string when the
        routed file does not exist on disk.
    """
    # Get the file path based on the intent
    file_path = handle_query(prompt)['file_path']

    # Bail out early when the routed file is missing.
    if not os.path.exists(file_path):
        return "Sorry, we don't have any information regarding this."

    # Named `csv_agent` so the local does not shadow this function's own name.
    # SECURITY: allow_dangerous_code=True lets the agent execute LLM-generated
    # Python in this process; only use with trusted prompts and data.
    csv_agent = create_csv_agent(llm=llm, path=file_path, verbose=True, allow_dangerous_code=True, handle_parsing_errors=True)

    # `run` is deprecated on LangChain chains; `invoke` returns a dict whose
    # "output" entry is the same final answer text that `run` returned.
    return csv_agent.invoke({"input": prompt})["output"]


In [48]:
# Client and program context that is embedded in the prompt below.
Client_name = 'Fractal Analytics'
Program_code = 'data-science-hackathon-march-25'

query = "Show me the average score per learner"

# Builds the global `prompt` that the following cells pass to agent() and handle_query().
prompt = f"""
    Client: {Client_name}
    Program: {Program_code}
    Query: {query}
    """

In [49]:
# Answer the prompt built in the previous cell with whatever `agent` is currently bound to.
agent(prompt)



[1m> Entering new AgentExecutor chain...[0m


APIStatusError: {'type': 'error', 'error': {'details': None, 'type': 'overloaded_error', 'message': 'Overloaded'}}

In [50]:
# Inspect which intent and file the router selects for the same prompt.
handle_query(prompt)

Device set to use cpu


{'intent': 'client_program',
 'file_path': 'C:\\Users\\shuchismita_mallick.Shuchismita\\GenAI-Projects\\Operation AI Agent\\data\\client_program_learner.csv'}

In [45]:

# Query progress_report.csv directly, bypassing the intent router.
file_path = r'C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data\progress_report.csv'
# Bound to `progress_agent` rather than `agent`: assigning to the global name
# `agent` would replace the agent() function with an executor object, so a
# later `agent(prompt)` call would skip routing entirely.
progress_agent = create_csv_agent(llm=llm, path=file_path, verbose=True, allow_dangerous_code=True, handle_parsing_errors=True)
# `run` is deprecated on LangChain chains; `invoke(...)["output"]` yields the same answer text.
answer = progress_agent.invoke({"input": prompt})["output"]
answer





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI notice I need to:
1. Filter the data for Fractal Analytics client
2. Filter for specific program (looking for program code containing "data-science-hackathon-march-25")
3. Count how many learners have status "SUBMITTED"

Let me execute these steps:

Thought: First, let's check if we have any entries for this specific program for Fractal Analytics

Action: python_repl_ast
Action Input: df[(df['Client_name'] == 'Fractal Analytics') & (df['code'].str.contains('data-science-hackathon-march-25', na=False))].shape[0]
[32;1m[1;3mLet me help you complete this analysis. I can see there are entries for Fractal Analytics, but let's count specifically how many learners have "SUBMITTED" status for this specific program.

Action: python_repl_ast
Action Input: df[(df['Client_name'] == 'Fractal Analytics') & 
   (df['code'].str.contains('data-science-hackathon-march-25', na=False)) & 
   (df['status'] == 'SUBMITTED')]['learner_id'].nuniq

'For Fractal Analytics\' data-science-hackathon-march-25 program, 35 learners have submitted their content.\n\nThis was determined by:\n1. Filtering for Fractal Analytics clients\n2. Filtering for the specific program code containing "data-science-hackathon-march-25"\n3. Counting unique learners (using learner_id) who have a "SUBMITTED" status\n4. The result shows 35 unique learners have submitted their content in this program.'

## Testing the agent

In [53]:
import json
import os
from transformers import pipeline

def handle_query(query, base_dir=None):
    """Classify a query into an intent and return the CSV file that serves it.

    Every example question below is offered to a zero-shot classifier as a
    candidate label; the intent owning the top-ranked example is returned.

    Args:
        query: A bare question, or the multi-line prompt used in this notebook
            (``Client: ... / Program: ... / Query: ...``). When a ``Query:``
            line is present only the text after it is classified, so the
            client/program context lines do not influence the intent.
        base_dir: Directory holding the CSV files. Defaults to the original
            hard-coded project data directory.

    Returns:
        dict with "intent" (an intent name, or "Unknown" when the top label
        matches no example) and "file_path" (the mapped CSV under
        ``base_dir``; ``default_file.csv`` for an unmapped intent).
    """
    # Inline intent catalogue: intent name -> example questions.
    intent_examples = {
        "client_program": [
            "Which learners are enrolled in client X's program?",
            "List programs mapped to each learner",
        ],
        "attendance": [
            "What is the attendance of learner John?",
            "Show me attendance data for March",
        ],
        "content_progress": [
            "Which learners have completed module 2?",
            "How many learners are in progress with the content?",
        ],
        "progress_report": [
            "Give me the progress status of each learner",
            "Who hasn't completed the required modules yet?",
        ],
        "score_report": [
            "What are the scores for learners in the final test?",
            "Show me the average score per learner",
        ]
    }

    file_mapping = {
        "client_program": "client_program_learner.csv",
        "attendance": "learner_attendence.csv",
        "content_progress": "progress_content_deploy.csv",
        "progress_report": "progress_report.csv",
        "score_report": "score_report.csv"
    }

    if base_dir is None:
        # Original project location, kept as the default for compatibility.
        base_dir = r"C:\Users\shuchismita_mallick.Shuchismita\GenAI-Projects\Operation AI Agent\data"

    def extract_question(text):
        """Return the text following the first ``Query:`` line marker, else `text`."""
        lines = text.splitlines()
        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Query:"):
                question = "\n".join([stripped[len("Query:"):]] + lines[index + 1:]).strip()
                if question:
                    return question
        return text

    def get_classifier():
        """Build the zero-shot pipeline on first use and reuse it afterwards."""
        # Cached on the function object so repeated calls do not reload the model.
        cached = getattr(handle_query, "_classifier", None)
        if cached is None:
            cached = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
            handle_query._classifier = cached
        return cached

    def get_intent(text):
        """Return the intent whose example question the classifier ranks first."""
        # Reverse lookup; setdefault keeps the first intent if an example repeats.
        label_to_intent = {}
        for intent, examples in intent_examples.items():
            for example in examples:
                label_to_intent.setdefault(example, intent)

        result = get_classifier()(text, list(label_to_intent))
        return label_to_intent.get(result['labels'][0], "Unknown")

    def route_to_file(intent):
        """Map an intent name to its CSV path under ``base_dir``."""
        return os.path.join(base_dir, file_mapping.get(intent, 'default_file.csv'))

    intent = get_intent(extract_question(query))
    return {
        "intent": intent,
        "file_path": route_to_file(intent)
    }


In [54]:
# Client and program context that is embedded in the prompt below.
Client_name = 'Fractal Analytics'
Program_code = 'data-science-hackathon-march-25'

# This question is identical to one of the score_report example questions.
query = "Show me the average score per learner"

# handle_query() is given the full multi-line prompt, context lines included.
prompt = f"""
    Client: {Client_name}
    Program: {Program_code}
    Query: {query}
    """

print(handle_query(prompt))

Device set to use cpu


{'intent': 'score_report', 'file_path': 'C:\\Users\\shuchismita_mallick.Shuchismita\\GenAI-Projects\\Operation AI Agent\\data\\score_report.csv'}


In [None]:
import os
from langchain_experimental.agents import create_csv_agent
from langchain_anthropic import ChatAnthropic


# Initialize the LLM (Claude 3.5 Sonnet, per the model id below); agent() reads this global.
# NOTE(review): no API key is passed here - presumably it is read from the
# environment (e.g. ANTHROPIC_API_KEY); confirm before running on a fresh machine.
llm = ChatAnthropic(model="claude-3-5-sonnet-20241022")

def agent(prompt):
    """Route `prompt` to its CSV file and let a CSV agent answer it.

    Args:
        prompt: The text handed both to handle_query() for routing and to the
            CSV agent as the question to answer.

    Returns:
        The agent's final answer text, or a fixed apology string when the
        routed file does not exist on disk.
    """
    # Get the file path based on the intent extracted from the query
    file_path = handle_query(prompt)['file_path']

    # Bail out early when the routed file is missing.
    if not os.path.exists(file_path):
        return "Sorry, we don't have any information regarding this."

    # Named `csv_agent` so the local does not shadow this function's own name.
    # SECURITY: allow_dangerous_code=True lets the agent execute LLM-generated
    # Python in this process; only use with trusted prompts and data.
    csv_agent = create_csv_agent(
        llm=llm,
        path=file_path,
        verbose=True,
        allow_dangerous_code=True,
        handle_parsing_errors=True
    )

    # `run` is deprecated on LangChain chains; `invoke` returns a dict whose
    # "output" entry is the same final answer text that `run` returned.
    return csv_agent.invoke({"input": prompt})["output"]

In [56]:
# Run the agent() function defined in the previous cell on the global `prompt`.
agent(prompt)

Device set to use cpu




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLet me help you analyze the average scores for learners in the data science hackathon program for Fractal Analytics.

Thought: I need to:
1. Filter the dataframe for Fractal Analytics and data-science-hackathon-march-25
2. Calculate the average of total_overall_score for each learner_id

Action: python_repl_ast
Action Input: 
df_filtered = df[
    (df['Client_name'] == 'Fractal Analytics') & 
    (df['code'] == 'data-science-hackathon-march-25')
]
avg_scores = df_filtered.groupby('learner_name')['total_overall_score'].mean().reset_index()
print(avg_scores)
[0m[36;1m[1;3m                     learner_name  total_overall_score
0                Aaradhya  Wadhwa                 80.0
1                Abhilash  Biswas                 80.0
2              Abira  Chakrabarty                 80.0
3                Aditya  Lahariya                 80.0
4   Adityanarayanan  Madhusudanan                 80.0
..                           

'For the data science hackathon program at Fractal Analytics, all 96 learners received a score of 80.0, making the average score 80.0 across all participants. This uniform scoring suggests it might be a participation score or initial assessment score given to all participants.'