<a href="https://colab.research.google.com/github/passionforcodez/Python/blob/main/BuildingFileAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
from IPython.display import Markdown, HTML, display

In [None]:
# Install (or upgrade, -U) the google-genai SDK, quietly (-q); it provides the
# `from google import genai` import used by the cells below.
%pip install -q -U google-genai

In [None]:
import os
from google.colab import userdata # Colab utility to access secrets

# --- 1. Load API Key Securely ---
# Read the key named GEMINI_API_KEY from this notebook's Colab secrets.
api_key = userdata.get('GEMINI_API_KEY')

# Later cells call genai.Client() with no arguments, which expects the key in the
# environment, so export it here. setdefault() keeps any value that is already set.
if api_key:
    os.environ.setdefault('GEMINI_API_KEY', api_key)

In [None]:
import os
from google import genai

# Precondition: GEMINI_API_KEY must already be present in os.environ.

# Step 1 - create the client (it is given no explicit key, so it relies on the environment).
client = genai.Client()

# Step 2 - assemble the translation prompt from its three pieces.
original_sentence = "I like red cars and blue houses, but my dog is yellow."
prompt = "".join([
    "Translate the following English sentence to both Hindi and Marathi. ",
    "Format your response clearly, labeling each translation:\n\n",
    "English: " + original_sentence,
])

# Step 3 - send the prompt through client.models.generate_content.
response = client.models.generate_content(contents=prompt, model="gemini-2.5-flash")

# Step 4 - show the model's text reply.
print(response.text)

Here are the translations of your English sentence into Hindi and Marathi:

**English:** I like red cars and blue houses, but my dog is yellow.

---

**Hindi Translation:**

मुझे लाल गाड़ियाँ और नीले घर पसंद हैं, लेकिन मेरा कुत्ता पीला है।
*(Mujhe laal gaadiyan aur neele ghar pasand hain, lekin mera kutta peela hai.)*

---

**Marathi Translation:**

मला लाल गाड्या आणि निळी घरे आवडतात, पण माझा कुत्रा पिवळा आहे.
*(Mala laal gaadya aani nili ghare aavadtat, pan maajha kutra pivla aahe.)*


In [None]:
# Lesson 2: Interacting with CSV Data

In [None]:
# Step 1: read the sales CSV into a DataFrame.
# The path is relative to the notebook's working directory.
file_path = 'sample_data/sales_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Reaching this line means read_csv did not raise.
print("File loaded successfully!")

File loaded successfully!


In [None]:
# Re-read the CSV and replace every missing value (in any column) with 0.
df = pd.read_csv(file_path).fillna(0)

In [None]:
# using langchain
# agent = create_pandas_dataframe_agent(llm=model,df=df,verbose=True)

In [None]:
# Requires `df` from an earlier cell.
# Show the first five rows, then the column index, one after the other.
print(df.head(), df.columns, sep="\n")

   order_id customer_id  customer_name    product_name         category  \
0      1001       C2501     John Smith          Laptop      Electronics   
1      1002       C2502   Emma Johnson      Desk Chair        Furniture   
2      1003       C2503  Michael Brown    Coffee Maker       Appliances   
3      1004       C2501     John Smith  Wireless Mouse      Electronics   
4      1005       C2504    Sarah Davis    Notebook Set  Office Supplies   

   quantity  unit_price  total_price  order_date region      status  
0         1      899.99       899.99  2024-01-15  North   Delivered  
1         2      149.99       299.98  2024-01-16  South   Delivered  
2         1       79.99        79.99  2024-01-17   East     Shipped  
3         3       24.99        74.97  2024-01-18  North   Delivered  
4         5       12.99        64.95  2024-01-19   West  Processing  
Index(['order_id', 'customer_id', 'customer_name', 'product_name', 'category',
       'quantity', 'unit_price', 'total_price', 'o

In [None]:
import pandas as pd
from google import genai
from google.genai import types
import os
import json

# `df` must already be loaded by an earlier cell.
# `execute_dataframe_code` is the tool the model calls. It is defined here only when
# no earlier cell has defined it, so this cell also works after Restart & Run All.
if "execute_dataframe_code" not in globals():

    def execute_dataframe_code(code: str) -> str:
        """Run a Python script against the notebook's global DataFrame ``df``.

        Args:
            code: Python source that reads ``df`` (``pd`` is also available) and
                stores its answer in a variable named ``result``.

        Returns:
            ``str(result)`` on success; otherwise a short message describing the
            execution error or the missing ``result`` variable.
        """
        # SECURITY: exec() runs model-generated code with full kernel privileges.
        # Only use this in a throwaway or sandboxed runtime.
        namespace = {"df": df, "pd": pd}
        try:
            exec(code, namespace)
        except Exception as exc:
            # Report the failure to the model instead of crashing the cell.
            return f"Error while executing generated code: {exc!r}"
        if "result" not in namespace:
            return "Error: the script did not assign a variable named 'result'."
        return str(namespace["result"])


# --- 3. Initialize Client and Agent Logic (The critical part) ---
# No explicit key is passed, so GEMINI_API_KEY must be available in the environment.
client = genai.Client()

# --- REVISED SYSTEM INSTRUCTION (Kept strong) ---
# Adjacent string literals are concatenated, so every fragment ends with a space.
system_instruction = (
    "You are an expert data analyst. For ALL data analysis questions, "
    "you MUST use the 'execute_dataframe_code' tool. DO NOT answer questions "
    "with text until you receive the output from the tool. "
    "The data is in a pandas DataFrame named 'df'. The relevant column names are: "
    "The price of a single item is in the column 'unit_price'. "
    "The revenue/sales amount is in the column 'total_price'. "
    "Write a short, complete Python script that calculates ALL requested metrics. "
    "The final calculated result MUST be assigned to a variable named 'result' "
    "and should be a single dictionary or string containing all values."
)

user_question = "What is the total revenue and the median price of all products?"

# First call: ask the model to generate the code.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=user_question,
    config=types.GenerateContentConfig(
        tools=[execute_dataframe_code],
        system_instruction=system_instruction,
        # Force the model to answer with a tool call.
        tool_config=types.ToolConfig(
            function_calling_config=types.FunctionCallingConfig(
                mode="ANY"
            )
        ),
        # A Python callable in `tools` enables automatic function calling by
        # default. Combined with mode="ANY" the SDK would keep executing the
        # generated code itself until its remote-call limit is reached. This cell
        # runs the tool manually below, so turn the automatic loop off.
        automatic_function_calling=types.AutomaticFunctionCallingConfig(
            disable=True
        ),
    )
)

# Check if the model generated a tool call.
if response.function_calls:
    function_call = response.function_calls[0]

    # Execute the code generated by the model (exactly once).
    tool_output = execute_dataframe_code(function_call.args['code'])
    print(f"\n🧠 Model Generated Code:\n{function_call.args['code']}\n")
    print(f"🛠️ Tool Output:\n{tool_output}\n")

    # Second call: replay the conversation (question -> tool call -> tool result)
    # so the model can phrase a final, natural-language answer.
    final_response = client.models.generate_content(
        model='gemini-2.5-flash',
        contents=[
            # 1. User turn: the original question.
            {"role": "user", "parts": [{"text": user_question}]},

            # 2. Model turn: the function call the model produced.
            {
                "role": "model",
                "parts": [
                    {
                        "functionCall": {
                            "name": function_call.name,
                            "args": function_call.args
                        }
                    }
                ]
            },

            # 3. Tool turn: the output of running that call.
            {
                "role": "tool",
                "parts": [
                    {
                        "functionResponse": {
                            "name": "execute_dataframe_code",
                            "response": {"result": tool_output}
                        }
                    }
                ]
            }
        ]
    )
    print(f"✅ Agent's Final Answer: {final_response.text}")

else:
    print(f"\n❌ Tool Use Failed: Agent returned direct text response (Model skipped tool use).")
    print(f"🤖 Direct Response: {response.text}")


🧠 Model Generated Code:
total_revenue = df["total_price"].sum()
median_price = df["unit_price"].median()
result = {"total_revenue": total_revenue, "median_price": median_price}

🛠️ Tool Output:
{'total_revenue': np.float64(3280.54), 'median_price': 59.989999999999995}

✅ Agent's Final Answer: The total revenue of all products is $3280.54 and the median price is $59.99.


In [None]:
import pandas as pd
from google import genai
from google.genai import types
import os
import json

# Assume df and execute_dataframe_code are correctly defined in your Colab session
# You must run the code block that defines df and the function first!

# --- 3. Initialize Client and Agent Logic (The critical part) ---
# No explicit key is passed, so GEMINI_API_KEY must be available in the environment.
client = genai.Client()

# --- REVISED SYSTEM INSTRUCTION (Kept strong) ---
# Adjacent string literals are concatenated, so every fragment ends with a space.
system_instruction = (
    "You are an expert data analyst. For ALL data analysis questions, "
    "you MUST use the 'execute_dataframe_code' tool. DO NOT answer questions "
    "with text until you receive the output from the tool. "
    "The data is in a pandas DataFrame named 'df'. The relevant column names are: "
    "The price of a single item is in the column 'unit_price'. "
    "The revenue/sales amount is in the column 'total_price'. "
    "Write a short, complete Python script that calculates ALL requested metrics. "
    "The final calculated result MUST be assigned to a variable named 'result' "
    "and should be a single dictionary or string containing all values."
)

user_question = "List the total revenue broken down by category - Electronics."

# First call: ask the model to generate the code.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=user_question,
    config=types.GenerateContentConfig(
        tools=[execute_dataframe_code],
        system_instruction=system_instruction,
        # Force the model to answer with a tool call.
        tool_config=types.ToolConfig(
            function_calling_config=types.FunctionCallingConfig(
                mode="ANY"
            )
        ),
        # A Python callable in `tools` enables automatic function calling by
        # default. Combined with mode="ANY" the SDK would keep executing the
        # generated code itself until its remote-call limit is reached. This cell
        # runs the tool manually below, so turn the automatic loop off.
        automatic_function_calling=types.AutomaticFunctionCallingConfig(
            disable=True
        ),
    )
)

# Check if the model generated a tool call.
if response.function_calls:
    function_call = response.function_calls[0]

    # Execute the code generated by the model (exactly once).
    tool_output = execute_dataframe_code(function_call.args['code'])
    print(f"\n🧠 Model Generated Code:\n{function_call.args['code']}\n")
    print(f"🛠️ Tool Output:\n{tool_output}\n")

    # Second call: replay the conversation (question -> tool call -> tool result)
    # so the model can phrase a final, natural-language answer.
    final_response = client.models.generate_content(
        model='gemini-2.5-flash',
        contents=[
            # 1. User turn: the original question.
            {"role": "user", "parts": [{"text": user_question}]},

            # 2. Model turn: the function call the model produced.
            {
                "role": "model",
                "parts": [
                    {
                        "functionCall": {
                            "name": function_call.name,
                            "args": function_call.args
                        }
                    }
                ]
            },

            # 3. Tool turn: the output of running that call.
            {
                "role": "tool",
                "parts": [
                    {
                        "functionResponse": {
                            "name": "execute_dataframe_code",
                            "response": {"result": tool_output}
                        }
                    }
                ]
            }
        ]
    )
    print(f"✅ Agent's Final Answer: {final_response.text}")

else:
    print(f"\n❌ Tool Use Failed: Agent returned direct text response (Model skipped tool use).")
    print(f"🤖 Direct Response: {response.text}")


🧠 Model Generated Code:
result = {"total_revenue_electronics": df[df["category"] == "Electronics"]["total_price"].sum()}

🛠️ Tool Output:
{'total_revenue_electronics': np.float64(1814.8600000000001)}

✅ Agent's Final Answer: The total revenue for the Electronics category is $1814.86.


In [None]:
# Integrating LangChain with Gemini to automate DataFrame analysis.

In [None]:
# Install/upgrade LangChain, its Gemini chat-model integration (imported below as
# langchain_google_genai), and pandas.
# NOTE(review): pandas is already imported earlier in this notebook; if this upgrades
# it, the runtime may need a restart for the new version to load — confirm.
%pip install -U langchain langchain-google-genai pandas



In [None]:
# Needed for create_pandas_dataframe_agent, which the cells below import from
# langchain_experimental.agents.agent_toolkits.
%pip install langchain-experimental



In [None]:
# NOTE(review): langchain is already installed by the earlier
# `%pip install -U langchain langchain-google-genai pandas` cell, so this cell
# looks redundant — confirm before removing.
%pip install langchain



In [None]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
import os
from google.colab import userdata # Colab utility to access secrets

# --- 1. Load API Key Securely ---
# userdata.get() raises (instead of returning None) when the secret does not exist
# or this notebook has not been granted access to it. Catch those errors so the
# environment-variable fallback below can actually run.
try:
    api_key = userdata.get('GEMINI_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    api_key = None

if not api_key:
    # Fallback/Debug: If the secret isn't set, try to get it from the environment (less secure)
    api_key = os.environ.get('GEMINI_API_KEY')
    if not api_key:
        raise ValueError("GEMINI_API_KEY not found. Please set it in Colab Secrets or as an environment variable.")

# 1. Load the DataFrame
# The CSV is expected at sample_data/sales_data.csv, relative to the working directory.
file_path = 'sample_data/sales_data.csv'
df = pd.read_csv(file_path)

# 2. Define the LLM (LangChain wrapper for Gemini)
# Pass the API key explicitly rather than relying on ambient credential lookup.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.0,
    google_api_key=api_key  # Pass the retrieved key directly
)

# 3. Create the Agent
# SECURITY: allow_dangerous_code=True lets the agent execute model-generated Python
# in this kernel. Only use it in a throwaway or sandboxed runtime.
agent = create_pandas_dataframe_agent(
    llm=llm,
    df=df,
    verbose=True,
    allow_dangerous_code=True
)

# 4. Run a Question
new_question = "List the total revenue broken down by category - Electronics."

# The .invoke() method executes the full thought-process and tool-use loop
response = agent.invoke({"input": new_question})

# Print the final result
print("-" * 30)
print(f"✅ LangChain Agent Answer: {response['output']}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: python_repl_ast
Action Input: print(df[df['category'] == 'Electronics']['total_price'].sum())[0m[36;1m[1;3m1814.8600000000001
[0m[32;1m[1;3mI now know the final answer
Final Answer: The total revenue for the 'Electronics' category is 1814.86.[0m

[1m> Finished chain.[0m
------------------------------
✅ LangChain Agent Answer: The total revenue for the 'Electronics' category is 1814.86.


**Approach 1: Manual/Official SDK**

This method gave you maximal control over the entire agent loop.

| Component | What You Implemented |
| --- | --- |
| The Tool | The custom `execute_dataframe_code` Python function. This function held the DataFrame (`df`) and executed the AI-generated script against it. |
| Orchestration | The two-step API call process (request → tool call → execution → response), which required manually constructing the conversation history (the user, model, and tool turns). |
| Authentication | You used the official `genai.Client()` and `types` objects. |
| Key Takeaway | You learned exactly how Gemini's function calling works and how to structure complex multi-turn API conversations. |


**Approach 2: LangChain Inbuilt Agent**

This method prioritized simplicity and speed, leveraging LangChain's abstractions.

| Component | What LangChain Handled |
| --- | --- |
| The Tool | LangChain automatically created and managed the Python REPL tool to execute code against the `df`. |
| Orchestration | The entire multi-step process was abstracted away by the `agent.invoke()` method. |
| Authentication | You used the `ChatGoogleGenerativeAI` wrapper, which required careful handling of the `google_api_key` to avoid the Colab GCE authentication errors. |
| Key Takeaway | You demonstrated rapid development capabilities, achieving the same result with significantly less custom code once the libraries were correctly configured. |


In [None]:
# interactive AI Agent

In [None]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
import os
from google.colab import userdata # Colab utility to access secrets

# --- 1. SETUP: Load API Key, DataFrame, and LLM (Done only ONCE) ---

print("Initializing Gemini Data Agent...")

try:
    # Load API Key Securely.
    # userdata.get() raises (instead of returning None) when the secret is missing
    # or access has not been granted. Catch that so the environment fallback runs.
    try:
        api_key = userdata.get('GEMINI_API_KEY')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        api_key = None
    if not api_key:
        api_key = os.environ.get('GEMINI_API_KEY')
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found. Please set it in Colab Secrets or as an environment variable.")

    # Load the DataFrame
    file_path = 'sample_data/sales_data.csv'
    df = pd.read_csv(file_path)

    # Define the LLM
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0.0,
        google_api_key=api_key
    )

    # Create the Agent
    # SECURITY: allow_dangerous_code=True lets the agent execute model-generated
    # Python in this kernel. Only use it in a throwaway or sandboxed runtime.
    agent = create_pandas_dataframe_agent(
        llm=llm,
        df=df,
        verbose=False,  # Set to False to keep the chat clean
        allow_dangerous_code=True
    )

    print("✅ Agent ready. Type 'exit' or 'quit' to end the session.")
    print("-" * 30)

except Exception as e:
    print(f"❌ Initialization Failed: {e}")
    # Mark setup as failed so the interactive loop below is skipped.
    agent = None

# --- 2. INTERACTIVE LOOP (Runs until you type 'exit') ---

if agent is not None:
    while True:
        # Get user input; end the session cleanly if the input stream is closed
        # or the cell is interrupted while waiting for input.
        try:
            user_input = input("❓ Your Data Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            print("👋 Session ended. Goodbye!")
            break

        # Ignore blank lines instead of spending an LLM call on them.
        if not user_input:
            continue

        # Check for exit commands (case-insensitive, surrounding spaces ignored).
        if user_input.lower() in ("quit", "exit"):
            print("👋 Session ended. Goodbye!")
            break

        try:
            # The agent.invoke() call handles the entire question-answering process
            response = agent.invoke({"input": user_input})

            # Print the final answer
            print(f"\n💡 Agent Answer: {response['output']}\n")
            print("-" * 30)

        except Exception as e:
            # Keep the session alive: report the failure and prompt again.
            print(f"\n❌ An error occurred during analysis: {e}\n")
            print("-" * 30)

Initializing Gemini Data Agent...
✅ Agent ready. Type 'exit' or 'quit' to end the session.
------------------------------
❓ Your Data Question: List the total revenue broken down by category - Electronics.

💡 Agent Answer: The total revenue for the 'Electronics' category is 1814.86.

------------------------------
❓ Your Data Question: What is the average total price?

💡 Agent Answer: The average total price is 164.027.

------------------------------
❓ Your Data Question: exit
👋 Session ended. Goodbye!
