In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import superpandas as spd
import matplotlib.pyplot as plt

# Build a small in-memory employee table to experiment with
data = dict(
    name=['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    age=[25, 30, 35, 28, 32],
    salary=[50000, 60000, 75000, 55000, 70000],
    department=['Engineering', 'Marketing', 'Engineering', 'Sales', 'Engineering'],
)
df = pd.DataFrame(data)

# Attach table-level metadata through SuperPandas' `df.super` namespace
df.super.name = "Employee Data"
df.super.description = "Sample employee data with names, ages, salaries, and departments"

# Per-column descriptions, keyed by the column names used in `data`
column_descriptions = {
    'name': 'Employee name',
    'age': 'Employee age in years',
    'salary': 'Annual salary in USD',
    'department': 'Employee department',
}
df.super.set_column_descriptions(column_descriptions)

# Read the metadata back and show it, followed by the table itself
print("DataFrame created with metadata:")
print(f"Name: {df.super.name}")
print(f"Description: {df.super.description}")
print("\nData:", df, sep="\n")


In [None]:
# Example 1: Basic analysis
# The query text is bound once so the echoed line and the agent call share it.
query = "Show the first 3 rows"
print(f"Query: {query}")

result = df.super.analyze_with_agent(query=query, max_iterations=2)

# Collect the report sections first; `.get` supplies 'N/A' for any key that is absent.
report = [
    f"Generated Code:\n{result.get('generated_code', 'N/A')}",
    f"\nResult:\n{result.get('result', 'N/A')}",
    f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}",
]
print("\nAgent Result:", *report, sep="\n")


In [None]:
# Example 2: Statistical analysis
# The query text is bound once so the echoed line and the agent call share it.
query = "Calculate the average salary by department"
print(f"Query: {query}")

result = df.super.analyze_with_agent(query=query, max_iterations=3)

# Collect the report sections first; `.get` supplies 'N/A' for any key that is absent.
report = [
    f"Generated Code:\n{result.get('generated_code', 'N/A')}",
    f"\nResult:\n{result.get('result', 'N/A')}",
    f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}",
]
print("\nAgent Result:", *report, sep="\n")


In [None]:
# Example 3: Visualization
# The query text is bound once so the echoed line and the agent call share it.
query = "Create a bar chart of average age by department"
print(f"Query: {query}")

result = df.super.analyze_with_agent(query=query, max_iterations=3)

# Collect the report sections first; `.get` supplies 'N/A' for any key that is absent.
report = [
    f"Generated Code:\n{result.get('generated_code', 'N/A')}",
    f"\nResult:\n{result.get('result', 'N/A')}",
    f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}",
]
print("\nAgent Result:", *report, sep="\n")

# Call plt.show() only when the result carries a truthy 'fig' entry.
figure = result.get('fig')
if figure:
    plt.show()


In [None]:
# Example 4: Complex analysis
# The query text is bound once so the echoed line and the agent call share it.
query = "Find employees with salary above the median and age below 30"
print(f"Query: {query}")

result = df.super.analyze_with_agent(query=query, max_iterations=3)

# Collect the report sections first; `.get` supplies 'N/A' for any key that is absent.
report = [
    f"Generated Code:\n{result.get('generated_code', 'N/A')}",
    f"\nResult:\n{result.get('result', 'N/A')}",
    f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}",
]
print("\nAgent Result:", *report, sep="\n")


In [None]:
# Direct agent usage example: call run_agent() itself rather than going through df.super
from superpandas.langgraph_agent import run_agent

# The query text is bound once so the echoed line and the agent call share it.
query = "Find the highest paid employee in each department"
print(f"Query: {query}")

result = run_agent(query=query, dataframe=df, max_iterations=3)

# Collect the report sections first; `.get` supplies 'N/A' for any key that is absent.
report = [
    f"Generated Code:\n{result.get('generated_code', 'N/A')}",
    f"\nResult:\n{result.get('result', 'N/A')}",
    f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}",
]
print("\nAgent Result:", *report, sep="\n")


In [None]:
# Error handling example
# The query asks about `bonus`, which is not among the columns `df` was built with
# (name, age, salary, department), so this cell reports the 'error' field of the
# agent result instead of 'result'.
query = "Calculate the percentage of total bonus for each employee"

# Echo the exact text that is sent to the agent. Previously the printed line
# described a different request ("total salary for a non-existent column") than
# the one actually submitted, which made the output misleading.
print(f"Query: {query}")

result = df.super.analyze_with_agent(
    query=query,
    max_iterations=3
)

# `.get` supplies 'N/A' for any key that is absent from the result.
print("\nAgent Result:")
print(f"Generated Code:\n{result.get('generated_code', 'N/A')}")
print(f"\nError:\n{result.get('error', 'N/A')}")
print(f"\nFormatted Response:\n{result.get('formatted_response', 'N/A')}")
