# E2B Code Sandbox with Google Gemini
Execute AI-generated Python code in a secure sandbox environment.

## Setup and Imports

In [1]:
"""E2B Code Sandbox with Google Gemini."""
import warnings
warnings.filterwarnings('ignore')

import sys
import os
import base64
import pandas as pd
import time

root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
sys.path.append(root_dir)

from dotenv import load_dotenv
load_dotenv()

from e2b_code_interpreter import Sandbox
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
from langchain.agents import create_agent
from langchain.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver

## Initialize Model and Sandbox

In [11]:
# Chat model that drives the agent. The two commented-out assignments are
# alternative Gemini model names; keep exactly one assignment active.
# model = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
# model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview")
model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

# Checkpointer handed to create_agent; later cells pick a conversation through
# config["configurable"]["thread_id"].
checkpointer = InMemorySaver()

# Module-level sandbox handle used by the upload_file and run_python_code tools.
# NOTE(review): timeout is presumably in seconds (3_600 = 1 hour) - confirm
# against the E2B SDK. The notebook never shuts the sandbox down explicitly.
sbx = Sandbox.create(timeout=3_600)
print("Sandbox created")

Sandbox created


## Helper Functions

In [3]:
def get_dataset_info(file_path):
    """Return the column names and a three-row preview of a tabular file.

    Files whose name ends in ``.csv`` (compared case-insensitively, so
    ``DATA.CSV`` is accepted) are read with ``pandas.read_csv``; every other
    file is handed to ``pandas.read_excel``.

    Args:
        file_path: Path to the data file, as a string or a path-like object.

    Returns:
        A string of the form ``Columns: [...]``, a newline, ``Sample data:``,
        a newline, and the preview rendered with ``DataFrame.to_string()``.

    Raises:
        Whatever the pandas reader raises for a missing or unparsable file.
    """
    # str() lets pathlib.Path arguments through; lower() makes the extension
    # test case-insensitive so "FOO.CSV" is not mistaken for an Excel file.
    is_csv = str(file_path).lower().endswith('.csv')

    # Only a preview is needed, so cap the number of rows that are read.
    if is_csv:
        df = pd.read_csv(file_path, nrows=3)
    else:
        df = pd.read_excel(file_path, nrows=3)

    return f"Columns: {list(df.columns)}\nSample data:\n{df.to_string()}"

## Define Tools

In [None]:
@tool
def upload_file(local_file_path: str):
    """Upload a data file to the E2B sandbox for analysis.

    Use this tool to upload CSV or Excel files before analyzing them.
    The file will be uploaded to the sandbox and the sandbox path will be returned.

    Args:
        local_file_path: Local path to the file (e.g., "./data/IMDB-Movie-Data.csv" or "/apple_2024.xlsx")

    Returns:
        Success message with sandbox_path and dataset_info

    Example:
        local_file_path = "./data/titanic.csv"
    """
    # NOTE: the docstring above is what the model sees as the tool description,
    # so it is deliberately left unchanged; implementation notes live in comments.

    # Clean the path and ensure it has the data directory
    if local_file_path.startswith('/'):
        local_file_path = local_file_path.lstrip('/')

    if not local_file_path.startswith('data/') and not local_file_path.startswith('./data/'):
        local_file_path = f"./data/{local_file_path}"

    # The path is chosen by the model, so collapse any ".." segments and refuse
    # anything that ends up outside the local data directory.
    data_root = os.path.abspath('data')
    resolved_path = os.path.abspath(local_file_path)
    inside_data_root = (
        resolved_path == data_root
        or resolved_path.startswith(data_root + os.sep)
    )
    if not inside_data_root:
        return f"Error: {local_file_path} is outside the data directory"

    # isfile rather than exists: a directory would pass exists() and then make
    # open() raise instead of producing a readable tool error.
    if not os.path.isfile(resolved_path):
        return f"Error: File not found at {local_file_path}"

    filename = os.path.basename(resolved_path)

    with open(resolved_path, "rb") as f:
        sandbox_file = sbx.files.write(f"data/{filename}", f)

    # The upload has already succeeded at this point; a preview failure (for
    # example an unsupported file type) is reported in the message instead of
    # raising, so the sandbox path is still returned to the agent.
    try:
        dataset_info = get_dataset_info(resolved_path)
    except Exception as exc:
        dataset_info = f"Dataset preview unavailable: {exc}"

    return f"File uploaded successfully!\nSandbox path: {sandbox_file.path}\n{dataset_info}"

@tool
def run_python_code(code: str):
    """Execute Python code in E2B sandbox and save chart outputs.
    
    IMPORTANT: The code parameter must be valid, executable Python code only.
    - Do NOT include markdown formatting (no ```python or ```)
    - Do NOT include explanations or comments outside the code
    - Include all necessary imports (pandas, matplotlib, numpy, etc.)
    - Use the exact dataset path provided in the system prompt
    - For visualizations, end with: display(plt.gcf())

    Args:
        code: Valid executable Python code as a plain string

    Returns:
        Execution result or error message
        
    Example:
        code = "import pandas as pd\\nimport matplotlib.pyplot as plt\\ndf = pd.read_csv('/path/to/data.csv')\\nplt.plot(df['x'], df['y'])\\ndisplay(plt.gcf())"
    """
    # NOTE: the docstring above is what the model sees as the tool description,
    # so it is deliberately left unchanged; implementation notes live in comments.

    # Upper bound on captured text handed back to the model, so one oversized
    # print() cannot flood the conversation.
    max_output_chars = 10_000

    print('Running code in sandbox....')
    execution = sbx.run_code(code)
    print('Code execution finished!')

    if execution.error:
        message = f"Error: {execution.error.name}\nValue: {execution.error.value}"
        # The traceback tells the model which line failed so it can repair the code.
        if execution.error.traceback:
            message += f"\nTraceback:\n{execution.error.traceback}"
        return message

    os.makedirs('images', exist_ok=True)

    timestamp = int(time.time())

    print(f"Total results: {len(execution.results)}")

    chart_lines = []
    text_parts = []

    # Printed output is the main way analysis code reports numbers; without it
    # the agent would only ever see "Code executed successfully".
    stdout_text = "".join(execution.logs.stdout).strip()
    if stdout_text:
        text_parts.append(f"Output:\n{stdout_text}")

    stderr_text = "".join(execution.logs.stderr).strip()
    if stderr_text:
        text_parts.append(f"Stderr:\n{stderr_text}")

    for idx, result in enumerate(execution.results):
        if result.png:
            # idx is unique per result, so filenames cannot collide within one call.
            filename = f'images/{timestamp}_chart-{idx}.png'
            with open(filename, 'wb') as f:
                f.write(base64.b64decode(result.png))
            chart_lines.append(f'Chart saved to {filename}')
            print(f"Saved: {filename}")
        elif result.text:
            # Non-image results, e.g. the value of the last expression.
            text_parts.append(f"Result:\n{result.text}")

    text_output = "\n".join(text_parts)
    if len(text_output) > max_output_chars:
        text_output = text_output[:max_output_chars] + "\n... [output truncated]"

    sections = chart_lines + ([text_output] if text_output else [])
    return "\n".join(sections) if sections else "Code executed successfully"

## Create Agent and Execute Query

In [None]:
# System prompt passed to create_agent. It lays out the locate -> upload ->
# analyse workflow and the rules for code sent to run_python_code.
# NOTE(review): the "filesystem search tools" in step 1 are presumably the ones
# contributed by FilesystemFileSearchMiddleware in the agent cell - confirm.
system_prompt = """You are a data analysis assistant with access to filesystem search, file upload, and Python code execution tools.

WORKFLOW:
1. First, use filesystem search tools to locate the requested data file in the local filesystem
2. Upload the file using upload_file tool (it will automatically handle the path)
3. The upload_file tool will return the sandbox path and dataset information with column names and sample data
4. ALWAYS read and analyze the actual data - do NOT make assumptions based on file names
5. Use run_python_code tool to analyze the data using the sandbox path

CRITICAL: Data Analysis Rules
- NEVER assume data content from file names
- ALWAYS examine the column names and sample data returned by upload_file
- Read the actual data structure before generating analysis code
- Base your analysis ONLY on what you see in the data, not what you expect

CRITICAL: Visualization Rules
- Do NOT generate plots or charts unless EXPLICITLY requested in the user's question
- Only create visualizations when the user asks for: "plot", "chart", "graph", "visualize", "show", or similar terms
- If no visualization is requested, return only the analysis results (calculations, statistics, tables, etc.)
- When charts ARE requested, use matplotlib with:
  * Clear and descriptive title explaining what the chart shows
  * Labeled x-axis and y-axis with units where applicable
  * Legend if multiple data series are present
  * Appropriate chart type for the data (line, bar, scatter, etc.)
  * MUST end with: display(plt.gcf())

CRITICAL RULES for run_python_code:
1. Generate ONLY executable Python code - no explanations, no markdown, no comments outside code
2. Import all required libraries (pandas, matplotlib, numpy, etc.)
3. Load data from the sandbox path returned by upload_file (e.g., /home/user/data/filename.csv)
4. For plots (ONLY if requested), MUST end with: display(plt.gcf())
5. Use the run_python_code tool with ONLY the code string

Example workflow:
- User asks: "Analyze Apple 2024 earnings" (NO plot requested)
  - Calculate metrics, return numerical results only
  
- User asks: "Plot Apple 2024 revenue trend" (plot requested)
  - Upload file, read data, create matplotlib chart with title and labels
  
- User asks: "Show me Google's cash flow analysis with charts" (chart requested)
  - Upload file, analyze data, create charts with proper titles and axis labels"""


In [6]:
# This import lives in the agent cell rather than in the imports cell at the top.
from langchain.agents.middleware import FilesystemFileSearchMiddleware

# Assemble the agent from the model, the two sandbox tools, the system prompt
# and the checkpointer defined in the earlier cells.
agent = create_agent(
    model=model,
    tools=[upload_file, run_python_code],
    system_prompt=system_prompt,
    checkpointer=checkpointer,
    # NOTE(review): presumably adds file-search tools limited to ./data, which
    # would explain the "/<name>" style paths in the outputs below - confirm.
    # use_ripgrep=True presumably needs the `rg` binary on PATH - verify.
    middleware=[FilesystemFileSearchMiddleware(
        root_path="./data",
        use_ripgrep=True,
        max_file_size_mb=100
    )]
)



In [7]:
# Run configuration reused by the following cells; every call that passes this
# config uses thread_id "default".
config = {"configurable": {"thread_id": "default"}}

query = "Upload ./data/IMDB-Movie-Data.csv and create a line chart showing average ratings over years"

# Send the question as a single human message.
result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)

# The last message in the returned list is printed as the agent's answer.
response = result['messages'][-1].text
print(f"\nResponse:\n{response}")

Running code in sandbox....
Code execution finished!

Response:
I have successfully uploaded the IMDB dataset and created the requested line chart.

The file was found at `/IMDB-Movie-Data.csv` (instead of `./data/IMDB-Movie-Data.csv`) and uploaded to the sandbox.

**Analysis Details:**
- **Dataset:** `IMDB-Movie-Data.csv`
- **Columns Used:** `Year`, `Rating`
- **Operation:** Grouped data by `Year` and calculated the mean `Rating` for each year.

The line chart below visualizes the trend of average movie ratings over the years found in the dataset.

![Average Ratings Over Years](images/1769199835_chart-1.png)


In [8]:
# Follow-up on the same config (thread_id "default"): ask the agent to list files.
query = "show me how many files are there? print all files."

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

There are **4 files** in the current directory:

*   `/apple_2024.xlsx`
*   `/google_2024.xlsx`
*   `/IMDB-Movie-Data.csv`
*   `/titanic.csv`


In [9]:
# Same thread as above; no chart is requested in this question.
query = "Analyze apple 2024 and calculate financial ratios"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
The analysis of the Apple 2024 financial data (based on the provided Excel file) has been completed.

**Financial Ratios Calculated:**

1.  **Profit Margin:** **24.00%**
    *   Formula: Net Income / Net Sales
    *   This indicates that for every dollar of sales, Apple retains $0.24 as profit.

2.  **Current Ratio:** **0.86**
    *   Formula: Total Current Assets / Total Current Liabilities
    *   A ratio below 1.0 suggests that the company has more current liabilities than current assets, which is common for companies with efficient inventory management and cash flow like Apple.

3.  **Debt-to-Equity Ratio:** **4.21**
    *   Formula: Total Liabilities / Total Shareholde

In [10]:
# Same thread as above; asks for a two-company comparison with a table.
query = "analyze google cashflow for 2024 and apple cashflow 2024. compare financial ratios. also show comparison table"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
The analysis of Apple and Google (Alphabet) for the fiscal year 2024 has been completed.

### **Financial Comparison Summary**

| Metric | Apple (2024) | Google (2024) |
| :--- | :--- | :--- |
| **Revenue** | $391.04B | $307.39B |
| **Net Income** | $93.74B | $73.80B |
| **Profit Margin** | **23.97%** | **24.01%** |
| **Current Ratio** | 0.86 | 2.15 |
| **Debt-to-Equity** | 4.21 | 0.43 |
| **Return on Assets (ROA)** | 18.73% | 18.34% |
| **Operating Cash Flow** | $118.26B | $101.75B |
| **Free Cash Flow** | $107.76B | $69.52B |

### **Key Insights:**

*   **Profitability:** Both companies have nearly identical profit margins (~24%), indicating highly efficient operations.
*   **Liquidity (Current Ratio):** Google has a much stronger liquidity position (2.15 vs 0.86), meaning it has significantly more current assets to cover its s

In [12]:
# Detailed comparison request on the same thread (thread_id "default").
# The workbook name was previously misspelled "goodle_2024.xlsx"; the file the
# agent listed from the data directory is google_2024.xlsx.
query = """Use data/google_2024.xlsx and Analyze the 2024 cash flow and financial performance of Google (Alphabet) and Apple.
Compute key financial ratios for both companies, including Gross Profit Margin,
Net Profit Margin, ROA, ROE, Current Ratio, and Debt-to-Equity Ratio.

Create the following visualizations:
1. A grouped bar chart comparing Apple vs Google for each financial ratio
   (x-axis: ratio names, y-axis: ratio values, separate bars for Apple and Google).
2. A comparison table summarizing all calculated ratios side by side.

Add clear titles, axis labels, legends, and use distinct colors for each company.
After generating the plots and table, interpret what the visual comparison reveals
about profitability, efficiency, liquidity, and leverage differences between Apple
and Google in 2024.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
### **Financial Analysis: Apple vs. Google (2024)**

The visual comparison and ratio calculations highlight distinct differences in the financial strategies and structures of Apple and Google.

#### **1. Profitability & Efficiency**
*   **Gross Margin:** **Google (56.93%)** significantly outperforms **Apple (46.21%)**. This reflects Google's software-centric business model (Search, Cloud, YouTube), which typically carries lower direct costs compared to Apple's hardware-heavy model (iPhones, Macs) that involves significant manufacturing and supply chain costs.
*   **Net Margin:** Both companies are equally efficient at the bottom line, with **Google (24.01%)** and **Apple (23.97%)** showing nearly identical net profit margins. Despite lower gross margins, Apple manages its operating expenses effectively to match Google's final profitability.
*   **Return on Equity (ROE):** **Apple (149.88%)** has a massive lead over **Google (25.32%)*

In [None]:
# Same thread as above; a question about how Alphabet funds its investing outflows.
query = """How is Alphabet funding its investing cash outflows: operating cash, debt, or cash reserves? Make sure to keep the currency denomination in your final answer
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....


In [23]:
# First Titanic question: survival rate per passenger class as a bar chart.
query = """
Using titanic.csv, calculate the survival rate for each passenger class (Pclass) and create a bar chart where 
the x-axis is Pclass (1, 2, 3) and the y-axis is survival rate (percentage of passengers who survived). 
Label the axes clearly and add a title explaining the insight.
"""

# Rebinds the shared `config` to a new thread_id; the cells below reuse it.
config = {"configurable": {"thread_id": "titanic_1"}}

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The bar chart displays the survival rate for each passenger class. Passenger class 1 has the highest survival rate, followed by class 2, and then class 3. This indicates a correlation between passenger class and survival likelihood on the Titanic, with higher classes having better survival chances.


In [24]:
# Second Titanic question, using the config with thread_id "titanic_1".
query = """
From titanic.csv, group passengers by Sex and Survived, 
then create a grouped bar chart where the x-axis is Sex (male, female), 
the y-axis is passenger count, and bars are split by survival status (0 = died, 1 = survived). 
Include a legend and interpret the result.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The grouped bar chart illustrates the passenger count by sex and survival status. It clearly shows that a significantly higher number of females survived compared to males, even though the total number of male passengers was higher. Conversely, a much larger number of male passengers died than female passengers. This suggests a strong correlation between gender and survival rate, with females having a considerably higher chance of survival on the Titanic.


In [25]:
# Third Titanic question, using the config with thread_id "titanic_1".
query = """
Using the Fare column from titanic.csv, create a box plot comparing 
ticket fares of survivors and non-survivors, with Survived (0 = did not survive, 1 = survived)
 on the x-axis and Fare on the y-axis. Apply a logarithmic scale to the Fare axis 
 if the values are highly skewed. Use distinct colors for each survival group, 
 add clear axis labels, and set the plot title to “Ticket Fare Distribution by Survival 
 Status on the Titanic”. After generating the plot, briefly explain what it reveals about 
 the relationship between socioeconomic status and survival probability.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
# Print the text of the last message in the result.
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The box plot comparing ticket fares between survivors and non-survivors reveals a clear trend. Passengers who survived generally paid higher fares, as indicated by the higher median and overall distribution of fares for the 'Survived' group compared to the 'Did Not Survive' group. This suggests that passengers with a higher socioeconomic status, often reflected in more expensive tickets for higher classes, had a greater chance of survival. This could be due to factors such as better cabin locations, closer proximity to lifeboats, or preferential treatment during the evacuation.
