# E2B Code Sandbox with Google Gemini
Execute AI-generated Python code in a secure E2B sandbox, driven by a LangChain agent backed by Google Gemini.

## Setup and Imports

In [1]:
"""E2B Code Sandbox with Google Gemini."""
import warnings
warnings.filterwarnings('ignore')

import sys
import os
import base64
import pandas as pd
import time

# Append the directory three levels above the current working directory to
# sys.path. NOTE(review): presumably this is the repository root, added so
# project-level modules can be imported from this notebook — confirm.
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
sys.path.append(root_dir)

from dotenv import load_dotenv
load_dotenv()

from e2b_code_interpreter import Sandbox
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
from langchain.agents import create_agent
from langchain.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver

## Initialize Model and Sandbox

In [None]:
# Chat model used by the agent. The commented-out line is an alternative
# model name that can be swapped in.
# model = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview")

# In-memory checkpointer, passed to create_agent further down.
# NOTE(review): presumably keeps per-thread conversation state for the
# lifetime of this kernel only — confirm against the LangGraph docs.
checkpointer = InMemorySaver()

# One sandbox instance shared by every tool call in this notebook.
# NOTE(review): timeout=3_600 is presumably in seconds (one hour) — confirm
# against the E2B SDK reference.
sbx = Sandbox.create(timeout=3_600)
print("Sandbox created")

Sandbox created


## Helper Functions

In [3]:
def get_dataset_info(file_path):
    """Return a short text preview of a tabular data file.

    Only the first three rows are read, so the preview stays cheap even for
    large files.

    Args:
        file_path: Path to the file as a string. A ``.csv`` suffix (matched
            case-insensitively) is parsed with ``pandas.read_csv``; every
            other file is handed to ``pandas.read_excel``.

    Returns:
        A string listing the column names followed by the sampled rows
        rendered with ``DataFrame.to_string``.
    """
    # Compare against a lower-cased copy so that "DATA.CSV" is not mistaken
    # for an Excel workbook and sent to read_excel.
    if file_path.lower().endswith('.csv'):
        df = pd.read_csv(file_path, nrows=3)
    else:
        df = pd.read_excel(file_path, nrows=3)

    return f"Columns: {list(df.columns)}\nSample data:\n{df.to_string()}"

## Define Tools

In [4]:
@tool
def upload_file(local_file_path: str):
    """Upload a data file to the E2B sandbox for analysis.

    Use this tool to upload CSV or Excel files before analyzing them.
    The file will be uploaded to the sandbox and the sandbox path will be returned.

    Args:
        local_file_path: Local path to the file (e.g., "./data/IMDB-Movie-Data.csv" or "/apple_2024.xlsx")

    Returns:
        Success message with sandbox_path and dataset_info

    Example:
        local_file_path = "./data/titanic.csv"
    """
    # NOTE(review): the docstring above is kept verbatim on purpose — the
    # @tool decorator presumably exposes it to the model as the tool
    # description, so rewording it would change agent behaviour.

    # Clean the path and ensure it has the data directory. A leading "/" is
    # treated as "relative to ./data" (see the "/apple_2024.xlsx" example).
    if local_file_path.startswith('/'):
        local_file_path = local_file_path.lstrip('/')

    if not local_file_path.startswith(('data/', './data/')):
        local_file_path = f"./data/{local_file_path}"

    # The path comes from the model, so refuse anything that uses ".."
    # segments to climb out of ./data; otherwise arbitrary local files
    # could be shipped to the remote sandbox.
    data_root = os.path.abspath('./data')
    resolved_path = os.path.abspath(local_file_path)
    if os.path.commonpath([data_root, resolved_path]) != data_root:
        return f"Error: {local_file_path} is outside the data directory"

    # isfile (not exists): a directory such as "./data/" exists but cannot
    # be opened for reading, which previously raised IsADirectoryError.
    if not os.path.isfile(local_file_path):
        return f"Error: File not found at {local_file_path}"

    filename = os.path.basename(local_file_path)

    # Stream the local file into the sandbox under data/<filename>.
    with open(local_file_path, "rb") as f:
        sandbox_file = sbx.files.write(f"data/{filename}", f)

    # Column names plus a three-row sample, read from the local copy, so the
    # model can see the real schema before generating analysis code.
    dataset_info = get_dataset_info(local_file_path)

    return f"File uploaded successfully!\nSandbox path: {sandbox_file.path}\n{dataset_info}"

@tool
def run_python_code(code: str):
    """Execute Python code in E2B sandbox and save chart outputs.

    IMPORTANT: The code parameter must be valid, executable Python code only.
    - Do NOT include markdown formatting (no ```python or ```)
    - Do NOT include explanations or comments outside the code
    - Include all necessary imports (pandas, matplotlib, numpy, etc.)
    - Use the exact dataset path provided in the system prompt
    - For visualizations, end with: display(plt.gcf())

    Args:
        code: Valid executable Python code as a plain string

    Returns:
        Execution result or error message

    Example:
        code = "import pandas as pd\\nimport matplotlib.pyplot as plt\\ndf = pd.read_csv('/path/to/data.csv')\\nplt.plot(df['x'], df['y'])\\ndisplay(plt.gcf())"
    """
    # NOTE(review): the docstring above is kept verbatim on purpose — the
    # @tool decorator presumably exposes it to the model as the tool
    # description, so rewording it would change agent behaviour.
    print('Running code in sandbox....')
    execution = sbx.run_code(code)
    print('Code execution finished!')

    # A failed run is reported back as text so the model can correct its code.
    if execution.error:
        return f"Error: {execution.error.name}\nValue: {execution.error.value}"

    os.makedirs('images', exist_ok=True)

    results = []

    # Feed printed output back to the model. Without this, print()-based
    # analysis produced nothing but "Code executed successfully", leaving the
    # model with no computed values to report.
    stdout = "".join(execution.logs.stdout).strip()
    if stdout:
        results.append(f"Output:\n{stdout}")

    # One timestamp per call keeps the charts of a single run grouped; idx
    # tells apart multiple charts produced by the same run.
    timestamp = int(time.time())
    for idx, result in enumerate(execution.results):
        if result.png:
            # result.png is base64 text; decode it and write the image locally.
            filename = f'images/{timestamp}_chart-{idx}.png'
            with open(filename, 'wb') as f:
                f.write(base64.b64decode(result.png))
            results.append(f'Chart saved to {filename}')
        elif result.text:
            # Non-image result (e.g. the value of the final expression).
            results.append(result.text)

    return "\n".join(results) if results else "Code executed successfully"

## Create Agent and Execute Query

In [5]:
# System prompt handed to create_agent below. It spells out the
# search -> upload -> analyze workflow and the formatting rules the model
# must follow when calling the upload_file and run_python_code tools.
system_prompt = """You are a data analysis assistant with access to filesystem search, file upload, and Python code execution tools.

WORKFLOW:
1. First, use filesystem search tools to locate the requested data file in the local filesystem
2. Upload the file using upload_file tool (it will automatically handle the path)
3. The upload_file tool will return the sandbox path and dataset information with column names and sample data
4. ALWAYS read and analyze the actual data - do NOT make assumptions based on file names
5. Use run_python_code tool to analyze the data using the sandbox path

CRITICAL: Data Analysis Rules
- NEVER assume data content from file names
- ALWAYS examine the column names and sample data returned by upload_file
- Read the actual data structure before generating analysis code
- Base your analysis ONLY on what you see in the data, not what you expect

CRITICAL RULES for run_python_code:
1. Generate ONLY executable Python code - no explanations, no markdown, no comments outside code
2. Import all required libraries (pandas, matplotlib, numpy, etc.)
3. Load data from the sandbox path returned by upload_file (e.g., /home/user/data/filename.csv)
4. For plots, MUST end with: display(plt.gcf())
5. Use the run_python_code tool with ONLY the code string

Example workflow:
- User asks: "Analyze Apple 2024 earnings"
- You search filesystem and find: /apple_2024.xlsx
- You upload using: upload_file("/apple_2024.xlsx")
- You receive column names and sample data - READ THEM CAREFULLY
- You analyze what columns actually exist (e.g., Revenue, Operating Income, Net Income, etc.)
- You generate Python code based on ACTUAL columns, not assumptions
- You use the exact sandbox path returned"""


In [6]:
from langchain.agents.middleware import FilesystemFileSearchMiddleware

# Assemble the agent from the chat model, the two sandbox tools, the system
# prompt and the in-memory checkpointer defined in the earlier cells.
agent = create_agent(
    model=model,
    tools=[upload_file, run_python_code],
    system_prompt=system_prompt,
    checkpointer=checkpointer,
    # File-search middleware rooted at ./data.
    # NOTE(review): presumably this supplies the "filesystem search tools"
    # the system prompt refers to — confirm against the LangChain docs.
    middleware=[FilesystemFileSearchMiddleware(
        root_path="./data",
        use_ripgrep=True,
        max_file_size_mb=100
    )]
)



In [7]:
# Every invoke that reuses this config runs under thread_id "default".
# NOTE(review): presumably the checkpointer keys conversation state on
# thread_id, so later cells using this config continue the same chat — confirm.
config = {"configurable": {"thread_id": "default"}}

query = "Upload ./data/IMDB-Movie-Data.csv and create a line chart showing average ratings over years"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)

# The agent's answer is the text of the last entry in the returned "messages" list.
response = result['messages'][-1].text
print(f"\nResponse:\n{response}")

Running code in sandbox....
Code execution finished!

Response:
The line chart showing average movie ratings over years has been successfully created.


In [8]:
# Ask the agent to list the available files; reuses the existing `config`.
query = "show me how many files are there? print all files."

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

There are 4 files in total:
/apple_2024.xlsx
/google_2024.xlsx
/IMDB-Movie-Data.csv
/titanic.csv


In [16]:
# Financial-ratio request for the Apple workbook; reuses the existing `config`.
query = "Analyze apple 2024 and calculate financial ratios"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

I have already analyzed Apple's 2024 financial data and calculated the financial ratios in the previous turn. Here are the results again for your convenience:

**Extracted Financial Data for Apple 2024:**
*   Net Sales: 383285.0
*   Cost of Sales: 205480.0
*   Net Income: 100388.0
*   Total Assets: 352583.0
*   Total Liabilities: 254929.0
*   Total Shareholders' Equity: 97654.0
*   Total Current Assets: 143564.0
*   Total Current Liabilities: 145391.0

**Calculated Financial Ratios for Apple 2024:**
*   **Gross Profit Margin:** 46.39%
*   **Net Profit Margin:** 26.19%
*   **Return on Assets (ROA):** 28.47%
*   **Return on Equity (ROE):** 102.80%
*   **Current Ratio:** 0.99
*   **Debt-to-Equity Ratio:** 2.61

**Summary:**

Apple's 2024 financial performance, based on these ratios, indicates a highly profitable company with strong operational efficiency (high profit margins, ROA, and ROE). While its current ratio is slightly below 1, it's likely manageable given its business model and fi

In [17]:
# Two-company comparison request; reuses the existing `config`.
query = "analyze google cashflow for 2024 and apple cashflow 2024. compare financial ratios. also show comparison table"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
I have analyzed the financial data for both Apple and Google (Alphabet) for 2024 and calculated several key financial ratios. Here is the comparison table:

**Financial Ratios Comparison (Apple vs. Google 2024):**

| Ratio                 | Apple (2024) | Google (2024) |
| :-------------------- | :----------- | :------------ |
| Gross Profit Margin   | 46.39%       | 56.45%        |
| Net Profit Margin     | 26.19%       | 25.77%        |
| Return on Assets (ROA) | 28.47%       | 16.29%        |
| Return on Equity (ROE) | 102.80%      | 22.84%        |
| Current Ratio         | 0.99         | 1.63          |
| Debt-to-Equity Ratio  | 2.61         | 0.39          |

**Comparison Analysis:**

*   **Gross Profit Margin:** Google has a higher gross profit margin (56.45%) compared to Apple (46.39%). This suggests that Google retains a larger proportion of revenue after accounting for the cost of revenues, which might be indicative of its 

In [29]:
# Detailed comparison request with chart and table requirements.
# NOTE(review): "goodle_2024.xlsx" looks like a typo for "google_2024.xlsx";
# the recorded output of this cell shows the agent failing to find the file.
# Left as-is because the recorded output depends on it.
query = """Use data/goodle_2024.xlsx and Analyze the 2024 cash flow and financial performance of Google (Alphabet) and Apple.
Compute key financial ratios for both companies, including Gross Profit Margin,
Net Profit Margin, ROA, ROE, Current Ratio, and Debt-to-Equity Ratio.

Create the following visualizations:
1. A grouped bar chart comparing Apple vs Google for each financial ratio
   (x-axis: ratio names, y-axis: ratio values, separate bars for Apple and Google).
2. A comparison table summarizing all calculated ratios side by side.

Add clear titles, axis labels, legends, and use distinct colors for each company.
After generating the plots and table, interpret what the visual comparison reveals
about profitability, efficiency, liquidity, and leverage differences between Apple
and Google in 2024.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

I couldn't find the file `data/goodle_2024.xlsx`. Please make sure the file path and name are correct.

Also, to perform a comparative analysis between Google and Apple, I will need the financial data for Apple as well. Could you please provide the file path for Apple's 2024 financial data once the Google file is correctly identified?


In [18]:
# Cash-flow funding question about Alphabet; reuses the existing `config`.
query = """How is Alphabet funding its investing cash outflows: operating cash, debt, or cash reserves? Make sure to keep the currency denomination in your final answer
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
For 2024, Alphabet's cash flow data is as follows:

*   **Operating Cash Flow:** $99,426.00
*   **Investing Cash Flow:** $-29,470.00
*   **Financing Cash Flow:** $-27,993.00

Alphabet's investing cash outflows for 2024 were **$29,470.00**.

Alphabet is primarily funding its investing cash outflows through its **operating cash flow**, as the operating cash flow of $99,426.00 is significantly greater than the absolute value of its investing cash outflows. This indicates that the company's core business operations are generating more than enough cash to cover its investments.


In [23]:
# Survival rate per passenger class, rendered as a bar chart.
query = """
Using titanic.csv, calculate the survival rate for each passenger class (Pclass) and create a bar chart where 
the x-axis is Pclass (1, 2, 3) and the y-axis is survival rate (percentage of passengers who survived). 
Label the axes clearly and add a title explaining the insight.
"""

# Rebinds `config` to a different thread_id; cells that run after this one
# and pass `config` use "titanic_1" rather than "default".
config = {"configurable": {"thread_id": "titanic_1"}}

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The bar chart displays the survival rate for each passenger class. Passenger class 1 has the highest survival rate, followed by class 2, and then class 3. This indicates a correlation between passenger class and survival likelihood on the Titanic, with higher classes having better survival chances.


In [24]:
# Passenger counts by Sex and Survived, rendered as a grouped bar chart.
query = """
From titanic.csv, group passengers by Sex and Survived, 
then create a grouped bar chart where the x-axis is Sex (male, female), 
the y-axis is passenger count, and bars are split by survival status (0 = died, 1 = survived). 
Include a legend and interpret the result.
"""

# Reuses whatever `config` is currently bound in the kernel.
result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The grouped bar chart illustrates the passenger count by sex and survival status. It clearly shows that a significantly higher number of females survived compared to males, even though the total number of male passengers was higher. Conversely, a much larger number of male passengers died than female passengers. This suggests a strong correlation between gender and survival rate, with females having a considerably higher chance of survival on the Titanic.


In [25]:
# Fare distribution by survival status, rendered as a box plot.
query = """
Using the Fare column from titanic.csv, create a box plot comparing 
ticket fares of survivors and non-survivors, with Survived (0 = did not survive, 1 = survived)
 on the x-axis and Fare on the y-axis. Apply a logarithmic scale to the Fare axis 
 if the values are highly skewed. Use distinct colors for each survival group, 
 add clear axis labels, and set the plot title to “Ticket Fare Distribution by Survival 
 Status on the Titanic”. After generating the plot, briefly explain what it reveals about 
 the relationship between socioeconomic status and survival probability.
"""

# Reuses whatever `config` is currently bound in the kernel.
result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The box plot comparing ticket fares between survivors and non-survivors reveals a clear trend. Passengers who survived generally paid higher fares, as indicated by the higher median and overall distribution of fares for the 'Survived' group compared to the 'Did Not Survive' group. This suggests that passengers with a higher socioeconomic status, often reflected in more expensive tickets for higher classes, had a greater chance of survival. This could be due to factors such as better cabin locations, closer proximity to lifeboats, or preferential treatment during the evacuation.
