# E2B Code Sandbox with Google Gemini
Execute AI-generated Python code in a secure sandbox environment.

## Setup and Imports

In [1]:
"""E2B Code Sandbox with Google Gemini."""
import warnings
warnings.filterwarnings('ignore')

import sys
import os
import base64
import pandas as pd
import time

root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
sys.path.append(root_dir)

from dotenv import load_dotenv
load_dotenv()

from e2b_code_interpreter import Sandbox
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
from langchain.agents import create_agent
from langchain.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver

## Initialize Model and Sandbox

In [2]:
# Chat model handed to the agent later in the notebook.
model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

# In-memory checkpointer; it is passed to create_agent together with a per-call
# thread_id in `config`.
checkpointer = InMemorySaver()

# Start the E2B sandbox that both tools below talk to through the global `sbx`.
# NOTE(review): `timeout` is presumably in seconds (1200 s = 20 min) — confirm
# against the E2B SDK docs.
sbx = Sandbox.create(timeout=1200)
print("Sandbox created")

Sandbox created


In [3]:
# Smoke test: run a trivial expression in the sandbox and display the returned
# Execution object to confirm the sandbox is reachable.
execution = sbx.run_code('2+2')
execution

Execution(Results: [Result(4)], Logs: Logs(stdout: [], stderr: []), Error: None)

## Helper Functions

In [4]:
def get_dataset_info(file_path, n_rows=3):
    """Return a short text preview (column names plus first rows) of a tabular file.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) to the file. Names ending in
            ``.csv`` (any letter case) are read with ``pandas.read_csv``;
            everything else is handed to ``pandas.read_excel``.
        n_rows: Number of data rows to include in the preview. Defaults to 3.

    Returns:
        A string with a ``Columns: [...]`` line, a ``Sample data:`` line and the
        ``DataFrame.to_string()`` rendering of the first ``n_rows`` rows.

    Raises:
        Whatever the pandas reader raises for a missing, unreadable or
        unsupported file.
    """
    # Normalise to str and lower-case before testing the suffix so that
    # "DATA.CSV" or a pathlib.Path is not mistakenly routed to the Excel reader.
    path_text = os.fspath(file_path)
    reader = pd.read_csv if path_text.lower().endswith('.csv') else pd.read_excel

    preview = reader(file_path, nrows=n_rows)

    return f"Columns: {list(preview.columns)}\nSample data:\n{preview.to_string()}"

## Define Tools

In [5]:
# NOTE: per LangChain's @tool documentation the docstring below is used as the tool
# description given to the model, so it is runtime text and is left unchanged.
@tool
def upload_file(local_file_path: str):
    """Upload a data file to the E2B sandbox for analysis.
    
    Args:
        local_file_path: Local path to the file (e.g., "./data/IMDB-Movie-Data.csv")
        
    Returns:
        Success message with sandbox_path and dataset_info
    """
    # Leading slashes are dropped so an absolute-looking path is treated as
    # relative to the working directory.
    if local_file_path.startswith('/'):
        local_file_path = local_file_path.lstrip('/')

    # Anything not already addressed under data/ is looked up inside ./data.
    # NOTE(review): paths containing ".." are not rejected, so a model-supplied
    # path can still point outside ./data.
    if not local_file_path.startswith(('data/', './data/')):
        local_file_path = f"./data/{local_file_path}"

    # isfile() rather than exists(): a directory passes exists() and would then
    # make open() raise instead of producing the error string below.
    if not os.path.isfile(local_file_path):
        return f"Error: File not found at {local_file_path}"

    filename = os.path.basename(local_file_path)

    # Stream the file into the sandbox under data/<filename> via the global `sbx`.
    with open(local_file_path, "rb") as f:
        sandbox_file = sbx.files.write(f"data/{filename}", f)

    # The preview is best-effort: the upload has already succeeded at this point,
    # so an unreadable or unsupported file must not turn the whole call into an
    # exception. Report the problem as text, like the other error paths here.
    try:
        dataset_info = get_dataset_info(local_file_path)
    except Exception as exc:
        dataset_info = f"Dataset preview unavailable: {exc}"

    return f"File uploaded successfully!\nSandbox path: {sandbox_file.path}\n{dataset_info}"

# NOTE: per LangChain's @tool documentation the docstring below is used as the tool
# description given to the model, so it is runtime text and is left unchanged.
@tool
def run_python_code(code: str):
    """Execute Python code in E2B sandbox.
    
    Args:
        code: Valid executable Python code. Use print() for output, display(plt.gcf()) for plots.
        
    Returns:
        Execution result
    """
    print('Running code in sandbox....')
    execution = sbx.run_code(code)
    print('Code execution finished!')

    # Report sandbox-side failures as text so the agent can read and react to them.
    if execution.error:
        return f"Error: {execution.error.name}\nValue: {execution.error.value}"

    # The textual form of the execution is always part of the reply, so the
    # result is never empty.
    output = [str(execution)]

    # Only results that carry a PNG payload are written to disk.
    charts = [
        (idx, result.png)
        for idx, result in enumerate(execution.results)
        if result.png
    ]

    if charts:
        os.makedirs('images', exist_ok=True)

        # Nanosecond timestamp: with whole seconds, two runs finishing within the
        # same second would both write images/<ts>_chart-0.png and overwrite
        # each other.
        timestamp = time.time_ns()

        for idx, png_b64 in charts:
            filename = f'images/{timestamp}_chart-{idx}.png'
            with open(filename, 'wb') as f:
                f.write(base64.b64decode(png_b64))
            output.append(f'Chart saved to {filename}')

    return "\n".join(output)

## Create Agent and Execute Query

In [6]:
# System prompt given to the agent. It names three tools, but only upload_file and
# run_python_code are defined in this notebook.
# NOTE(review): glob_search is presumably contributed by the
# FilesystemFileSearchMiddleware passed to create_agent — confirm against the
# LangChain middleware docs.
system_prompt = """You are a data analysis assistant. You MUST use the available tools to complete tasks.

AVAILABLE TOOLS:
1. glob_search - Search for files in the filesystem
2. upload_file - Upload files to sandbox
3. run_python_code - Execute Python code

WORKFLOW - Follow these steps:
1. Search for data files using glob_search
2. Upload file using upload_file 
3. Generate and execute Python code using run_python_code

VISUALIZATION RULES:
- Only create plots if user explicitly asks for: "plot", "chart", "graph", "visualize", "show"
- If plots requested: use matplotlib, add title, axis labels, display(plt.gcf())
- Otherwise: use print() for results

CRITICAL: You MUST call the appropriate tool for each step. Do not just think - ACT by calling tools."""


In [7]:
# NOTE(review): TodoListMiddleware is imported but not used in this cell; this import
# also sits apart from the main import cell at the top of the notebook.
from langchain.agents.middleware import FilesystemFileSearchMiddleware, TodoListMiddleware

# Assemble the agent from the model, the two sandbox tools, the system prompt and
# the checkpointer created earlier in the notebook.
agent = create_agent(
    model=model,
    tools=[upload_file, run_python_code],
    system_prompt=system_prompt,
    checkpointer=checkpointer,
    # File-search middleware rooted at ./data, the same directory upload_file
    # resolves bare file names against.
    middleware=[FilesystemFileSearchMiddleware(
        root_path="./data",
        use_ripgrep=True,
        max_file_size_mb=100
    )]
)



In [8]:
# Thread id for this conversation; later cells that do not reassign `config`
# keep invoking the agent under the same id.
config = {"configurable": {"thread_id": "movie-1"}}

query = "Upload ./data/IMDB-Movie-Data.csv and create a line chart showing average ratings over years"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)

# Text of the last message in the returned state (presumably the agent's final reply).
response = result['messages'][-1].text
print(f"\nResponse:\n{response}")


Response:



In [9]:
# `config` is not reassigned here, so this call runs under the thread_id set in
# the previous cell ("movie-1").
query = "show me how many files are there? print all files."

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The IMDB Movie Data has been uploaded and analyzed. I have created a line chart showing the average movie ratings from 2006 to 2016.

**Summary of the analysis:**
- The dataset was grouped by the 'Year' column.
- The mean 'Rating' was calculated for each year.
- A line chart was generated to visualize the trend of ratings over time.

The chart illustrates how the average rating of movies in the dataset has fluctuated over the decade.


In [10]:
# Switch to a new thread id for the finance questions.
config = {"configurable": {"thread_id": "finance-1"}}

# The query names no file path; per the system prompt the agent is expected to
# search for the data file itself before uploading it.
query = "Analyze apple 2024 and calculate financial ratios"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Based on the financial statements for the fiscal year ended September 30, 2024, here is the financial ratio analysis for Apple Inc.

### **Apple 2024 Financial Ratio Analysis**

| Ratio Category | Financial Ratio | Value |
| :--- | :--- | :--- |
| **Liquidity Ratios** | Current Ratio | **0.87** |
| | Quick Ratio | **0.83** |
| | Cash Ratio | **0.37** |
| **Profitability Ratios** | Gross Margin (%) | **46.21%** |
| | Operating Margin (%) | **31.51%** |
| | Net Profit Margin (%) | **23.97%** |
| | Return on Assets (ROA) | **25.68%** |
| | Return on Equity (ROE) | **164.59%** |
| **Solvency Ratios** | Debt-to-Equity Ratio | **1.87** |
| | Debt-to-Assets Ratio | **0.29** |

---

### **Key Insights:**

1.  **Profitability Excellence:** Apple continues to demonstrate high profitability with a **Gross Margin of 46.21%** and a **Net Profit Margin of nearly 24%**. These figures reflect stro

In [11]:
# Follow-up on the same thread: `config` still holds thread_id "finance-1".
query = "analyze google cashflow for 2024 and apple cashflow 2024. compare financial ratios. also show comparison table"

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
### **Cash Flow Analysis: Apple vs. Google (2024)**

The cash flow statements for both tech giants in 2024 reveal distinct operational strategies and capital priorities.

#### **Cash Flow Highlights (USD Billions)**
| Metric | Apple (FY 2024) | Google (FY 2024) |
| :--- | :--- | :--- |
| **Operating Cash Flow (OCF)** | $118.25 B | $125.30 B |
| **Capital Expenditure (CapEx)** | $9.45 B | $52.54 B |
| **Free Cash Flow (FCF)** | $108.81 B | $72.76 B |
| **Net Common Stock Buybacks** | $94.95 B | $62.22 B |
| **Dividends Paid** | $15.23 B | $7.36 B |

**Key Findings:**
*   **Investment Intensity:** Google is significantly more capital-intensive, spending **$52.5B** on CapEx (primarily AI infrastructure and data centers), compared to Apple's **$9.5B**.
*   **Cash Generation:** While Google generates more raw Operating Cash Flow, Apple’s lower CapEx requirements result in a much higher 

In [12]:
# NOTE(review): "goodle_2024.xlsx" looks like a typo for "google_2024.xlsx" —
# confirm the actual file name in ./data before changing the prompt.
query = """Use data/goodle_2024.xlsx and Analyze the 2024 cash flow and financial performance of Google (Alphabet) and Apple.
Compute key financial ratios for both companies, including Gross Profit Margin,
Net Profit Margin, ROA, ROE, Current Ratio, and Debt-to-Equity Ratio.

Create the following visualizations:
1. A grouped bar chart comparing Apple vs Google for each financial ratio
   (x-axis: ratio names, y-axis: ratio values, separate bars for Apple and Google).
2. A comparison table summarizing all calculated ratios side by side.

Add clear titles, axis labels, legends, and use distinct colors for each company.
After generating the plots and table, interpret what the visual comparison reveals
about profitability, efficiency, liquidity, and leverage differences between Apple
and Google in 2024.
"""

# New thread id for the plotting request.
config = {"configurable": {"thread_id": "plots-1"}}

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!



In [13]:
# Display the full state dict returned by the previous invoke for inspection.
result

{'messages': [HumanMessage(content='Use data/goodle_2024.xlsx and Analyze the 2024 cash flow and financial performance of Google (Alphabet) and Apple.\nCompute key financial ratios for both companies, including Gross Profit Margin,\nNet Profit Margin, ROA, ROE, Current Ratio, and Debt-to-Equity Ratio.\n\nCreate the following visualizations:\n1. A grouped bar chart comparing Apple vs Google for each financial ratio\n   (x-axis: ratio names, y-axis: ratio values, separate bars for Apple and Google).\n2. A comparison table summarizing all calculated ratios side by side.\n\nAdd clear titles, axis labels, legends, and use distinct colors for each company.\nAfter generating the plots and table, interpret what the visual comparison reveals\nabout profitability, efficiency, liquidity, and leverage differences between Apple\nand Google in 2024.\n', additional_kwargs={}, response_metadata={}, id='72ec05a0-1be6-4e19-bc77-d3bd77cf886a'),
  AIMessage(content=[], additional_kwargs={'function_call': 

In [14]:
# `config` is unchanged, so this question is asked on the "plots-1" thread.
query = """How is Alphabet funding its investing cash outflows: operating cash, debt, or cash reserves? Make sure to keep the currency denomination in your final answer
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Running code in sandbox....
Code execution finished!
Based on the financial data for 2024, Alphabet (Google) is primarily funding its investing cash outflows through its **operating cash flow**.

### Alphabet's 2024 Cash Flow Summary (USD)
*   **Operating Cash Flow:** $125,299,000,000 (Inflow)
*   **Investing Cash Outflow:** $45,536,000,000 (Outflow)
*   **Financing Cash Outflow:** $79,733,000,000 (Outflow)

**Analysis of Funding Sources:**
1.  **Operating Cash:** Alphabet's operations generated **$125.3 billion USD**, which is nearly **2.75 times** the amount needed to cover its **$45.5 billion USD** in investing activities (which includes $52.5 billion in Capital Expenditures, partially offset by sales of investments).
2.  **Debt:** While Alphabet issued $13.59 billion USD in

In [15]:
# The prompt gives only the bare file name; upload_file resolves names that do
# not start with data/ against ./data.
query = """
Using titanic.csv, calculate the survival rate for each passenger class (Pclass) and create a bar chart where 
the x-axis is Pclass (1, 2, 3) and the y-axis is survival rate (percentage of passengers who survived). 
Label the axes clearly and add a title explaining the insight.
"""

# New thread id for the Titanic questions.
config = {"configurable": {"thread_id": "titanic-1"}}

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!



In [16]:
# Follow-up on the same thread: `config` still holds thread_id "titanic-1".
query = """
From titanic.csv, group passengers by Sex and Survived, 
then create a grouped bar chart where the x-axis is Sex (male, female), 
the y-axis is passenger count, and bars are split by survival status (0 = died, 1 = survived). 
Include a legend and interpret the result.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The analysis of passenger survival by sex reveals a stark difference in outcomes:

### **Survival Counts by Sex**
| Sex | Died (0) | Survived (1) |
| :--- | :---: | :---: |
| **Female** | 81 | 233 |
| **Male** | 468 | 109 |

### **Interpretation**
- **Higher Female Survival Rate:** Roughly **74%** of females survived, while only about **19%** of males survived. 
- **Gender Disparity:** Despite there being significantly more men on board (577 men vs. 314 women), the absolute number of female survivors (233) is more than double the number of male survivors (109).
- **Historical Context:** This data clearly illustrates the "women and children first" maritime protocol used during the Titanic's evacuation.

The grouped bar chart below visualizes these counts, highlighting the contrast between the two groups.

![Survival by Sex Chart](images/1769241400_chart-0.png)


In [17]:
# Another follow-up on the "titanic-1" thread (`config` is not reassigned).
query = """
Using the Fare column from titanic.csv, create a box plot comparing 
ticket fares of survivors and non-survivors, with Survived (0 = did not survive, 1 = survived)
 on the x-axis and Fare on the y-axis. Apply a logarithmic scale to the Fare axis 
 if the values are highly skewed. Use distinct colors for each survival group, 
 add clear axis labels, and set the plot title to “Ticket Fare Distribution by Survival 
 Status on the Titanic”. After generating the plot, briefly explain what it reveals about 
 the relationship between socioeconomic status and survival probability.
"""

result = agent.invoke({"messages": [HumanMessage(content=query)]}, config=config)
print(result['messages'][-1].text)

Running code in sandbox....
Code execution finished!
The box plot reveals a strong correlation between **socioeconomic status** (proxied by ticket fare) and **survival probability**:

### **Key Insights**
- **Higher Median Fare for Survivors:** The median fare for survivors (approx. 26.0) is significantly higher than that of non-survivors (approx. 10.5).
- **Wealth and Safety:** The upper quartiles and whiskers for survivors extend much further, indicating that passengers who paid premium fares were much more likely to survive.
- **Socioeconomic Advantage:** The use of a log scale highlights that even at the lower end of the spectrum, those who survived generally paid slightly higher fares than those who did not. This suggests that passengers in higher-class cabins (located higher up on the ship and closer to lifeboats) had a distinct advantage.

### **Summary Statistics**
| Survival Status | Count | Mean Fare | Median Fare | Max Fare |
| :--- | :---: | :---: | :---: | :---: |
| **Died