# Setting up LLM with Ollama for Data Analysis Agent

This notebook sets up a Large Language Model (LLM) using llama3.1:70b via Ollama, and creates an agent with tools for data analysis, including a Python interpreter.

## Prerequisites
- Ollama installed and running locally.
- The model `llama3.1:70b` pulled: Run `ollama pull llama3.1:70b` in your terminal.
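- Docker installed and running locally (used to build and run the sandbox container).
- Required Python packages installed (run the `%pip install` cells below if they are not already present in your environment).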

In [None]:
%pip install docker requests langchain langchain-community langchain-ollama

In [None]:
# langgraph provides create_react_agent, which a later cell uses to build the agent.
%pip install langgraph langchain-hub

In [None]:
import requests

# Quick connectivity probe for the sandbox's plot endpoint.
url = "http://localhost:8765/plots/sales_by_region.png"
try:
    # A timeout keeps the cell from hanging indefinitely when the container
    # is not running or the port is unreachable.
    r = requests.get(url, timeout=5)
    print(f"Status Code: {r.status_code}")
    print(f"Content Type: {r.headers.get('Content-Type')}")
    print(f"Content Length: {len(r.content)}")
except requests.RequestException as e:
    # Connection refused, timeout, etc. — report instead of raising so the
    # notebook keeps running.
    print(f"Error: {e}")


In [None]:
%%writefile requirements.txt
fastmcp<3
pandas>=2.2
numpy>=1.26
matplotlib>=3.9
starlette


In [None]:
%%writefile Dockerfile
FROM python:3.12-slim

# Create a non-root user
# (the server executes submitted Python code, so it should not run as root)
RUN useradd -m sandboxuser

WORKDIR /app

# Install dependencies before copying server.py so this layer stays cached
# when only the server code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY server.py /app/server.py

# Create directories and set permissions
# NOTE(review): if these paths are bind-mounted at `docker run` time, the host
# directory's ownership presumably replaces this chown — confirm sandboxuser
# can still write to the mounted directories.
RUN mkdir -p /app/data /app/plots && \
    chown -R sandboxuser:sandboxuser /app/data /app/plots

# Switch to non-root user
USER sandboxuser

# Port that server.py listens on
EXPOSE 8765

CMD ["python", "server.py"]


In [None]:
%%writefile server.py
import os
import sys
import io
import json
from contextlib import redirect_stdout
from typing import Dict, Any, List

from fastmcp import FastMCP
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Directories inside the container: plots are written to PLOTS_DIR and
# input files are read from DATA_DIR.
PLOTS_DIR = "/app/plots"
DATA_DIR = "/app/data"

# The single FastMCP server instance that the tool and HTTP route below attach to.
mcp = FastMCP(
    version="2025.12",
    instructions="Secure Python execution environment for data analysis",
    name="Data Analyst Sandbox",
)

# Top-level packages that submitted code may import explicitly. Everything
# listed here is either already exposed in the execution namespace (pandas,
# numpy, matplotlib, os) or a small pure-computation stdlib module.
_ALLOWED_IMPORT_ROOTS = frozenset(
    {"pandas", "numpy", "matplotlib", "os", "math", "statistics", "datetime", "json"}
)


def _restricted_import(name, globals=None, locals=None, fromlist=(), level=0):
    """Import hook for submitted code: only whitelisted top-level packages load."""
    if level != 0 or name.split(".")[0] not in _ALLOWED_IMPORT_ROOTS:
        raise ImportError(f"Import of '{name}' is not allowed in the sandbox")
    return __import__(name, globals, locals, fromlist, level)


@mcp.tool
def execute_python_code(code: str) -> dict:
    """Execute Python code in a restricted environment.

    The code runs via ``exec`` in a fresh namespace that already contains
    ``pd``, ``np``, ``plt``, ``os``, ``DATA_DIR`` and ``PLOTS_DIR``. Only a
    reduced set of builtins is available, and ``import`` is limited to a small
    whitelist of packages. Files already present in ``PLOTS_DIR`` are deleted
    before execution so the returned plot list only contains images written
    by this call.

    Args:
        code: Python source to execute.

    Returns:
        On success: ``{"output": <captured stdout>, "plots": [<url>, ...],
        "success": True}``.
        On failure: ``{"output": <stdout captured before the error>,
        "error": "<ExceptionType>: <message>", "plots": [],
        "success": False}``.
    """
    # NOTE(security): this is exec() on caller-supplied code, and `os` is
    # exposed in the namespace, so the builtin restriction is NOT a real
    # security boundary. Isolation relies on the container and non-root user.
    safe_builtins = {
        "len": len,
        "range": range,
        "print": print,
        "int": int,
        "float": float,
        "str": str,
        "bool": bool,
        "list": list,
        "dict": dict,
        "set": set,
        "tuple": tuple,
        "enumerate": enumerate,
        "zip": zip,
        "map": map,
        "filter": filter,
        "sorted": sorted,
        "reversed": reversed,
        "any": any,
        "all": all,
        "abs": abs,
        "round": round,
        "isinstance": isinstance,
        "min": min,
        "max": max,
        "sum": sum,
        # Without an __import__ entry every `import` statement in submitted
        # code fails, even `import pandas as pd`.
        "__import__": _restricted_import,
    }

    namespace: Dict[str, Any] = {
        "pd": pd,
        "np": np,
        "plt": plt,
        "os": os,
        "DATA_DIR": DATA_DIR,
        "PLOTS_DIR": PLOTS_DIR,
        "__builtins__": safe_builtins,
    }

    output = io.StringIO()
    try:
        # Clear previous plots to ensure we only return new ones
        for f in os.listdir(PLOTS_DIR):
            file_path = os.path.join(PLOTS_DIR, f)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except OSError:
                # Best effort: a file we cannot delete must not block execution.
                continue

        with redirect_stdout(output):
            exec(code, namespace, namespace)

        # Everything left in PLOTS_DIR was produced by this call.
        plots = [
            f"http://localhost:8765/plots/{f}"
            for f in sorted(os.listdir(PLOTS_DIR))
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]

        return {
            "output": output.getvalue(),
            "plots": plots,
            "success": True
        }

    except Exception as e:
        # Keep whatever was printed before the failure; it is often the most
        # useful debugging context for the caller.
        return {
            "output": output.getvalue(),
            "error": f"{type(e).__name__}: {str(e)}",
            "plots": [],
            "success": False
        }
    finally:
        # pyplot keeps figures alive globally; close them so a long-running
        # server does not accumulate memory or leak state between calls.
        plt.close("all")

# Plot serving endpoint, registered on the FastMCP server itself.
# `mcp.http_app()` builds a new Starlette app on every call, so a route added
# to its return value is not part of the app that `mcp.run()` serves;
# `custom_route` registers the handler on the server instead.
from starlette.responses import FileResponse, JSONResponse


@mcp.custom_route("/plots/{filename}", methods=["GET"])
async def serve_plot(request):
    """Return the image named by the ``filename`` path parameter from PLOTS_DIR.

    Responds with the file contents when it exists as a regular file, and a
    JSON 404 ``{"error": "File not found"}`` otherwise.
    """
    # basename() drops any directory components so a crafted name cannot
    # address files outside PLOTS_DIR.
    filename = os.path.basename(request.path_params["filename"])
    file_path = os.path.join(PLOTS_DIR, filename)
    # isfile() (not exists()) so directories such as ".." are a 404 rather
    # than an error inside FileResponse.
    if filename and os.path.isfile(file_path):
        return FileResponse(file_path)
    return JSONResponse({"error": "File not found"}, status_code=404)

if __name__ == "__main__":
    # Serve over HTTP so MCP traffic and plot downloads share port 8765;
    # listening on 0.0.0.0 makes the server reachable through the published
    # container port.
    mcp.run(port=8765, host="0.0.0.0", transport="http")


In [None]:
# Build the sandbox image from the files in the current directory (build context ".").
!docker build -t mcp-data-sandbox:2025 .

In [None]:
import os
import shlex
import subprocess

# First stop & remove if exists. Output is discarded with subprocess.DEVNULL
# rather than the Windows-only `2>nul`, which on POSIX shells creates a stray
# file called "nul". A non-zero exit (no such container) is expected and ignored.
for action in ("stop", "rm"):
    subprocess.run(
        ["docker", action, "mcp-sandbox"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )

# Ensure local directories exist
os.makedirs('local-data', exist_ok=True)
os.makedirs('local-plots', exist_ok=True)

# Run new container with volume mapping for both data and plots.
# Docker requires absolute host paths for bind mounts.
current_dir = os.getcwd()
docker_args = [
    "docker", "run", "-d",
    "--name", "mcp-sandbox",
    # Publish on loopback only: the server executes arbitrary Python, so it
    # should not be reachable from other machines on the network.
    "-p", "127.0.0.1:8765:8765",
    "-v", f"{os.path.join(current_dir, 'local-data')}:/app/data",
    "-v", f"{os.path.join(current_dir, 'local-plots')}:/app/plots",
    "mcp-data-sandbox:2025",
]
# Passing an argument list (no shell) avoids quoting problems when the
# working directory contains spaces or shell metacharacters.
docker_cmd = shlex.join(docker_args)
print(f"Running: {docker_cmd}")
result = subprocess.run(docker_args, capture_output=True, text=True)
print(result.stdout.strip() or result.stderr.strip())
# Fail loudly here rather than letting later cells hit a missing container.
result.check_returncode()


In [None]:
import subprocess
import os

def upload_file_to_container(local_path: str):
    """Copy a local file into the ``mcp-sandbox`` container under /app/data/.

    Args:
        local_path: Path of the file on the host.

    Returns:
        A confirmation message naming the uploaded file.

    Raises:
        FileNotFoundError: If ``local_path`` does not exist.
        subprocess.CalledProcessError: If ``docker cp`` exits non-zero.
    """
    if os.path.exists(local_path):
        # Make sure the host-side volume folder is present (docker cp itself
        # does not depend on it).
        os.makedirs('local-data', exist_ok=True)

        file_name = os.path.basename(local_path)
        destination = "mcp-sandbox:/app/data/" + file_name

        # Invoke docker directly with an argument list; check=True raises on failure.
        subprocess.run(["docker", "cp", local_path, destination], check=True)

        return f"File {file_name} uploaded to container at /app/data/"

    raise FileNotFoundError(f"File not found: {local_path}")

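# Upload the sample dataset used by the agent test at the end of the notebook.
# Requires the mcp-sandbox container to be running and the CSV to exist in the working directory.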
upload_file_to_container('synthetic_sales_data.csv')


In [None]:
import mcp
import mcp.types as mcp_types
setattr(mcp, 'types', mcp_types)

from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field
import asyncio
from fastmcp import Client

# Argument schema for the sandbox tool: a single required string field holding
# the Python source to run. Passed as `args_schema` when the tool is built.
class ExecuteCodeSchema(BaseModel):
    code: str = Field(..., description="Python code to execute in the sandbox")

async def call_mcp_execute_async(code: str) -> str:
    """Run ``code`` through the server's ``execute_python_code`` MCP tool.

    Returns the text of the first content item of the tool result, or
    ``str(result)`` when the result has no content. Any exception is converted
    into an ``"Error calling MCP server: ..."`` string instead of propagating.
    """
    try:
        # FastMCP v2 exposes its HTTP transport under the /mcp path.
        async with Client("http://localhost:8765/mcp") as session:
            response = await session.call_tool("execute_python_code", {"code": code})

            content_items = getattr(response, 'content', None)
            if not content_items:
                return str(response)
            # The first content item carries the tool's text payload.
            return content_items[0].text
    except Exception as exc:
        return f"Error calling MCP server: {str(exc)}"

def call_mcp_execute(code: str) -> str:
    """Sync wrapper for the async MCP call.

    Works both when no event loop is running in the calling thread (plain
    scripts, worker threads) and when one already is (e.g. a Jupyter kernel).

    Args:
        code: Python source to execute in the sandbox.

    Returns:
        The string returned by ``call_mcp_execute_async``.
    """
    import concurrent.futures

    try:
        # get_running_loop() raises RuntimeError when this thread has no
        # active loop; unlike get_event_loop() it never creates one implicitly.
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(call_mcp_execute_async(code))

    # A loop is already running in this thread, so we cannot block on it.
    # Run the coroutine on a fresh loop in a short-lived worker thread and
    # wait for its result. The code is submitted exactly once.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, call_mcp_execute_async(code)).result()

# Expose the synchronous MCP wrapper to LangChain as a structured tool whose
# single argument is described by ExecuteCodeSchema.
mcp_execute = StructuredTool.from_function(
    args_schema=ExecuteCodeSchema,
    description="Execute Python code in the secure remote sandbox. Handles data analysis, plotting (save to /app/plots/), and returns output/plots URLs. Files are available in /app/data/ if uploaded manually.",
    name="sandbox_python_exec",
    func=call_mcp_execute,
)

# Tool list handed to the agent.
tools = [mcp_execute]
print("MCP tool defined: sandbox_python_exec (using FastMCP Client SDK)")


In [None]:
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent

# System prompt: tells the model which tool to use, where files live inside
# the sandbox, and to surface plot URLs in its answer.
ANALYST_PROMPT = (
    "You are a helpful data analyst. Use the sandbox_python_exec tool to perform analysis and create plots. "
    "Always save plots to /app/plots/ and load data from /app/data/. "
    "When you create a plot, the tool will return a URL. Mention this URL in your response so the user can see it."
)

# Chat model served by the local Ollama instance.
llm = ChatOllama(
    num_predict=2048,
    temperature=0.1,
    model="llama3.1:70b",
)

# ReAct-style agent wired to the sandbox tool list.
agent_executor = create_react_agent(llm, tools, prompt=ANALYST_PROMPT)

print("Data Analyst Agent created with ReAct workflow (using llama3.1:70b).")


In [None]:
from IPython.display import Image, display
import json

def display_agent_response(response):
    """Print agent messages and render any plots found in tool outputs.

    Args:
        response: Mapping with a ``'messages'`` list; each message exposes
            ``.type`` and ``.content``. Tool messages are expected to carry a
            JSON object with optional ``output``, ``error`` and ``plots`` keys.

    AI messages with content are printed. For tool messages the captured
    output and any error are printed and each plot URL is rendered inline.
    Tool content that is not a JSON object is printed verbatim.
    """
    for message in response['messages']:
        if message.type == 'ai' and message.content:
            print(f"\nAGENT: {message.content}")

        if message.type != 'tool':
            continue

        try:
            # Parse the tool output (JSON string)
            data = json.loads(message.content)
        except (TypeError, ValueError):
            # Not JSON (e.g. an "Error calling MCP server" string) or not a
            # string at all: fall back to showing it as-is.
            data = None

        if not isinstance(data, dict):
            print(f"\n[Raw Tool Output]:\n{message.content}")
            continue

        # Print text output if present
        if data.get('output'):
            print(f"\n[Tool Output]:\n{data['output']}")

        # Surface sandbox failures instead of silently dropping them
        if data.get('error'):
            print(f"\n[Tool Error]:\n{data['error']}")

        # Display plots
        for plot_url in data.get('plots') or []:
            print(f"Rendering Plot: {plot_url}")
            display(Image(url=plot_url))


In [None]:
# Final Agent Test: Comprehensive Data Analysis
import os

# Define a multi-step question for the agent. Adjacent string literals are
# concatenated, so every fragment must end with a space to keep the numbered
# steps separated in the final prompt.
question = (
    "Analyze the sales data in 'synthetic_sales_data.csv'. "
    "1. Calculate the total sales revenue (Quantity * Unit_Price). "
    "2. Identify the top 2 products by revenue. "
    "3. Create a bar chart showing total sales by Region. "
    "4. Give a summary of findings. "
    "Save the plot as '/app/plots/sales_by_region.png'."
)

print(f"--- Final Agent Test ---\nQuestion: {question}\n")

# Invoke the agent
response = agent_executor.invoke({"messages": [("user", question)]})

# Display results and plots
display_agent_response(response)
