In [1]:
import os
import subprocess
import json
from datetime import datetime
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from typing import List, Dict, Any
import sqlite3
import requests
from openai import OpenAI

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
import os
import requests
import subprocess
import json
from dotenv import load_dotenv
load_dotenv() 

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

In [2]:
tools = [
    {
        "name": "function",
        "type": "function",
        "function":{
            "name": "script_runner",
            "description": "Install a package and run a script from an URL with provided arguments",
            "parameters": {
                "type": "object",
                "properties": {
                    "script_url": {
                        "type": "string",
                        "description": "The URL of the script to run"
                    },
                    "args": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        },
                        "description": "List of arguments to pass to the script"
                    },   

                }, "required": ["script_url", "args"]
            }
        },
    }
]

In [3]:
AIPROXY_TOKEN = os.getenv("AIPROXY_TOKEN")


In [4]:
@app.get("/")
async def root():
    return {"message": "Hello World"}

In [5]:
@app.get("/read")
def read_file(path:str):
    try:
        with open(path, 'r') as f:
            content = f.read()
        return content
    except Exception as e:
        raise HTTPException(status_code=404, detail=str(e))
    

In [6]:
CACHE_FILE = "api_cache.json"


@app.post("/run")
def task_runner(task:str):
    url = "https://aiproxy.sanand.workers.dev/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {AIPROXY_TOKEN}"
    }

    data = {   
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": task
            },
            {
                "role": "system",
                "content": """
You are an assistant who has to do a variety of tasks
If your task involves running a script, you can use the script_runner tool.
If your task involves writing a code, you can use the task_runner tool.
"""
            }
        ],
        "tools": tools,
        "tool_choice": "auto"
    }

    response = requests.post(url, headers=headers, json=data)
    if not response.ok:
        raise HTTPException(status_code=response.status_code, detail=response.text)
    
    res_json = response.json()
    cache = res_json
    with open(CACHE_FILE, "w") as f:
        json.dump(cache, f)
    try:
        argu = res_json['choices'][0]['message']
    except (KeyError, IndexError) as e:
        raise HTTPException(status_code=500, detail=f"Error parsing response: {e}\nResponse: {res_json}")
    
    try:
        arguments = argu['tool_calls'][0]['function']['arguments']
        # If arguments is a string, parse it; otherwise, assume it's already a dict.
        if isinstance(arguments, str):
            func_args = json.loads(arguments)
        else:
            func_args = arguments
        script_url = func_args['script_url']
        email = func_args['args'][0]
        print(script_url, email)
        command = ["uv","run",script_url, email]
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return {"output": result.stdout, "error": result.stderr}
    except Exception as ex:
        raise HTTPException(status_code=500, detail=f"Error executing script: {ex}")

In [None]:


# # Run this only once manually in a separate cell
# import uvicorn
# uvicorn.run(app, host="0.0.0.0", port=8000, reload=False)

import uvicorn
import threading

def run_server():
    uvicorn.run(app, host="0.0.0.0", port=8000, reload=False)

# Start the server in a separate thread
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()



INFO:     Started server process [56619]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [11]:
BASE_URL = "http://127.0.0.1:8000"


# Test the run endpoint
# task_description = "Count occurrences of Monday in the dataset"
task_description = "Install uv (if required) and run https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py with my@gmail.com as the only argument."
response = requests.post(f"{BASE_URL}/run", params={"task": task_description})
print(response.json())  # Should return cached result after first call


https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py my@gmail.com
{'detail': "Error executing script: Command '['uv', 'run', 'https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py', 'my@gmail.com']' returned non-zero exit status 1."}


In [12]:
response.json()

{'detail': "Error executing script: Command '['uv', 'run', 'https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py', 'my@gmail.com']' returned non-zero exit status 1."}

In [17]:
# To kill the threading server

import os
import signal

# Get the process ID of the current Python process
pid = os.getpid()

# Kill the process (forcefully stopping uvicorn)
os.kill(pid, signal.SIGTERM)


: 

In [13]:
CACHE_FILE = "api_cache.json"

# Load and read the JSON file
with open(CACHE_FILE, "r") as f:
    cache_data = json.load(f)

# Print all cached tasks
print(json.dumps(cache_data, indent=4))

{
    "id": "chatcmpl-B0ZOcc1lSFhP78b4NrTBMtfvaxQg6",
    "object": "chat.completion",
    "created": 1739475762,
    "model": "gpt-4o-mini-2024-07-18",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": null,
                "tool_calls": [
                    {
                        "id": "call_DZOpdqSdYYhWi5ppgusZlpFC",
                        "type": "function",
                        "function": {
                            "name": "script_runner",
                            "arguments": "{\"script_url\":\"https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py\",\"args\":[\"my@gmail.com\"]}"
                        }
                    }
                ],
                "refusal": null
            },
            "logprobs": null,
            "finish_reason": "tool_calls"
        }
    ],
    "usage": {
        "prompt_tokens": 170,
     

In [14]:
arguments = cache_data['choices'][0]['message']['tool_calls'][0]['function']['arguments']
# If arguments is a string, parse it; otherwise, assume it's already a dict.
if isinstance(arguments, str):
    func_args = json.loads(arguments)
else:
    func_args = arguments
script_url = func_args['script_url']
email = func_args['args'][0]

In [15]:
script_url

'https://raw.githubusercontent.com/sanand0/tools-in-data-science-public/tds-2025-01/project-1/datagen.py'

In [16]:
email

'my@gmail.com'

In [5]:
EMAIL_SENDER = {
        "type": "function",
        "function": {
            "name": "email_sender",
            "description": "Extract the sender's email address and in response return just the sender's email address",
            "parameters": {
                "type": "object",
                "properties": {
                    "input_location": {
                        "type": "string", 
                        "description": "The relative input image location on user's device"
                    },
                    "output_location": {
                        "type": "string", 
                        "description": "The relative output location on user's device"
                    },
                },
                "required": ["input_location","output_location"],
                "additionalProperties": False,
            },
            "strict": True,
        }
    }


In [6]:
def email_sender(input_location:str, output_location:str):
    with open(input_location,"rb") as f:
        text = f.read().decode("utf-8")
        print(text)
        print("********"*5)
    f.close()
    url = "https://aiproxy.sanand.workers.dev/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {AIPROXY_TOKEN}"
    }
    data = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system", 
                "content": "Extract just the sender's email address from this email and return just the sender's email address."
            },
            {
                "role": "user",
                "content": text
            }
        ]
    }
    response = requests.post(url, headers=headers, data=json.dumps(data)).json()
    print(response)
    with open(output_location,"w") as f:
        f.write(response["choices"][0]["message"]["content"].replace(" ",""))
    f.close()
    return {"status": "Successfully Created", "output_file destination": output_location}

In [9]:
email_sender("data/email.txt","data/output.txt")

Delivered-To: xescobar@example.com
MIME-Version: 1.0
From: "Anne Curry" <ashleyquinn@example.com>
Date: Thu, 17 Aug 1995 21:48:50 +0000
Subject: Race couple save approach.
To: "Cynthia Vincent" <xescobar@example.com>
Cc: "Cindy Hoffman" <shepardstephen@example.com>, "Tara Ramirez" <danielbrown@example.org>, "Daniel Robinson" <wintersscott@example.com>
Content-Type: multipart/alternative; boundary="00000000000091a0ba062bcdefca"

--00000000000091a0ba062bcdefca
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

Democratic effort concern room test to school. Media care hundred happen line grow.
If fast purpose evening. Sign three huge policy sport receive. Speak exactly rate meet involve.

--00000000000091a0ba062bcdefca--

****************************************
{'id': 'chatcmpl-B0ymgjb3gn8mFBcUXltFhXqLvi5Wc', 'object': 'chat.completion', 'created': 1739573354, 'model': 'gpt-4o-mini-2024-07-18', 'choices': [{'index': 0, 'message': {'role': 'assistant', 

{'status': 'Successfully Created',
 'output_file destination': 'data/output.txt'}

In [2]:
AIPROXY_TOKEN = os.getenv("AIPROXY_TOKEN")

In [3]:
FORMAT_FILE = {
        "type": "function",
        "function": {
            "name": "format_file",
            "description": "Format a file using Prettier with specified version",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Path to the file to format"
                    },
                "prettier_version": {
                    "type": "string",
                    "description": "Prettier version to use (e.g. '3.4.2')"
                    }
                }, "required": ["path", "prettier_version"]
            }
        }
    }

In [4]:
def task_runner(task:str):
    url = "https://aiproxy.sanand.workers.dev/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {AIPROXY_TOKEN}"
    }
 
    data = {   
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": task
            },
            {
                "role": "system",
                "content": """You are an assistant who has to do a variety of tasks.
- Use script_runner for installing packages and running scripts from URLs
- Use format_file for formatting files with Prettier
- Use task_runner for tasks involving writing a code
- Use sort_contacts for sorting contacts in a JSON file by last name and then by first name
- Use get_completions_image to run the get_completions_image function and save output to a file"""
            }
        ],
        "tools": tools,
        "tool_choice": "auto"
    }

    try:
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=f"API request failed: {str(e)}")
    # if not response.ok:
    #     raise HTTPException(status_code=response.status_code, detail=response.text)
    
    try:
        res_json = response.json()
        message = res_json['choices'][0]['message']
        # tool_calls = message.get('tool_calls', [])
    except (KeyError, IndexError) as e:
        raise HTTPException(status_code=500, detail=f"Error parsing response: {e}\nResponse: {res_json}")
    
    try:
        function_name = message['tool_calls'][0]['function']['name'] # Extract the function name
        if function_name == "format_file":
            arguments = message['tool_calls'][0]['function']['arguments']
            path = arguments['path']
            version = arguments['prettier_version']
            # Checking if the file exists
            if not os.path.exists(path):
                    raise HTTPException(status_code=400, detail=f"File not found: {path}")
            command = ["npx", f"prettier@{version}", "--write", path]
            result = subprocess.run(command, capture_output=True, text=True, check=True)
            return {"output": result.stdout, "error": result.stderr}

SyntaxError: incomplete input (842141944.py, line 55)