diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..47cbf4c8 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +# Copy this file to .env and customize as needed + +# Gitea Service Configuration +GITEA_URL=http://host.docker.internal:3000 +GITEA_USERNAME=gitea +GITEA_PASSWORD=gitea123 diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 53061397..8cba8b47 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -77,6 +77,8 @@ jobs: dockerfile: src/envs/sumo_rl_env/server/Dockerfile - name: atari-env dockerfile: src/envs/atari_env/server/Dockerfile + - name: git-env + dockerfile: src/envs/git_env/server/Dockerfile steps: - name: Checkout code diff --git a/examples/local_git_env.py b/examples/local_git_env.py new file mode 100644 index 00000000..8cb49bce --- /dev/null +++ b/examples/local_git_env.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Simple test showing how users will use GitEnv.from_docker_image(). + +This is the simplest possible usage. + +Prerequisites: + 1. .env file configured (copy from .env.example) + 2. Shared Gitea running: ./scripts/setup_shared_gitea.sh + 3. 
def main():
    """Run an end-to-end smoke test of ``GitEnv.from_docker_image()``.

    The test reads the Gitea connection settings from the process
    environment, starts a ``git-env:latest`` container with them, and then
    resets the environment, lists repositories, clones ``OpenEnv`` and runs
    a few read-only git commands, printing the outcome of each step.

    Returns:
        True when every step ran without raising; False when a required
        environment variable is missing or any step raised.
    """
    print("=" * 60)
    print("GitEnv.from_docker_image() Test")
    print("=" * 60)
    print()

    # Tracks the live client so the container is always torn down, even
    # when a step in the middle of the walkthrough raises.
    client = None

    try:
        # Pass environment variables from .env to container
        env_vars = {
            "GITEA_URL": os.getenv("GITEA_URL"),
            "GITEA_USERNAME": os.getenv("GITEA_USERNAME"),
            "GITEA_PASSWORD": os.getenv("GITEA_PASSWORD"),
        }

        # Verify env vars are loaded
        if not all(env_vars.values()):
            print("❌ Error: Required environment variables not found in .env")
            print(" Make sure .env file exists (copy from .env.example)")
            return False

        print("Creating client from Docker image with .env credentials...")
        print(" Using GitEnv.from_docker_image() factory method")
        print()

        # Create client using from_docker_image factory method
        client = GitEnv.from_docker_image("git-env:latest", env_vars=env_vars)

        print("✓ Client created and container started!\n")

        # Now use it like any other client
        print("Testing the environment:")
        print("-" * 60)

        # Reset
        print("\n1. Reset:")
        result = client.reset()
        print(f" Message: {result.observation.message}")
        print(f" Success: {result.observation.success}")

        # Get initial state
        state = client.state()
        print(f" State: episode_id={state.episode_id}, step_count={state.step_count}")
        print(f" Gitea ready: {state.gitea_ready}")

        # List repositories
        print("\n2. List repositories:")
        result = client.step(GitAction(action_type="list_repos"))
        print(f" Success: {result.observation.success}")
        print(f" Found {len(result.observation.repos)} repositories")
        for repo in result.observation.repos:
            print(f" - {repo['name']}")

        # Clone repository
        print("\n3. Clone repository:")
        result = client.step(GitAction(action_type="clone_repo", repo_name="OpenEnv"))
        print(f" Success: {result.observation.success}")
        print(f" Message: {result.observation.message}")
        print(f" Output: {result.observation.output}")

        # Execute git commands
        print("\n4. Execute git commands:")

        git_commands = [
            "status",
            "log --oneline -5",
            "branch -a",
        ]

        for cmd in git_commands:
            result = client.step(
                GitAction(action_type="execute_git_command", command=cmd, working_dir="OpenEnv")
            )
            print(f"\n git {cmd}:")
            print(f" Success: {result.observation.success}")
            if result.observation.output:
                # Show first few lines; split once and reuse for the
                # truncation marker.
                output_lines = result.observation.output.strip().split("\n")
                for line in output_lines[:5]:
                    print(f" {line}")
                if len(output_lines) > 5:
                    print(" ...")

        # Check final state
        print("\n5. Check final state:")
        state = client.state()
        print(f" episode_id: {state.episode_id}")
        print(f" step_count: {state.step_count}")
        print(f" gitea_ready: {state.gitea_ready}")

        print("\n" + "-" * 60)
        print("\n✓ All operations successful!")
        print()

        print("Cleaning up...")
        # Hand ownership to a local first so the ``finally`` clause does not
        # attempt a second close if this one raises.
        closing, client = client, None
        closing.close()
        print("✓ Container stopped and removed")
        print()

        print("=" * 60)
        print("Test completed successfully!")
        print("=" * 60)

        return True

    except Exception as e:
        # Top-level boundary of a demo script: report and signal failure.
        print(f"\n❌ Test failed: {e}")
        import traceback

        traceback.print_exc()
        return False

    finally:
        # Failure path: the container is still running, so stop it rather
        # than leaking it on the developer's machine.
        if client is not None:
            try:
                client.close()
            except Exception as close_error:
                print(f"Cleanup failed: {close_error}")
(${elapsed}s/${timeout}s)" + sleep 2 + elapsed=$((elapsed + 2)) +done + +if [ $elapsed -ge $timeout ]; then + echo " ✗ Timeout waiting for Gitea" + exit 1 +fi + +# Initialize Gitea (POST to root URL) +echo "3. Initializing Gitea configuration..." +docker exec openenv-gitea curl -s -X POST \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "db_type=sqlite3" \ + -d "db_path=%2Fdata%2Fgitea%2Fgitea.db" \ + -d "app_name=Gitea" \ + -d "repo_root_path=%2Fdata%2Fgit%2Frepositories" \ + -d "run_user=git" \ + -d "domain=gitea" \ + -d "http_port=3000" \ + -d "app_url=http%3A%2F%2Fgitea%3A3000%2F" \ + -d "log_root_path=%2Fdata%2Fgitea%2Flog" \ + -d "offline_mode=on" \ + http://localhost:3000/ > /dev/null || echo " (Config may already exist)" + +# Create admin user +echo "4. Creating admin user ($GITEA_USERNAME)..." +docker exec openenv-gitea su git -c \ + "gitea admin user create --username $GITEA_USERNAME --password $GITEA_PASSWORD --email ${GITEA_USERNAME}@local.env --admin" \ + 2>&1 | grep -q "already exists" && echo " ✓ User already exists" || echo " ✓ User created" + +echo +echo "=====================================" +echo "✓ Gitea setup complete!" 
+echo "=====================================" +echo +echo "Gitea is now available at:" +echo " - Web UI: http://localhost:3000" +echo " - From containers: http://gitea:3000" +echo +echo "Admin credentials are configured from .env file" +echo +echo "To stop Gitea:" +echo " docker-compose -f src/envs/git_env/docker-compose.gitea.yml down" +echo +echo "To remove all data:" +echo " docker-compose -f src/envs/git_env/docker-compose.gitea.yml down -v" +echo diff --git a/src/core/http_env_client.py b/src/core/http_env_client.py index 905cad9a..b304e088 100644 --- a/src/core/http_env_client.py +++ b/src/core/http_env_client.py @@ -46,6 +46,7 @@ def from_docker_image( cls: Type[EnvClientT], image: str, provider: Optional["ContainerProvider"] = None, + **kwargs: Any, ) -> EnvClientT: """ Create an environment client by spinning up a Docker container locally. @@ -61,6 +62,8 @@ def from_docker_image( Args: image: Docker image name to run (e.g., "echo-env:latest") provider: Container provider to use (defaults to LocalDockerProvider) + **kwargs: Additional arguments to pass to provider.start_container() + (e.g., env_vars, port) Returns: An instance of the client class connected to the running container @@ -72,6 +75,12 @@ def from_docker_image( >>> # Create environment from image >>> env = CodingEnv.from_docker_image("coding-env:latest") >>> + >>> # Create environment with custom env vars + >>> env = CodingEnv.from_docker_image( + ... "coding-env:latest", + ... env_vars={"MY_VAR": "value"} + ... ) + >>> >>> # Use the environment >>> result = env.reset() >>> print(result.observation) @@ -87,8 +96,8 @@ def from_docker_image( if provider is None: provider = LocalDockerProvider() - # 1. Start container - base_url = provider.start_container(image) + # 1. Start container with optional kwargs (e.g., env_vars, port) + base_url = provider.start_container(image, **kwargs) # 2. 
Wait for server to be ready provider.wait_for_ready(base_url) diff --git a/src/core/tools/__init__.py b/src/core/tools/__init__.py index 5690ae83..034e7f06 100644 --- a/src/core/tools/__init__.py +++ b/src/core/tools/__init__.py @@ -6,6 +6,11 @@ """Core tools for code execution and other utilities.""" +from .git_server_client import GitServerClient, RepoInfo from .local_python_executor import PyExecutor -__all__ = ["PyExecutor"] +__all__ = [ + "PyExecutor", + "GitServerClient", + "RepoInfo", +] \ No newline at end of file diff --git a/src/core/tools/git_server_client.py b/src/core/tools/git_server_client.py new file mode 100644 index 00000000..31b1ed4c --- /dev/null +++ b/src/core/tools/git_server_client.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python3 +""" +Git Server Client for connecting to external Gitea instance. + +This module provides a lightweight client for interacting with a shared +Gitea service, optimized for task-based isolation where multiple environment +instances share the same Gitea server but have isolated workspaces. +""" + +import json +import os +import shutil +import subprocess +import time +from dataclasses import dataclass +from pathlib import Path +from urllib.parse import urlparse + + +@dataclass +class RepoInfo: + """Information about a repository.""" + + name: str + url: str + commit: str + clone_url: str + + +class GitServerClient: + """ + Client for connecting to an external Gitea server. 
+ + This client is optimized for task-based isolation where: + - Multiple tasks share the same Gitea instance + - Each task has its own isolated workspace + - Fast reset() via git operations (no server restart) + - Repos are pre-migrated to Gitea once + + Args: + gitea_url: URL of the Gitea server (e.g., "http://gitea:3000") + username: Gitea username for authentication + password: Gitea password for authentication + workspace_dir: Local workspace directory for cloning repos + + Example: + >>> # Connect to shared Gitea (credentials from environment) + >>> import os + >>> client = GitServerClient( + ... gitea_url=os.getenv("GITEA_URL"), + ... username=os.getenv("GITEA_USERNAME"), + ... password=os.getenv("GITEA_PASSWORD") + ... ) + >>> client.wait_for_ready() + >>> # Clone repo to workspace + >>> path = client.clone_to_workspace("my-repo", commit="abc123") + >>> # Fast reset to base state + >>> client.reset_workspace("my-repo", commit="abc123") + """ + + def __init__( + self, + gitea_url: str, + username: str, + password: str, + workspace_dir: str = "/workspace", + ): + """Initialize Git Server Client.""" + self.gitea_url = gitea_url.rstrip("/") + self.username = username + self.password = password + self.workspace_dir = Path(workspace_dir) + self.is_ready = False + + # Parse Gitea URL + parsed = urlparse(self.gitea_url) + self.domain = parsed.hostname or "localhost" + self.port = parsed.port or 3000 + + # Ensure workspace exists + os.makedirs(self.workspace_dir, exist_ok=True) + + # Configure git credentials + self._configure_git() + + def _configure_git(self): + """Configure git credentials for automatic authentication.""" + home_dir = Path.home() + + # Git config + git_config = f"""[user] + name = {self.username} + email = {self.username}@local.env +[init] + defaultBranch = main +[credential] + helper = store +""" + gitconfig_path = home_dir / ".gitconfig" + gitconfig_path.write_text(git_config) + + # Git credentials + git_credentials = 
f"http://{self.username}:{self.password}@{self.domain}:{self.port}\n" + gitcreds_path = home_dir / ".git-credentials" + gitcreds_path.write_text(git_credentials) + gitcreds_path.chmod(0o600) + + def wait_for_ready(self, timeout: int = 30) -> bool: + """ + Wait for Gitea server to be ready. + + Args: + timeout: Maximum seconds to wait + + Returns: + True if server is ready, False otherwise + """ + start_time = time.time() + while time.time() - start_time < timeout: + try: + result = subprocess.run( + ["curl", "-sf", f"{self.gitea_url}/"], + capture_output=True, + timeout=5, + ) + if result.returncode == 0: + self.is_ready = True + return True + except subprocess.TimeoutExpired: + pass + except Exception: + pass + + time.sleep(1) + + return False + + def list_repositories(self) -> list[dict[str, str]]: + """ + List all repositories in Gitea. + + Returns: + List of repository information dictionaries + """ + if not self.is_ready: + raise RuntimeError("Gitea server is not ready") + + result = subprocess.run( + [ + "curl", + "-s", + f"{self.gitea_url}/api/v1/user/repos", + "-u", + f"{self.username}:{self.password}", + ], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + return [] + + try: + repos = json.loads(result.stdout) + return [ + { + "name": repo["name"], + "full_name": repo["full_name"], + "clone_url": repo["clone_url"], + "description": repo.get("description", ""), + } + for repo in repos + ] + except (json.JSONDecodeError, KeyError): + return [] + + def clone_to_workspace( + self, repo_name: str, target_dir: str | None = None, commit: str = "main" + ) -> str: + """ + Clone a repository to the workspace at a specific commit. + + This creates a fresh clone optimized for task isolation. 
+ + Args: + repo_name: Name of repository to clone + target_dir: Target directory name (defaults to repo_name) + commit: Commit hash or branch to checkout + + Returns: + Path to cloned repository + + Raises: + RuntimeError: If clone fails + """ + if not self.is_ready: + raise RuntimeError("Gitea server is not ready") + + target_dir = target_dir or repo_name + target_path = self.workspace_dir / target_dir + + # Remove existing directory if present + if target_path.exists(): + shutil.rmtree(target_path) + + clone_url = f"{self.gitea_url}/{self.username}/{repo_name}.git" + + # Clone repository + result = subprocess.run( + ["git", "clone", clone_url, str(target_path)], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Clone failed: {result.stderr}") + + # Checkout specific commit + if commit != "main": + result = subprocess.run( + ["git", "checkout", commit], + cwd=str(target_path), + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Checkout failed: {result.stderr}") + + return str(target_path) + + def reset_workspace(self, repo_name: str, commit: str = "main") -> bool: + """ + Fast reset of workspace to base state (optimized for task resets). + + This is much faster than re-cloning. It: + 1. Checks out the target commit + 2. Resets to that commit (hard) + 3. 
Cleans untracked files + + Args: + repo_name: Name of repository (directory in workspace) + commit: Commit hash or branch to reset to + + Returns: + True if reset successful + + Raises: + RuntimeError: If reset fails + """ + repo_path = self.workspace_dir / repo_name + + if not repo_path.exists(): + raise RuntimeError(f"Repository not found in workspace: {repo_name}") + + # Fetch latest (in case commit is new) + subprocess.run( + ["git", "fetch", "--all"], + cwd=str(repo_path), + capture_output=True, + ) + + # Checkout and hard reset to commit + result = subprocess.run( + ["git", "checkout", commit], + cwd=str(repo_path), + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Checkout failed: {result.stderr}") + + result = subprocess.run( + ["git", "reset", "--hard", f"origin/{commit}" if commit != "main" else commit], + cwd=str(repo_path), + capture_output=True, + text=True, + ) + + if result.returncode != 0: + # Try without origin/ prefix + result = subprocess.run( + ["git", "reset", "--hard", commit], + cwd=str(repo_path), + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError(f"Reset failed: {result.stderr}") + + # Clean untracked files and directories + subprocess.run( + ["git", "clean", "-fdx"], + cwd=str(repo_path), + capture_output=True, + ) + + return True + + def execute_git_command( + self, command: str, working_dir: str = "" + ) -> tuple[int, str, str]: + """ + Execute a git command in the workspace. 
+ + Args: + command: Git command to execute (without 'git' prefix) + working_dir: Working directory relative to workspace + + Returns: + Tuple of (exit_code, stdout, stderr) + """ + work_path = ( + self.workspace_dir / working_dir if working_dir else self.workspace_dir + ) + + if not work_path.exists(): + return (1, "", f"Working directory does not exist: {work_path}") + + # Split command safely + cmd_parts = ["git"] + command.split() + + result = subprocess.run( + cmd_parts, + cwd=str(work_path), + capture_output=True, + text=True, + ) + + return (result.returncode, result.stdout, result.stderr) + + def get_current_commit(self, repo_name: str) -> str: + """ + Get current commit hash of a workspace repository. + + Args: + repo_name: Name of repository in workspace + + Returns: + Commit hash + """ + repo_path = self.workspace_dir / repo_name + + if not repo_path.exists(): + raise RuntimeError(f"Repository not found: {repo_name}") + + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=str(repo_path), + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Failed to get commit: {result.stderr}") + + return result.stdout.strip() + + def workspace_exists(self, repo_name: str) -> bool: + """Check if a repository exists in workspace.""" + return (self.workspace_dir / repo_name).exists() diff --git a/src/envs/git_env/README.md b/src/envs/git_env/README.md new file mode 100644 index 00000000..aed850ee --- /dev/null +++ b/src/envs/git_env/README.md @@ -0,0 +1,229 @@ +# Git Environment + +A Git server environment using Gitea that provides isolated Git repository management optimized for task-based RL training. Perfect for training agents on Git operations with fast reset capabilities. + +## Overview + +The Git Environment connects to a **shared external Gitea service** for optimal task-based isolation. 
**Perfect for**: RL training, task-based workflows, parallel execution + +### Architecture + +``` +┌────────────────────────────────────┐ +│ Shared Gitea (start once) │ +│ Port 3000 │ +│ - Pre-migrated repositories │ +└──────────────┬─────────────────────┘ + │ HTTP API + ┌────────┼────────┐ + │ │ │ + ┌───▼──┐ ┌──▼───┐ ┌──▼───┐ + │Env 1 │ │Env 2 │ │Env 3 │ + │Task A│ │Task B│ │Task A│ + │@abc │ │@def │ │@abc │ + └──────┘ └──────┘ └──────┘ + Isolated workspaces +``` + +## Quick Start + +```python +from envs.git_env import GitAction, GitEnv + +# Create environment from Docker image +git_env = GitEnv.from_docker_image("git-env:latest") + +# Reset environment +result = git_env.reset() +print(result.observation.message) + +# List available repositories (pre-migrated to shared Gitea) +result = git_env.step(GitAction(action_type="list_repos")) +for repo in result.observation.repos: + print(f"{repo['name']}: {repo['clone_url']}") + +# Clone to workspace +result = git_env.step(GitAction(action_type="clone_repo", repo_name="OpenEnv")) +print(result.observation.output) # Cloned to: /workspace/OpenEnv + +# Execute git commands +result = git_env.step(GitAction( + action_type="execute_git_command", + command="status", + working_dir="OpenEnv" +)) +print(result.observation.output) + +# Cleanup +git_env.close() +``` + +## Setup and Running the Example + +Complete setup (run these steps in order): + +```bash +# 0. Configure environment variables +cp .env.example .env +# Edit .env and set your Gitea credentials if needed + +# 1. Start shared Gitea service (one-time) +./scripts/setup_shared_gitea.sh + +# 2. Migrate a test repository to Gitea (one-time) +docker exec openenv-gitea curl -X POST \ + http://localhost:3000/api/v1/repos/migrate \ + -u gitea:gitea123 \ + -H 'Content-Type: application/json' \ + -d '{ + "clone_addr": "https://github.com/meta-pytorch/OpenEnv", + "repo_name": "OpenEnv", + "repo_owner": "gitea", + "service": "github" + }' + +# 3.
Build Docker images +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . +docker build -t git-env:latest -f src/envs/git_env/server/Dockerfile . + +# 4. Install Python dependencies +uv pip install -e . + +# 5. Run the example (loads credentials from .env) +python3 examples/local_git_env.py +``` + +**Note**: +- Steps 1-3 are one-time setup +- Make sure `.env` file exists with your Gitea credentials +- After initial setup, you only need step 5 to run the example + +## Environment Details + +### Actions + +**GitAction**: Unified action class for all Git operations + +```python +@dataclass +class GitAction(Action): + action_type: str # Operation type + repo_name: str # Repository name (for clone/execute) + target_dir: Optional[str] # Target directory (for clone) + command: str # Git command (for execute) + working_dir: str # Working directory (for execute) +``` + +**Supported action_type values:** + +#### "clone_repo" - Clone repository to workspace +```python +GitAction(action_type="clone_repo", repo_name="OpenEnv") +GitAction(action_type="clone_repo", repo_name="OpenEnv", target_dir="custom-dir") +``` + +#### "list_repos" - List available repositories +```python +GitAction(action_type="list_repos") +``` + +#### "execute_git_command" - Execute git command +```python +GitAction( + action_type="execute_git_command", + command="status", + working_dir="OpenEnv" +) +``` + +### Observation + +**GitObservation**: Contains results of Git operations + +```python +@dataclass +class GitObservation(Observation): + success: bool # Whether operation succeeded + message: str # Human-readable message + output: str # Command output or detailed result + error: str # Error message if failed + repos: list[dict] # List of repositories (for list_repos) +``` + +### State + +**GitState**: Tracks environment state + +```python +@dataclass +class GitState(State): + episode_id: str # Unique episode identifier + step_count: int # Number of steps taken + gitea_ready: 
bool # Whether Gitea is accessible + workspace_path: str # Path to workspace directory +``` + +## Advanced: Task-Based Training + +For RL training scenarios where you need fast resets to specific repository states, you can configure task-specific base states in the environment. This is done by setting environment variables before starting containers: + +```bash +# Example: Configure tasks for your training setup +docker run \ + -e GITEA_URL=http://host.docker.internal:3000 \ + -e TASK_REPOS='{"bug_fix": ["my-repo", "abc123"], "feature": ["my-repo", "def456"]}' \ + git-env:latest +``` + +Then in your training code, environments automatically reset to the configured state. + +See [`examples/local_git_env.py`](../../../examples/local_git_env.py) for complete working example. + +## Project Structure + +``` +git_env/ +├── README.md # This file +├── __init__.py # Exports +├── models.py # Action, Observation, State definitions +├── client.py # GitEnv HTTP client +├── docker-compose.gitea.yml # Shared Gitea service +└── server/ + ├── __init__.py + ├── git_task_environment.py # Task-optimized environment + ├── app.py # FastAPI application + └── Dockerfile # Lightweight container image +``` + +## Troubleshooting + +### Gitea Not Ready + +If environment can't connect to Gitea: +1. Ensure Gitea is running: `docker ps | grep gitea` +2. Check Gitea URL in environment: `GITEA_URL=http://gitea:3000` +3. 
Verify network connectivity: `docker network ls | grep openenv` + +### Repository Not Found + +Ensure repository is migrated to Gitea: +```bash +# List repos +curl -u gitea:gitea123 http://localhost:3000/api/v1/user/repos +``` + +### Slow Clone/Reset + +- First clone is slower (~5-10s) - downloads from Gitea +- Subsequent resets are fast (<1s) - just git operations +- Use task-based mode with `task_repos` for optimal performance + + +## Security Notes + +- **Never commit `.env` file** - it contains credentials (already in .gitignore) +- Use `.env.example` as a template and create your own `.env` +- Gitea credentials are for local development only +- For production, use proper secret management (Docker secrets, k8s secrets, etc.) +- All workspaces are isolated per container +- Only public repositories supported (no private repo auth) \ No newline at end of file diff --git a/src/envs/git_env/__init__.py b/src/envs/git_env/__init__.py new file mode 100644 index 00000000..5f4ce574 --- /dev/null +++ b/src/envs/git_env/__init__.py @@ -0,0 +1,18 @@ +""" +Git Environment - Git server with Gitea support. + +This environment connects to a shared Gitea service for task-based isolation, +allowing agents to clone repositories, execute git commands, and manage workspaces. + +Note: Repository migration is done externally via Gitea API before environment use. +""" + +from .client import GitEnv +from .models import GitAction, GitObservation, GitState + +__all__ = [ + "GitEnv", + "GitAction", + "GitObservation", + "GitState", +] diff --git a/src/envs/git_env/client.py b/src/envs/git_env/client.py new file mode 100644 index 00000000..6857b0c2 --- /dev/null +++ b/src/envs/git_env/client.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" +GitEnv Client +------------- +Client-side wrapper for the Git environment server. +Talks HTTP to a single base_url exposing: /reset and /step. 
class GitEnv(HTTPEnvClient[GitAction, GitObservation]):
    """
    HTTP client for the Git environment server.

    Actions are serialized to JSON and sent to the server, and its replies
    are turned back into ``GitObservation`` / ``GitState`` objects.

    The server side talks to a shared, external Gitea service; the
    repositories an agent works with must already have been migrated there.

    Example:
        >>> # From Docker image
        >>> client = GitEnv.from_docker_image("git-env:latest")
        >>> result = client.reset()
        >>>
        >>> # List available repositories
        >>> from envs.git_env import GitAction
        >>> result = client.step(GitAction(action_type="list_repos"))
        >>> print(result.observation.repos)
        >>>
        >>> # Clone repository to workspace
        >>> result = client.step(GitAction(action_type="clone_repo", repo_name="OpenEnv"))
        >>>
        >>> # Execute git commands
        >>> result = client.step(GitAction(
        ...     action_type="execute_git_command",
        ...     command="status",
        ...     working_dir="OpenEnv"
        ... ))
        >>>
        >>> # Cleanup
        >>> client.close()
    """

    def _step_payload(self, action: GitAction) -> dict:
        """
        Build the JSON body for the server's /step endpoint.

        Args:
            action: GitAction to serialize

        Returns:
            Dictionary with ``action_type`` plus every optional field the
            action object carries
        """
        body = {"action_type": action.action_type}

        # Optional, action-specific fields; copied in a fixed order and only
        # when the attribute is present on the action object.
        for field_name in ("repo_name", "target_dir", "command", "working_dir"):
            if hasattr(action, field_name):
                body[field_name] = getattr(action, field_name)

        return body

    def _parse_result(self, payload: dict) -> StepResult[GitObservation]:
        """
        Turn a /step response into a StepResult.

        Args:
            payload: Decoded JSON response; must contain an "observation"
                mapping whose keys are GitObservation fields

        Returns:
            StepResult wrapping the reconstructed GitObservation
        """
        observation_fields = payload["observation"]
        observation = GitObservation(**observation_fields)

        reward = payload.get("reward")
        finished = bool(payload.get("done", False))

        return StepResult(observation=observation, reward=reward, done=finished)

    def _parse_state(self, payload: dict) -> GitState:
        """
        Turn a /state response into a GitState.

        Args:
            payload: Decoded JSON response from the /state endpoint

        Returns:
            GitState; missing keys fall back to the defaults used below
        """
        state_fields = {
            "episode_id": payload.get("episode_id"),
            "step_count": payload.get("step_count", 0),
            "gitea_ready": payload.get("gitea_ready", False),
            "workspace_path": payload.get("workspace_path", "/workspace"),
        }
        return GitState(**state_fields)
@dataclass
class GitAction(Action):
    """
    A single request sent to the Git environment.

    One class covers every operation; ``action_type`` selects which:
        - "clone_repo": clone a Gitea repository into the workspace
        - "list_repos": list the repositories available in Gitea
        - "execute_git_command": run a git command inside the workspace

    Fields that do not apply to the chosen operation keep their defaults.
    """

    # Which operation to perform ("clone_repo", "list_repos",
    # "execute_git_command").
    action_type: str = "list_repos"
    # Repository name (for clone_repo, execute_git_command).
    repo_name: str = ""
    # Directory name to clone into (optional, for clone_repo).
    target_dir: Optional[str] = None
    # Git command to execute (for execute_git_command).
    command: str = ""
    # Working directory relative to the workspace (for execute_git_command).
    working_dir: str = ""


@dataclass
class GitObservation(Observation):
    """
    Outcome of a Git action as reported by the server.
    """

    # Whether the action was successful.
    success: bool = False
    # Human-readable summary of the result.
    message: str = ""
    # Command output or detailed result.
    output: str = ""
    # Error message if the action failed.
    error: str = ""
    # Repositories returned by a list_repos action.
    repos: list[dict[str, str]] = field(default_factory=list)


@dataclass
class GitState(State):
    """
    Environment state for the Git environment.

    Adds the Gitea connection status and workspace location to whatever the
    base ``State`` already tracks (this package's README lists
    ``episode_id`` and ``step_count``).
    """

    # Whether the Gitea server is accessible.
    gitea_ready: bool = False
    # Path to the workspace directory.
    workspace_path: str = "/workspace"
/dev/null
+++ b/src/envs/git_env/server/app.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+"""
+FastAPI application for Git Environment.
+
+Importing this module reads the Gitea connection settings from the process
+environment, builds one GitTaskEnvironment from them and wraps it with
+create_app() as the module-level ``app``.
+
+Environment variables:
+    GITEA_URL: URL of shared Gitea service (required)
+    GITEA_USERNAME: Gitea username (required)
+    GITEA_PASSWORD: Gitea password (required)
+    WORKSPACE_DIR: Workspace directory (optional, default: /workspace)
+
+A required variable that is unset or empty raises RuntimeError at import.
+
+Usage:
+    # Development (with auto-reload):
+    uvicorn envs.git_env.server.app:app --reload --host 0.0.0.0 --port 8000
+
+    # Production:
+    uvicorn envs.git_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
+
+    # With custom Gitea:
+    GITEA_URL=http://my-gitea:3000 uvicorn envs.git_env.server.app:app --host 0.0.0.0 --port 8000
+"""
+
+import os
+
+from core.env_server import create_app
+
+from ..models import GitAction, GitObservation
+from .git_task_environment import GitTaskEnvironment
+
+
+def _require_env(name):
+    """Return environment variable *name*, raising RuntimeError if unset or empty."""
+    value = os.getenv(name)
+    if value:
+        return value
+    raise RuntimeError(f"{name} environment variable is required")
+
+
+# Connection settings; checked in the order URL, username, password
+gitea_url = _require_env("GITEA_URL")
+gitea_username = _require_env("GITEA_USERNAME")
+gitea_password = _require_env("GITEA_PASSWORD")
+workspace_dir = os.getenv("WORKSPACE_DIR", "/workspace")
+
+# One environment instance backs the app (connects to external Gitea)
+env = GitTaskEnvironment(
+    gitea_url=gitea_url,
+    username=gitea_username,
+    password=gitea_password,
+    workspace_dir=workspace_dir,
+)
+
+# Expose the environment over HTTP with its action/observation types
+app = create_app(env, GitAction, GitObservation, env_name="git_env")
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git
a/src/envs/git_env/server/git_task_environment.py b/src/envs/git_env/server/git_task_environment.py
new file mode 100644
index 00000000..c2113eb6
--- /dev/null
+++ b/src/envs/git_env/server/git_task_environment.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+
+"""
+Git Task Environment - Optimized for task-based isolation.
+
+This module provides an optimized Git environment for scenarios where:
+- Multiple tasks share the same base repository states
+- Tasks need fast reset() to reproducible states
+- Each task has an isolated workspace
+- A shared Gitea service provides repository storage
+"""
+
+import uuid
+
+from core.env_server import Action, Environment, Observation
+from core.tools import GitServerClient
+
+from ..models import GitAction, GitObservation, GitState
+
+
+class GitTaskEnvironment(Environment):
+    """
+    Git Environment optimized for task-based isolation.
+
+    This environment connects to a shared Gitea service and provides:
+    - reset() that re-uses an existing workspace checkout (no server restart)
+    - A workspace directory per environment instance
+    - Optional per-task base states pinned to specific commits
+
+    Architecture:
+        Shared Gitea Service (external)
+              ↓
+        GitTaskEnvironment instances (many)
+              ↓
+        Isolated workspaces (/workspace)
+
+    Args:
+        gitea_url: URL of shared Gitea service (e.g., "http://gitea:3000")
+        username: Gitea username for authentication
+        password: Gitea password for authentication
+        workspace_dir: Directory for git operations (default: /workspace)
+        task_repos: Dict mapping task names to (repo_name, commit) tuples
+            for pre-configuring task base states
+
+    Example (Basic):
+        >>> env = GitTaskEnvironment(
+        ...     gitea_url="http://localhost:3000",
+        ...     username="gitea",
+        ...     password="gitea123",
+        ... )
+        >>> obs = env.reset()
+        >>> # Clone and work
+        >>> obs = env.step(GitAction(action_type="clone_repo", repo_name="my-repo"))
+        >>> obs = env.step(GitAction(action_type="execute_git_command", command="status", working_dir="my-repo"))
+
+    Example (Task-based):
+        >>> # Pre-configure tasks with specific repo states
+        >>> env = GitTaskEnvironment(
+        ...     gitea_url="http://localhost:3000",
+        ...     username="gitea",
+        ...     password="gitea123",
+        ...     task_repos={
+        ...         "task1": ("my-repo", "abc123"),  # Specific commit
+        ...         "task2": ("my-repo", "def456"),  # Different commit
+        ...     },
+        ... )
+        >>> # Reset to task1 base state
+        >>> obs = env.reset(task_id="task1")
+        >>> # Work on task...
+        >>> # Reset to task2 base state
+        >>> obs = env.reset(task_id="task2")
+    """
+
+    def __init__(
+        self,
+        gitea_url: str,
+        username: str,
+        password: str,
+        workspace_dir: str = "/workspace",
+        task_repos: dict[str, tuple[str, str]] | None = None,
+    ):
+        """
+        Connect to the shared Gitea service and set up initial state.
+
+        A Gitea server that does not become ready does not raise here: a
+        warning is printed and ``state.gitea_ready`` is left False.
+        """
+        super().__init__()
+        self.workspace_dir = workspace_dir
+        self.task_repos = task_repos or {}
+
+        # Initialize Git server client (connects to external Gitea)
+        self._git_client = GitServerClient(
+            gitea_url=gitea_url,
+            username=username,
+            password=password,
+            workspace_dir=workspace_dir,
+        )
+
+        # Initialize state
+        self._state = GitState(workspace_path=workspace_dir)
+        self._current_task_id: str | None = None
+
+        # Record whether Gitea answered; construction continues either way
+        self._state.gitea_ready = bool(self._git_client.wait_for_ready())
+        if not self._state.gitea_ready:
+            print("Warning: Gitea server not ready")
+
+    def reset(self, task_id: str | None = None) -> Observation:
+        """
+        Reset environment to clean state.
+
+        Starts a new episode (fresh episode_id, step_count 0). When task_id
+        names an entry in task_repos, that task's repository is also put at
+        its configured commit:
+        - If the repo is already in the workspace: reset_workspace() on it
+        - Otherwise: clone_to_workspace() from Gitea at that commit
+        A missing or unconfigured task_id only resets the episode state.
+
+        Args:
+            task_id: Optional task identifier for task-specific base states
+
+        Returns:
+            GitObservation describing the result; success is False (with the
+            exception text in ``error``) if preparing the task repo failed
+        """
+        # Initialize fresh state
+        self._state = GitState(
+            episode_id=str(uuid.uuid4()),
+            step_count=0,
+            gitea_ready=self._git_client.is_ready,
+            workspace_path=self.workspace_dir,
+        )
+
+        self._current_task_id = task_id
+
+        # If task_id provided and configured, set up task base state
+        if task_id and task_id in self.task_repos:
+            repo_name, commit = self.task_repos[task_id]
+
+            try:
+                if self._git_client.workspace_exists(repo_name):
+                    # Workspace exists: reset it in place
+                    self._git_client.reset_workspace(repo_name, commit)
+                    message = f"Reset to task '{task_id}' base state (repo: {repo_name}@{commit})"
+                else:
+                    # No checkout yet: clone fresh
+                    self._git_client.clone_to_workspace(repo_name, commit=commit)
+                    message = f"Initialized task '{task_id}' (repo: {repo_name}@{commit})"
+
+                current_commit = self._git_client.get_current_commit(repo_name)
+
+                return GitObservation(
+                    success=True,
+                    message=message,
+                    output=f"Workspace: {self.workspace_dir}/{repo_name}\nCommit: {current_commit}\nTask: {task_id}",
+                )
+            except Exception as e:
+                # Report the failure as an observation instead of raising
+                return GitObservation(
+                    success=False,
+                    message=f"Failed to reset task '{task_id}'",
+                    error=str(e),
+                )
+
+        # Default reset: just ready state, no pre-configured repos
+        return GitObservation(
+            success=True,
+            message="Git task environment ready.",
+            output=f"Workspace: {self.workspace_dir}\nGitea: {self._git_client.gitea_url}\nUse GitAction with action_type='clone_repo' to clone repositories.",
+        )
+
+    def step(self, action: Action) -> Observation:
+        """
+        Execute a Git action and return observation.
+
+        Supported action types:
+        - "clone_repo": Clone repository to workspace
+        - "execute_git_command": Execute git command
+        - "list_repos": List available repositories
+
+        Args:
+            action: GitAction to execute
+
+        Returns:
+            GitObservation with execution results; an unknown action_type or
+            an exception while handling the action yields success=False
+
+        Raises:
+            ValueError: If action is not a GitAction
+        """
+        if not isinstance(action, GitAction):
+            raise ValueError(f"Expected GitAction, got {type(action)}")
+
+        # Every GitAction counts as a step, including unsupported ones
+        self._state.step_count += 1
+
+        # Route to appropriate handler based on action_type
+        handlers = {
+            "clone_repo": self._handle_clone_repo,
+            "list_repos": self._handle_list_repos,
+            "execute_git_command": self._handle_git_command,
+        }
+
+        try:
+            handler = handlers.get(action.action_type)
+            if handler is None:
+                # Name the rejected action_type: the class name is always
+                # "GitAction" here and says nothing about what was requested
+                return GitObservation(
+                    success=False,
+                    message=f"Action not supported in task mode: {action.action_type}",
+                    error="Use shared Gitea for repository migration/creation",
+                )
+            return handler(action)
+        except Exception as e:
+            return GitObservation(
+                success=False, message=f"Action failed: {e}", error=str(e)
+            )
+
+    def _handle_clone_repo(self, action: GitAction) -> GitObservation:
+        """
+        Handle repository clone action.
+
+        Clones action.repo_name into the workspace (action.target_dir is
+        passed through to the client). The ref is "main" unless the current
+        task is configured for this same repository, in which case the
+        task's commit is used.
+        """
+        try:
+            # Determine commit to use
+            commit = "main"  # Default
+
+            # If this repo is part of current task config, use that commit
+            if (
+                self._current_task_id
+                and self._current_task_id in self.task_repos
+            ):
+                task_repo, task_commit = self.task_repos[self._current_task_id]
+                if task_repo == action.repo_name:
+                    commit = task_commit
+
+            clone_path = self._git_client.clone_to_workspace(
+                action.repo_name, action.target_dir, commit=commit
+            )
+
+            return GitObservation(
+                success=True,
+                message=f"Successfully cloned {action.repo_name}",
+                output=f"Cloned to: {clone_path}\nCommit: {commit}",
+            )
+        except Exception as e:
+            return GitObservation(
+                success=False,
+                message=f"Failed to clone repository: {action.repo_name}",
+                error=str(e),
+            )
+
+    def _handle_list_repos(self, action: GitAction) -> GitObservation:
+        """
+        Handle list repositories action.
+
+        Returns the client's repository list both as structured data
+        (``repos``) and as a human-readable listing (``output``).
+        """
+        try:
+            repos = self._git_client.list_repositories()
+
+            # Format output
+            if not repos:
+                output = "No repositories available."
+            else:
+                # Collect the pieces and join once instead of repeated +=
+                lines = ["Available repositories:\n"]
+                for repo in repos:
+                    lines.append(f" - {repo['name']}: {repo['clone_url']}\n")
+                    if repo.get("description"):
+                        lines.append(f" {repo['description']}\n")
+                output = "".join(lines)
+
+            return GitObservation(
+                success=True,
+                message=f"Found {len(repos)} repositories",
+                output=output,
+                repos=repos,
+            )
+        except Exception as e:
+            return GitObservation(
+                success=False, message="Failed to list repositories", error=str(e)
+            )
+
+    def _handle_git_command(self, action: GitAction) -> GitObservation:
+        """
+        Handle git command execution action.
+
+        Runs action.command through the Git client in action.working_dir.
+        success mirrors a zero exit code; stdout goes to ``output`` and
+        stderr to ``error``.
+        """
+        try:
+            exit_code, stdout, stderr = self._git_client.execute_git_command(
+                action.command, action.working_dir
+            )
+
+            success = exit_code == 0
+            message = f"Git command {'succeeded' if success else 'failed'}"
+
+            return GitObservation(
+                success=success, message=message, output=stdout, error=stderr
+            )
+        except Exception as e:
+            return GitObservation(
+                success=False,
+                message=f"Failed to execute git command: {action.command}",
+                error=str(e),
+            )
+
+    @property
+    def state(self) -> GitState:
+        """Get current environment state."""
+        return self._state