microsoft · KavyaSree2610 · Oct 10, 2025 · Oct 2, 2025 · Oct 5, 2025 · Oct 6, 2025
diff --git a/.codecov.yml b/.codecov.yml
@@ -0,0 +1,29 @@
+# Codecov configuration: ignored paths, PR comment format, and status checks.
+ignore:
+  - 'docs/'
+  - 'test/'
+  - '**/test_*.py'
+  - 'setup.py'
+  - 'conftest.py'
+  - 'README.md'
+  - 'LICENSE'
+
+comment:
+  require_changes: false
+  behavior: default
+  layout: 'reach,diff,flags,tree'
+
+coverage:
+  status:
+    # Status for the lines touched by the change under review.
+    patch:
+      default:
+        informational: false
+        target: 100%
+        threshold: 1%
+    # Status for the project as a whole, measured against the base (target: auto).
+    project:
+      default:
+        informational: true
+        target: auto
+        threshold: 1%
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,132 @@
+name: Run Tests
+
+on:
+  pull_request:
+    branches: [ "main" ]
+  workflow_dispatch:
+    # Allow manual triggering
+
+# Least privilege: no job here writes to the repository, so a read-only token suffices.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        test-type: ["integration"]
+        include:
+          - test-type: "integration"
+            pytest-args: "-m 'integration'"
+
+    # No docker:dind service: every step uses the runner's own Docker daemon, never a service.
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Set up Docker Buildx
+        if: matrix.test-type == 'integration'
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache pip dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest pytest-cov pytest-mock pytest-asyncio
+
+      - name: Install package in development mode
+        run: |
+          pip install -e .
+
+      - name: Build Docker images for integration tests
+        if: matrix.test-type == 'integration'
+        run: |
+          # Build the shell server image needed for Docker tests
+          docker build -f src/microbots/environment/local_docker/image_builder/Dockerfile -t kavyasree261002/shell_server:latest .
+
+      - name: Run ${{ matrix.test-type }} tests
+        env:
+          # OpenAI API Configuration
+          OPEN_AI_KEY: ${{ secrets.OPEN_AI_KEY }}
+          OPEN_AI_DEPLOYMENT_NAME: ${{ secrets.OPEN_AI_DEPLOYMENT_NAME }}
+          OPEN_AI_END_POINT: ${{ secrets.OPEN_AI_END_POINT }}
+          # Azure OpenAI API Configuration
+          AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
+        run: |
+          python -m pytest ${{ matrix.pytest-args }} \
+            --cov=src \
+            --cov-report=xml \
+            --cov-report=term-missing \
+            --junitxml=test-results-${{ matrix.test-type }}.xml \
+            -v
+
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: test-results-${{ matrix.test-type }}
+          path: test-results-*.xml
+
+      - name: Upload coverage reports
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: coverage-${{ matrix.test-type }}
+          path: coverage.xml
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v4
+        if: always()
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          file: ./coverage.xml
+          flags: ${{ matrix.test-type }}
+          name: codecov-${{ matrix.test-type }}
+          fail_ci_if_error: false
+
+  test-summary:
+    runs-on: ubuntu-latest
+    needs: [test]
+    if: always()
+    steps:
+      - name: Download all test results
+        uses: actions/download-artifact@v4
+        with:
+          pattern: test-results-*
+          merge-multiple: true
+
+      - name: Test Summary
+        if: always()
+        run: |
+          # One grouped redirect to the summary file; the path is quoted against word splitting.
+          {
+            echo "## Test Results Summary"
+            echo "| Test Type | Status |"
+            echo "|-----------|--------|"
+            if [ "${{ needs.test.result }}" = "success" ]; then
+              echo "| Integration Tests | ✅ Passed |"
+            else
+              echo "| Integration Tests | ❌ Failed |"
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/pytest.ini b/pytest.ini
@@ -1,4 +1,15 @@
+[tool:pytest]
+testpaths = test
+python_files = test_*.py
+python_functions = test_*
+addopts = 
+    -v
+    --tb=short
+    --strict-markers
+
 [pytest]
 markers =
+    unit: Unit tests
+    integration: Integration tests
+    slow: Slow tests
     docker: marks tests that require a running Docker daemon and pull container images
-
diff --git a/src/microbots/environment/local_docker/LocalDockerEnvironment.py b/src/microbots/environment/local_docker/LocalDockerEnvironment.py
@@ -130,10 +130,18 @@ def stop(self):
             except Exception as e:
                 logger.error("❌ Failed to remove working directory: %s", e)
 
+    # Not called anywhere at present; retained for reference or future use.
+    def _escape(self, command: str) -> str:
+        """Backslash-escape double quotes, then encode < and > as &lt; / &gt;."""
+        for raw, encoded in (('"', '\\"'), ("<", "&lt;"), (">", "&gt;")):
+            command = command.replace(raw, encoded)
+        return command
+
     def execute(
         self, command: str, timeout: Optional[int] = 300
     ) -> CmdReturn:  # TODO: Need proper return value
         logger.debug("➡️  Executing command in container: %s", command)
+        # command = self._escape(command)
         try:
             response = requests.post(
                 f"http://localhost:{self.port}/",
@@ -163,11 +171,11 @@ def execute(
     def copy_to_container(self, src_path: str, dest_path: str) -> bool:
         """
         Copy a file or folder from the host machine to the Docker container.
-        
+
         Args:
             src_path: Path to the source file/folder on the host machine
             dest_path: Destination path inside the container
-            
+
         Returns:
             bool: True if copy was successful, False otherwise
         """
@@ -193,7 +201,7 @@ def copy_to_container(self, src_path: str, dest_path: str) -> bool:
                     mkdir_result = self.execute(mkdir_cmd)
 
                     if mkdir_result.return_code != 0:
-                        logger.error("❌ Failed to create destination directory %s: %s", 
+                        logger.error("❌ Failed to create destination directory %s: %s",
                                    dest_dir, mkdir_result.stderr)
                         return False
                     else:
@@ -212,7 +220,6 @@ def copy_to_container(self, src_path: str, dest_path: str) -> bool:
             # Execute the copy command
             result = subprocess.run(
                 cmd,
-                shell=True,
                 capture_output=True,
                 text=True,
                 timeout=300
@@ -235,11 +242,11 @@ def copy_to_container(self, src_path: str, dest_path: str) -> bool:
     def copy_from_container(self, src_path: str, dest_path: str) -> bool:
         """
         Copy a file or folder from the Docker container to the host machine.
-        
+
         Args:
             src_path: Path to the source file/folder inside the container
             dest_path: Destination path on the host machine
-            
+
         Returns:
             bool: True if copy was successful, False otherwise
         """
@@ -271,7 +278,6 @@ def copy_from_container(self, src_path: str, dest_path: str) -> bool:
             # Execute the copy command
             result = subprocess.run(
                 cmd,
-                shell=True,
                 capture_output=True,
                 text=True,
                 timeout=300

diff --git a/src/microbots/environment/local_docker/image_builder/ShellCommunicator.py b/src/microbots/environment/local_docker/image_builder/ShellCommunicator.py
@@ -16,6 +16,7 @@
 from dataclasses import dataclass
 
 logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', filename='/var/log/ShellCommunicator.log')
 
 @dataclass
 class CmdReturn:
@@ -126,12 +127,20 @@ def _monitor_output(self, stream, output_queue: queue.Queue, stream_type: str):
         except Exception as e:
             output_queue.put((stream_type, f"Monitor error: {e}"))
 
+    # Unused function. Keeping for reference and future use
     def _re_escape(self, command: str) -> str:
-        # Reverse .replace('"', '\\"')
-        command = command.replace('\"', '"')
-        # command = command.replace("&lt;", "<").replace("&gt;", ">")
+        # Reverse .replace('"', '\\"'); the pattern is spelled '\\"' because '\"' is just '"'.
+        command = command.replace('\\"', '"')
+        command = command.replace("&lt;", "<").replace("&gt;", ">")
         return command
 
+    # Unused function. Keeping for reference and future use
+    def _is_heredoc_command(self, command: str) -> bool:
+        """Return True if command contains a heredoc redirection (<<WORD or <<-WORD)."""
+        import re
+        # Lookarounds reject the `<<<` here-string operator; `-?` admits the tab-stripping `<<-` form.
+        return bool(re.search(r'(?<!<)<<(?!<)-?\s*[\'"]?[A-Za-z_][A-Za-z0-9_]*[\'"]?', command))
+
     def send_command(
         self, command: str, wait_for_output: bool = True, timeout: float = 300
     ) -> CmdReturn:
@@ -151,8 +160,8 @@ def send_command(
             return CmdReturn(stdout="", stderr="No active shell session", return_code=1)
 
         try:
-            command = self._re_escape(command)
-            
+            # command = self._re_escape(command)
+
             if not wait_for_output:
                 # Send the command without marker for async execution
                 self.process.stdin.write(command + "\n")
@@ -163,13 +172,14 @@ def send_command(
             # Generate a unique command completion marker
             marker = f"__COMMAND_COMPLETE_{int(time.time() * 1000000)}__"
 
-            # For bash only: Send command + marker in a single line to capture correct exit code
-            combined_command = f"{command}; echo '{marker}' $?"
-
-            # Send the combined command
-            self.process.stdin.write(combined_command + "\n")
+            self.process.stdin.write(command + "\n")
             self.process.stdin.flush()
+            # Send exit code capture on a new line after user command completes
+            self.process.stdin.write(f"echo '{marker}' $?\n")
+            self.process.stdin.flush()
+
             logger.debug("➡️ Sent command: %s", command)
+            logger.debug("🔖 Waiting for marker: %s", marker)
 
             # Collect output until marker is found or timeout
             output_lines = []
@@ -182,6 +192,7 @@ def send_command(
                 try:
                     # Check for output with a small timeout
                     stream_type, line = self.output_queue.get(timeout=0.1)
+                    logger.debug("⬅️ Received line from %s: %s", stream_type, line)
 
                     # Check if this is our completion marker
                     if marker in line:

diff --git a/src/microbots/tools/tool_definitions/browser-use/browser.py b/src/microbots/tools/tool_definitions/browser-use/browser.py
@@ -28,7 +28,7 @@ async def main(args: list[str]) -> int:
     agent = Agent(
         task=what_to_browse,
         browser=browser,
-        llm=ChatAzureOpenAI(model="gpt-4.1"),
+        llm=ChatAzureOpenAI(model="gpt-5",temperature=1.0), # TODO: Gather it from environmental variable instead of hard coding.
         use_vision=False,
     )
     history: AgentHistoryList = await agent.run()

diff --git a/test/bot/browsing_bot_test.py b/test/bot/browsing_bot_test.py
diff --git a/test/bot/calculator/log_analysis_test.py b/test/bot/calculator/log_analysis_test.py