Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions .github/workflows/automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ name: Run WDIO Tests with OBOT Docker

on:
workflow_dispatch:
repository_dispatch:
types: [pr-created]

jobs:
wdio-tests:
Expand Down Expand Up @@ -64,13 +62,7 @@ jobs:
run: npm ci

- name: Run WDIO Tests
env:
WP_URL: ${{ secrets.WP_URL }}
WP_USERNAME: ${{ secrets.WP_USERNAME }}
WP_PASSWORD: ${{ secrets.WP_PASSWORD }}
OBOT_URL: ${{ secrets.OBOT_URL }}
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY}}
env: ${{ secrets }}
run: |
npm run wdio:byScenario
npm run eval
Expand Down
124 changes: 124 additions & 0 deletions .github/workflows/pr-trigger.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Runs the WDIO suite against a throw-away OBOT container, once per entry of
# the `changed_files` list carried by a `pr-created` repository_dispatch event.
name: Run WDIO Tests with OBOT Docker

on:
  repository_dispatch:
    types: [pr-created]

jobs:
  # Turns the dispatch payload into job outputs consumed by `wdio-tests`.
  prepare-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      pr_number: ${{ steps.set-matrix.outputs.pr_number }}
    steps:
      - name: Set matrix data
        id: set-matrix
        # Payload values are handed over through the environment instead of
        # being spliced into the script text: the JSON contains double quotes
        # and newlines that would break the shell string, and splicing
        # untrusted payload data into a script allows command injection.
        env:
          EVENT_NAME: ${{ github.event_name }}
          CHANGED_FILES: ${{ toJson(github.event.client_payload.changed_files) }}
          PR_NUMBER: ${{ github.event.client_payload.pr_number }}
        run: |
          if [ "$EVENT_NAME" = "repository_dispatch" ]; then
            # GITHUB_OUTPUT takes single-line `key=value` pairs, so compact the JSON.
            # NOTE(review): the sender may deliver `changed_files` either as a JSON
            # array or as a JSON-encoded string - both are accepted here; a missing
            # value becomes an empty list so the test job is skipped.
            matrix="$(jq -c 'if . == null then [] elif type == "string" then fromjson else . end' <<< "$CHANGED_FILES")"
            echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
            echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
          else
            # Default matrix for any other trigger (e.g. workflow_dispatch)
            echo 'matrix=[{"file":"default","containerizedConfig":null,"env":null}]' >> "$GITHUB_OUTPUT"
            echo "pr_number=" >> "$GITHUB_OUTPUT"
          fi

  # One run per matrix entry; skipped entirely when the list is empty.
  wdio-tests:
    needs: prepare-matrix
    runs-on: ubuntu-latest
    if: needs.prepare-matrix.outputs.matrix != '[]'
    strategy:
      fail-fast: false
      matrix:
        server: ${{ fromJson(needs.prepare-matrix.outputs.matrix) }}

    permissions:
      # Listing permissions sets every unlisted scope to `none`;
      # actions/checkout needs read access to the repository contents.
      contents: read
      id-token: write
      actions: read
      checks: read

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v5

      - name: Set up Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 22

      - name: Cache NPM Dependencies
        uses: actions/cache@v4
        with:
          path: ~/.npm
          key: ${{ runner.os }}-node-${{ hashFiles('package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Run OBOT container
        # The key is passed through the environment (`-e NAME` forwards the
        # caller's value) so it never becomes part of the command line.
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          docker run -d --name obot \
            -p 8080:8080 \
            -v /var/run/docker.sock:/var/run/docker.sock \
            -e OPENAI_API_KEY \
            -e OBOT_SERVER_DISABLE_UPDATE_CHECK=true \
            ghcr.io/obot-platform/obot:latest

      - name: Wait for OBOT to be ready
        run: |
          echo "Waiting for OBOT container..."
          ready=false
          # Poll up to 12 times, 5 seconds apart.
          for i in {1..12}; do
            if curl -sf http://localhost:8080/api/me | grep -q '"username":"nobody"'; then
              echo "OBOT API is ready."
              ready=true
              break
            fi
            echo "Not ready yet... retry #$i"
            sleep 5
          done
          # The job still continues on timeout (as before), but the timeout is
          # now visible in the run summary together with the container logs.
          if [ "$ready" != "true" ]; then
            echo "::warning::OBOT API did not report ready within the polling window."
            docker logs --tail 100 obot || true
          fi

      - name: Remove file extension
        env:
          MATRIX_SERVER_FILE: ${{ matrix.server.file }}
        run: |
          # Strip only a trailing ".yaml"; a substitution would also remove the
          # first ".yaml" occurring in the middle of a name.
          MCP_SERVER_NAME="${MATRIX_SERVER_FILE%.yaml}"
          echo "MCP_SERVER_NAME=$MCP_SERVER_NAME" >> "$GITHUB_ENV"

      - name: Create catalog entry
        # The request body is assembled with jq: shell variables are not
        # expanded inside a single-quoted string, so the previous inline JSON
        # sent the literal text "${MCP_SERVER_NAME}" as the entry name.
        env:
          CONTAINER_CONFIG: ${{ toJson(matrix.server.containerizedConfig) }}
        run: |
          jq -nc \
            --arg name "$MCP_SERVER_NAME" \
            --argjson cfg "$CONTAINER_CONFIG" \
            '{
              name: "test-\($name)",
              description: "testing \($name) mcp",
              icon: "https://avatars.githubusercontent.com/u/9919?v=4",
              repoURL: "https://github.com/testing/testing",
              runtime: "containerized",
              containerizedConfig: {
                image: ($cfg.image // ""),
                port: $cfg.port,
                path: ($cfg.path // ""),
                args: $cfg.args
              },
              metadata: {
                categories: "testing"
              }
            }' |
            curl localhost:8080/api/mcp-catalogs/default/entries -X POST \
              -H "Content-Type: application/json" \
              --data-binary @-

      - name: Install dependencies
        run: npm ci

      - name: Run WDIO Tests
        env:
          WP_URL: ${{ secrets.WP_URL }}
          WP_USERNAME: ${{ secrets.WP_USERNAME }}
          WP_PASSWORD: ${{ secrets.WP_PASSWORD }}
          OBOT_URL: ${{ secrets.OBOT_URL }}
          GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          # Double quotes so the tag becomes e.g. "@gitlab"; single quotes
          # passed the literal text "@${MCP_SERVER_NAME}" to cucumber.
          npx wdio run wdio.conf.ts --cucumberOpts.tagExpression="@${MCP_SERVER_NAME}"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ Once you've installed the dependencies and set up your environment, you can run
2. **Run a specific test**:
If you want to run a specific scenario or feature, use:
```bash
npm run wdio:byScenario --spec src/features/obot.feature:10
npx wdio run wdio.conf.ts --cucumberOpts.tagExpression='@gitlab'
```
Replace `10` with the line number of the scenario you want to execute.
Replace `@gitlab` with the tag corresponding to the scenario you want to execute.

3. **Run in headless mode** (useful for CI/CD):
You can run the tests in headless mode (without opening the browser window) by configuring the `wdio.conf.ts` file to enable headless execution, and then run the tests with:
Expand Down
4 changes: 0 additions & 4 deletions auto_eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ interface GradeInfo {

interface ToolData {
responses?: string[];
errors?: string[];
task_done?: boolean | null;
failure_reason?: string[];
status?: string;
Expand Down Expand Up @@ -118,10 +117,7 @@ async function enhanceReportWithEval(
// Merge reasons
const reasons: string[] = [];
if (gradeInfo.reason) reasons.push(gradeInfo.reason);
if (toolData.errors?.length) reasons.push(...toolData.errors);

toolData.failure_reason = reasons;
delete toolData.errors;

// Set status based on grading
if (gradeInfo.result === "FAILURE") toolData.status = "Failure";
Expand Down
32 changes: 6 additions & 26 deletions src/core/mcpFunc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export async function sendPromptValidateAndCollect(promptText: string, toolList:

// Send and wait for reply
const reply = await sendPromptAndWaitForReply(promptText);
await browser.pause(2000);
await browser.pause(10000);

// Wait until a new message-content div appears
await browser.waitUntil(async () => {
Expand Down Expand Up @@ -98,28 +98,12 @@ export async function sendPromptValidateAndCollect(promptText: string, toolList:
const currReply = promptReplies[await promptReplies.length - 1];
if (!currReply) throw new Error(`No reply container found even after waiting for prompt: "${promptText}"`);

// Validation regex
const successRegex = /(success|completed|connected|created|retrieved|posted|updated|closed|deleted|functioning|valid|available|ready to use)/i;
const failureRegex = /(not valid|failed|error|cannot access|do not have|insufficient|not available|required|troubleshooting)/i;

const hasSuccess = successRegex.test(reply);
const hasFailure = failureRegex.test(reply);

let errorMessage = '';
if (!hasSuccess && !hasFailure) {
errorMessage = `No success or actionable failure detected in prompt #${index + 1} response.`;
}

console.log(`Prompt #${index + 1}: Tools used: ${toolsTexts.length ? toolsTexts.join(', ') : 'None'} | Status: ${hasSuccess ? 'Success' : (hasFailure ? 'Failure' : 'Unknown')}`);

// Return data for reporting
return {
prompt: promptText,
reply,
replyElement: currReply,
tools: toolsTexts,
status: hasSuccess ? 'Success' : (hasFailure ? 'Failure' : 'Unknown'),
error: errorMessage || null,
};
}

Expand All @@ -131,13 +115,13 @@ function maxStatus(s1: string, s2: string): string {
export function aggregateToolResponses(promptResults: any[]) {
const report: Record<string, {
promptText: string,
tools: Record<string, { responses: string[]; status: string; errors: string[] }>
tools: Record<string, { responses: string[] }>
}> = {};

for (let i = 0; i < promptResults.length; i++) {
const result = promptResults[i];
const { prompt, tools, reply, status, error } = result;
if (!reply && !error) continue;
const { prompt, tools, reply } = result;
if (!reply) continue;

const promptKey = `Prompt #${i + 1}`;

Expand All @@ -153,14 +137,10 @@ export function aggregateToolResponses(promptResults: any[]) {

for (const tool of toolsToUse) {
if (!report[promptKey].tools[tool]) {
report[promptKey].tools[tool] = { responses: [], status: 'Unknown', errors: [] };
report[promptKey].tools[tool] = { responses: []};
}

if (reply) report[promptKey].tools[tool].responses.push(reply);
if (error) report[promptKey].tools[tool].errors.push(error);

report[promptKey].tools[tool].status =
maxStatus(status, report[promptKey].tools[tool].status);
}
}

Expand All @@ -170,7 +150,7 @@ export function aggregateToolResponses(promptResults: any[]) {
export function saveMCPReport(mcpName: string, reportJson: any) {
const folderName = `MCP Server Reports`;
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const fileName = `${mcpName}_MCP_Report_${timestamp}.json`;
const fileName = `${mcpName.toLowerCase().replace(/\s+/g, '_')}_MCP_Report_${timestamp}.json`;
const dirPath = path.join(process.cwd(), folderName);
const filePath = path.join(dirPath, fileName);

Expand Down
71 changes: 60 additions & 11 deletions src/core/selectors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ const Selectors = {
tableInput:'//pre[contains(@class, "whitespace-pre-wrap")]//span[contains(@class, "text-gray-500")]',
showDetails2: '(//button[text()="Show Details"])[2]',
showDetails3: '(//button[text()="Show Details"])[3]',
connectionsList: `//div[@class="flex flex-col"]`,
currentProjectButton: `//span[text()="Project"]/ancestor::button`,
createNewProjectButton: `//button[text()=" Create New Project"]`,
inputNewProjectName: '//input[@id="project-name"]',
saveButton: '//button[text()="Save"]',

admin:{
oktaLogin:'//button[normalize-space(.//div) = "Sign in with Okta"]',
Expand Down Expand Up @@ -210,22 +215,66 @@ const Selectors = {
clickChatObot: '//button[normalize-space(text())="Chat"]',
connectorbtn: '//p[normalize-space(text())="Connectors"]/following-sibling::button',
mcpSearchInput: '//input[normalize-space(@placeholder)="Search by name..."]',
// selectMCPServer: '//p[normalize-space(text())="WordPress1"]',
selectMCPServer: (option: string) => `//p[normalize-space(text())="${option}"]/ancestor::div[contains(@class, 'flex')]/button`,
wpSiteURL: '//input[normalize-space(@id)="WORDPRESS_SITE"]',
wpUsername: '//input[normalize-space(@id)="WORDPRESS_USERNAME"]',
wpPassword: '//input[normalize-space(@id)="WordPress App Password"]',
btnClick: (option: string) => `//button[normalize-space(text())="${option}"]`,
promptInput: '//div[@class="plaintext-editor text-md relative w-full flex-1 grow resize-none p-2 leading-8 outline-none"]',
// submitPrompt: '//button[@type="submit"]',
// obotInput: '//div[@class="ProseMirror editor"]',
gitlabToken: '//input[@name="GitLab Personal Access Token"]',
// messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]",
obotInput: "//div[contains(@class,'ProseMirror') and @contenteditable='true']",
submitPrompt: '//button[@type="submit"]',
lastBotReply: '//div[@class="message-content"]',
messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]"

messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]",
wordpressMCP:{
wpSiteURL: '//input[normalize-space(@id)="WORDPRESS_SITE"]',
wpUsername: '//input[normalize-space(@id)="WORDPRESS_USERNAME"]',
wpPassword: '//input[normalize-space(@id)="WordPress App Password"]',
},
gitlabMCP:{
gitlabToken: '//input[@name="GitLab Personal Access Token"]',
},
bigQuery:{
googleCloudProjectID: '//input[@id="GOOGLE_CLOUD_PROJECT"]',
googleCloudCredentials: '//input[@name="Google Application Credentials"]//following-sibling::div[1]',
},
datadog:{
datadogAPIKey: `//input[@id="Datadog API Key"]`,
datadogAPPKey: `//input[@id="Datadog App Key"]`,
},
databricks:{
utility:{
workspaceHostname: `//input[@id="DATABRICKS_WORKSPACE_URL"]`,
functionCatalog: `//input[@id="DATABRICKS_FUNCTIONS_CATALOG"]`,
functionalSchema: `//input[@id="DATABRICKS_FUNCTIONS_SCHEMA"]`,
PAT: `//input[@id="Personal Access Token"]`,
},
vector:{
vectorCatalog: `//input[@id="DATABRICKS_VECTOR_SEARCH_CATALOG"]`,
vectorSchema: `//input[@id="DATABRICKS_VECTOR_SEARCH_SCHEMA"]`,
},
genie: {
genieSpaceID: `//input[@id="DATABRICKS_GENIE_SPACE_ID"]`
}
},
brave: {
braveAPIKey: `//input[@id="Brave API Key"]`
},
chromaCloud: {
  // Chroma Cloud connector form inputs, located by element id.
  // Key spellings (e.g. `tenentID`) are kept as-is because callers reference them.
  tenentID: `//input[@id="CHROMA_TENANT"]`,
  // Leading `//` restored: every other selector in this object is an XPath,
  // and `@id` attribute tests are XPath syntax, not CSS.
  DBName: `//input[@id="CHROMA_DATABASE"]`,
  APIKey: `//input[@id="Chroma Cloud API Key"]`
},
fireCrawl: {
API_key: `//input[@id="Firecrawl API Key"]`
},
gitMCP: {
urlLink: `//input[@id="url-manifest-url"]`
},
redis: {
urlLink: `//input[@id="REDIS_URI"]`
},
postman: {
hostURL: `//input[@id="HOST_URL"]`,
toolCOnfig: `//input[@id="TOOL_CONFIGURATION"]`,
postmanAPIKey: `//input[@id="Postman API Key"]`,
}
}
}
export default Selectors;
export default Selectors;
20 changes: 20 additions & 0 deletions src/data/bigquery_toolbox.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"prompts": [
"Is BigQuery Toolbox connected?",
"Retrieve all columns from the table company_data.Employees where Salary is greater than 60000.",
"List the first 10 employees' first and last names from company_data.Employees.",
"Count the total number of employees in company_data.Employees.",
"Find the average salary of all employees in company_data.Employees.",
"Find all employees from company_data.Employees who were hired after January 1, 2023.",
"Get the total salary amount paid to all employees in company_data.Employees.",
"List employees from company_data.Employees with their email addresses ordered by HireDate descending.",
"Use service account credentials to query the company_data.Employees table.",
"Run SELECT * FROM company_data.Employees LIMIT 5.",
"Find the employee with the highest salary in company_data.Employees.",
"Find the employee with the lowest salary in company_data.Employees.",
"Find the number of employees hired per year from company_data.Employees."
],
"tools": [
"query"
]
}
25 changes: 25 additions & 0 deletions src/data/brave_search.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"prompts": [
"Is Brave Search MCP server connected?",
"Search the web for 'best productivity apps 2025' and return the top 5 results.",
"Find web pages about 'remote work trends' published in the last 7 days.",
"Search for the latest news about 'AI regulation' and return the 5 most recent articles.",
"Find breaking news on 'electric vehicles' from the last 24 hours.",
"Search for images of 'modern home office setups' and return the top 5 results.",
"Find safe-for-work images of 'healthy breakfast ideas'.",
"Search for videos about 'machine learning tutorials' and return the top 3 results.",
"Find recent videos on '2025 tech conferences' published in the last month.",
"Find coffee shops near 'Denver, CO' with ratings and reviews.",
"Search for top-rated vegan restaurants in 'San Francisco'.",
"Summarize the top web results for 'benefits of meditation' with inline references.",
"Generate a concise summary of news articles about 'renewable energy investments'."
],
"tools": [
"brave_web_search",
"brave_news_search",
"brave_image_search",
"brave_video_search",
"brave_local_search",
"brave_summarizer"
]
}
Loading