diff --git a/.eslintignore b/.eslintignore
deleted file mode 100644
index 67ab7da..0000000
--- a/.eslintignore
+++ /dev/null
@@ -1,2 +0,0 @@
-dist/
-**/*.test.ts
\ No newline at end of file
diff --git a/.eslintrc.cjs b/.eslintrc.cjs
deleted file mode 100644
index 5929160..0000000
--- a/.eslintrc.cjs
+++ /dev/null
@@ -1,26 +0,0 @@
-module.exports = {
-  parser: '@typescript-eslint/parser',
-  parserOptions: {
-    ecmaVersion: 'latest',
-    sourceType: 'module',
-    project: './tsconfig.json',
-  },
-  plugins: ['@typescript-eslint'],
-  extends: [
-    'eslint:recommended',
-    'plugin:@typescript-eslint/recommended',
-    'plugin:@typescript-eslint/recommended-requiring-type-checking',
-    'plugin:jest/recommended',
-    'prettier',
-  ],
-  env: {
-    node: true,
-    es2022: true,
-    jest: true,
-  },
-  rules: {
-    'no-console': 'warn',
-    '@typescript-eslint/explicit-function-return-type': 'warn',
-    '@typescript-eslint/no-unused-vars': 'error',
-  },
-};
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
deleted file mode 100644
index 71efb1d..0000000
--- a/.github/workflows/publish.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Publish Package
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Setup Node.js
-        uses: actions/setup-node@v2
-        with:
-          node-version: '20'
-
-      - name: Install dependencies
-        run: npm install
-
-      - name: Publish package
-        run: npm publish
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..964ab41
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,52 @@
+# https://semantic-release.gitbook.io/semantic-release/recipes/ci-configurations/github-actions
+name: Release
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  release:
+    name: Release
+
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write
+      issues: write
+      pull-requests: write
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 'lts/*'
+
+      - name: Install pnpm
+        uses: pnpm/action-setup@v2
+        with:
+          version: 10
+
+      - name: Install dependencies
+        run: pnpm install
+
+      - name: Build
+        run: pnpm build
+
+      - name: Verify the integrity of provenance attestations and registry signatures for installed dependencies
+        run: npm audit signatures
+
+      - name: Release
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+        run: npx semantic-release
diff --git a/.github/workflows/ci.yml b/.github/workflows/test.yml
similarity index 65%
rename from .github/workflows/ci.yml
rename to .github/workflows/test.yml
index baf5bd8..7f515b5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/test.yml
@@ -1,26 +1,24 @@
-name: CI
+name: Test

 on:
-  push:
-    branches: [main]
   pull_request:
-    branches: [main]
+    branches:
+      - main

 jobs:
   test:
     runs-on: ubuntu-latest

-    strategy:
-      matrix:
-        node-version: [20.x, 22.x]
-
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

-      - name: Use Node.js ${{ matrix.node-version }}
+      - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: ${{ matrix.node-version }}
+          node-version: 'lts/*'

       - name: Install pnpm
         uses: pnpm/action-setup@v2
@@ -36,8 +34,8 @@ jobs:
       - name: Check formatting
         run: pnpm format:check

-      - name: Run tests
-        run: pnpm test
-
       - name: Build
         run: pnpm build
+
+      - name: Run tests
+        run: pnpm test
diff --git a/.releaserc.yml b/.releaserc.yml
new file mode 100644
index 0000000..41810f4
--- /dev/null
+++ b/.releaserc.yml
@@ -0,0 +1,4 @@
+release:
+  branches:
+    - main
+    - next
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..c329342
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Thomas Gambet
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ea964d8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,111 @@
+# Fetch MCP Server
+
+A port of the official [Fetch MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/fetch) (Python) for Node.js.
+
+## Description
+
+A [Model Context Protocol](https://modelcontextprotocol.io/) server that provides web content fetching capabilities. This server enables LLMs to retrieve and process content from web pages, converting HTML to markdown for easier consumption.
+
+The fetch tool will truncate the response, but by using the `start_index` argument, you can specify where to start the content extraction. This lets models read a webpage in chunks until they find the information they need.
+
+### Available Tools
+
+- `fetch` - Fetches a URL from the internet and extracts its contents as markdown.
+  - `url` (string, required): URL to fetch
+  - `max_length` (integer, optional): Maximum number of characters to return (default: 5000)
+  - `start_index` (integer, optional): Start content from this character index (default: 0)
+  - `raw` (boolean, optional): Get raw content without markdown conversion (default: false)
+
+### Available Prompts
+
+- `fetch` - Fetch a URL and extract its contents as markdown
+  - `url` (string, required): URL to fetch
+
+## Usage
+
+```json
+"mcpServers": {
+  "fetch": {
+    "command": "npx",
+    "args": ["mcp-fetch-node"]
+  }
+}
+```
+
+```json
+"mcpServers": {
+  "fetch": {
+    "command": "docker",
+    "args": ["run", "-i", "--rm", "tgambet/mcp-fetch-node"]
+  }
+}
+```
+
+### Customization - robots.txt
+
+By default, the server will obey a website's robots.txt file if the request came from the model (via a tool), but not if
+the request was user-initiated (via a prompt). This can be disabled by adding the argument `--ignore-robots-txt` to the
+`args` list in the configuration.
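+
+For example, starting from the `npx` configuration in the Usage section above, robots.txt checking can be disabled like this (illustrative only; the Docker configuration works the same way, with the flag appended to `args`):
+
+```json
+"mcpServers": {
+  "fetch": {
+    "command": "npx",
+    "args": ["mcp-fetch-node", "--ignore-robots-txt"]
+  }
+}
+```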
+
+### Customization - User-agent
+
+By default, depending on whether the request came from the model (via a tool) or was user-initiated (via a prompt),
+the server will use either the user-agent
+
+```
+ModelContextProtocol/1.0 (Autonomous; +https://github.com/tgambet/mcp-fetch-node)
+```
+
+or
+
+```
+ModelContextProtocol/1.0 (User-Specified; +https://github.com/tgambet/mcp-fetch-node)
+```
+
+This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
+
+## Features
+
+- [x] Fetch and extract content from a URL
+- [x] Respect `robots.txt` (can be disabled)
+- [x] User-Agent customization
+- [x] Relevant content extraction
+- [x] Raw content or markdown conversion
+- [x] Pagination
+- [ ] In-memory temporary cache for faster responses, especially when paginating
+- [ ] Logs and progress
+
+## Development
+
+```bash
+pnpm install
+pnpm dev
+pnpm lint:fix
+pnpm format
+pnpm test
+pnpm build
+pnpm start
+# test with MCP CLI
+pnpx @wong2/mcp-cli --sse http://localhost:8080/sse
+```
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+## License
+
+[MIT](https://choosealicense.com/licenses/mit/)
+
+## TODO
+
+- [ ] Explain key differences with the original mcp/fetch tool
+- [ ] Add LRU cache
+- [ ] Publish to npm
+- [ ] Dockerize and publish to Docker Hub
+- [ ] Integrate semantic release
+- [ ] Add user logs and progress
+- [ ] Add tests
+- [ ] Add documentation & examples
+- [ ] Add benchmarks for extraction: cf. https://github.com/adbar/trafilatura/blob/master/tests/comparison_small.py
+- [ ] Showcase on FastMCP and MCP repositories
diff --git a/eslint.config.js b/eslint.config.js
new file mode 100644
index 0000000..e82809b
--- /dev/null
+++ b/eslint.config.js
@@ -0,0 +1,21 @@
+// @ts-check
+
+import eslint from '@eslint/js';
+import tseslint from 'typescript-eslint';
+
+export default tseslint.config(
+  eslint.configs.recommended,
+  tseslint.configs.strictTypeChecked,
+  tseslint.configs.stylisticTypeChecked,
+  {
+    languageOptions: {
+      parserOptions: {
+        projectService: true,
+        tsconfigRootDir: import.meta.dirname,
+      },
+    },
+    rules: {
+      '@typescript-eslint/no-explicit-any': 'off',
+    },
+  },
+);
diff --git a/jest.config.cjs b/jest.config.cjs
deleted file mode 100644
index c49d554..0000000
--- a/jest.config.cjs
+++ /dev/null
@@ -1,21 +0,0 @@
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  extensionsToTreatAsEsm: ['.ts'],
-  moduleNameMapper: {
-    '^(\\.{1,2}/.*)\\.js$': '$1',
-  },
-  moduleFileExtensions: ['js', 'ts'],
-  transform: {
-    '^.+\\.ts$': [
-      'ts-jest',
-      {
-        useESM: true,
-      },
-    ],
-  },
-  testMatch: ['**/*.test.ts'],
-  collectCoverage: true,
-  coverageDirectory: 'coverage',
-  coverageProvider: 'v8',
-};
diff --git a/package.json b/package.json
index 21e3d01..07744e0 100644
--- a/package.json
+++ b/package.json
@@ -1,39 +1,63 @@
 {
-  "name": "nodejs-project",
-  "version": "1.0.0",
-  "description": "A Node.js project with best practices",
+  "name": "mcp-fetch-node",
+  "version": "0.0.0-development",
+  "description": "A Model Context Protocol server that provides web content fetching capabilities",
   "type": "module",
   "main": "dist/index.js",
   "scripts": {
-    "start": "node dist/index.js",
-    "dev": "tsx watch src/index.ts",
+    "start": "node dist/main.js",
+    "dev": "tsx watch src/main.ts",
     "build": "tsc",
-    "lint": "eslint . --ext .ts",
-    "lint:fix": "eslint . --ext .ts --fix",
-    "format": "prettier --write \"**/*.{ts,json,md}\"",
+    "lint": "eslint src/**",
+    "lint:fix": "eslint src/** --fix",
+    "format": "prettier --write \"**/*.{ts,json,md,yml,js}\"",
     "format:check": "prettier --check .",
-    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
+    "test": "node --import tsx --test tests/**"
   },
-  "keywords": [],
   "author": "Thomas Gambet",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/tgambet/mcp-fetch-node.git"
+  },
+  "keywords": [
+    "mcp",
+    "fetch"
+  ],
   "license": "MIT",
   "engines": {
-    "node": ">=20",
+    "node": ">=22",
     "pnpm": ">=10"
   },
+  "publishConfig": {
+    "access": "public"
+  },
+  "files": [
+    "dist",
+    "README.md",
+    "LICENSE"
+  ],
+  "dependencies": {
+    "fastmcp": "^1.16.3",
+    "html-minifier": "^4.0.0",
+    "linkedom": "^0.18.9",
+    "robots-parser": "^3.0.1",
+    "sanitize-html": "^2.14.0",
+    "turndown": "^7.2.0",
+    "turndown-plugin-gfm": "^1.0.2",
+    "zod": "^3.24.2"
+  },
   "devDependencies": {
-    "@types/jest": "^29.5.14",
-    "@types/node": "^20.17.19",
-    "@typescript-eslint/eslint-plugin": "^7.18.0",
-    "@typescript-eslint/parser": "^7.18.0",
-    "eslint": "^8.57.1",
-    "eslint-config-prettier": "^9.1.0",
-    "eslint-plugin-jest": "^27.9.0",
-    "jest": "^29.7.0",
+    "@eslint/js": "^9.20.0",
+    "@types/html-minifier": "^4.0.5",
+    "@types/node": "^22.13.4",
+    "@types/sanitize-html": "^2.13.0",
+    "@types/turndown": "^5.0.5",
+    "eslint": "^9.20.1",
     "prettier": "^3.5.1",
-    "ts-jest": "^29.2.5",
+    "semantic-release": "^24.2.3",
     "tsx": "^4.19.2",
-    "typescript": "^5.7.3"
+    "typescript": "^5.7.3",
+    "typescript-eslint": "^8.24.0"
   },
-  "packageManager": "pnpm@10.4.0+sha512.6b849d0787d97f8f4e1f03a9b8ff8f038e79e153d6f11ae539ae7c435ff9e796df6a862c991502695c7f9e8fac8aeafc1ac5a8dab47e36148d183832d886dd52"
+  "packageManager": "pnpm@10.4.1+sha512.c753b6c3ad7afa13af388fa6d808035a008e30ea9993f58c6663e2bc5ff21679aa834db094987129aa4d488b86df57f7b634981b2f827cdcacc698cc0cfb88af"
 }
diff --git a/src/constants.ts b/src/constants.ts
new file mode 100644
index 0000000..7739512
--- /dev/null
+++ b/src/constants.ts
@@ -0,0 +1,5 @@
+export const DEFAULT_USER_AGENT_AUTONOMOUS =
+  'ModelContextProtocol/1.0 (Autonomous; +https://github.com/tgambet/mcp-fetch-node)';
+
+export const DEFAULT_USER_AGENT_MANUAL =
+  'ModelContextProtocol/1.0 (User-Specified; +https://github.com/tgambet/mcp-fetch-node)';
diff --git a/src/index.ts b/src/index.ts
deleted file mode 100644
index 7707744..0000000
--- a/src/index.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-function main(): void {
-  // Your application code here
-  console.log('Hello, World!');
-}
-
-main();
diff --git a/src/main.ts b/src/main.ts
new file mode 100644
index 0000000..c8aeda7
--- /dev/null
+++ b/src/main.ts
@@ -0,0 +1,42 @@
+import { FastMCP } from 'fastmcp';
+import { fetchPrompt } from './prompts/fetch.prompt.js';
+import { fetchTool } from './tools/fetch.tool.js';
+import { parseArgs } from './utils/parse-args.js';
+
+const args = parseArgs();
+
+const userAgent = args['user-agent'] as string | undefined;
+
+const ignoreRobotsTxt = args['ignore-robots-txt'] as boolean | undefined;
+
+export async function serve() {
+  const server = new FastMCP({
+    name: 'mcp-fetch-node',
+    version: '0.0.0', // TODO: use package.json version?
+  });
+
+  server.on('connect', (event) => {
+    console.log('Client connected');
+    event.session.on('error', (event) => {
+      console.error('Session error:', event.error);
+    });
+  });
+
+  server.on('disconnect', () => {
+    console.log('Client disconnected');
+  });
+
+  server.addTool(fetchTool(userAgent, ignoreRobotsTxt));
+
+  server.addPrompt(fetchPrompt(userAgent));
+
+  await server.start({
+    transportType: 'sse',
+    sse: {
+      endpoint: '/sse',
+      port: 8080, // TODO: make this configurable
+    },
+  });
+}
+
+await serve();
diff --git a/src/prompts/fetch.prompt.ts b/src/prompts/fetch.prompt.ts
new file mode 100644
index 0000000..b0c966a
--- /dev/null
+++ b/src/prompts/fetch.prompt.ts
@@ -0,0 +1,23 @@
+import { UserError } from 'fastmcp';
+import { DEFAULT_USER_AGENT_MANUAL } from '../constants.js';
+import { processURL } from '../utils/process-url.js';
+
+export const fetchPrompt = (userAgent?: string) => ({
+  name: 'fetch',
+  description: 'Fetch a URL and extract its contents as markdown',
+  arguments: [
+    {
+      name: 'url',
+      description: 'URL to fetch',
+      required: true,
+    },
+  ],
+  load: async ({ url }: { url?: string }) => {
+    if (!url) {
+      throw new UserError('Missing required argument: url');
+    }
+    const ua = userAgent ?? DEFAULT_USER_AGENT_MANUAL;
+    const [content, prefix] = await processURL(url, ua, false);
+    return [prefix, content].join('\n');
+  },
+});
diff --git a/src/tests/index.test.ts b/src/tests/index.test.ts
deleted file mode 100644
index a167bab..0000000
--- a/src/tests/index.test.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import '../index.js';
-
-describe('Main application', () => {
-  it('should run without errors', () => {
-    expect(true).toBe(true);
-  });
-});
diff --git a/src/tools/fetch.tool.ts b/src/tools/fetch.tool.ts
new file mode 100644
index 0000000..791cd07
--- /dev/null
+++ b/src/tools/fetch.tool.ts
@@ -0,0 +1,49 @@
+import { z } from 'zod';
+import { paginate } from '../utils/paginate.js';
+import { processURL } from '../utils/process-url.js';
+import { checkRobotsTxt } from '../utils/check-robots-txt.js';
+import { DEFAULT_USER_AGENT_AUTONOMOUS } from '../constants.js';
+
+export const fetchToolSchema = z.object({
+  url: z.string().describe('URL to fetch.'),
+  max_length: z
+    .number()
+    .min(0)
+    .max(1000000)
+    .default(5000)
+    .describe('Maximum number of characters to return.'),
+  start_index: z
+    .number()
+    .min(0)
+    .default(0)
+    .describe(
+      'Return output starting at this character index, useful if a previous fetch was truncated and more context is required.',
+    ),
+  raw: z
+    .boolean()
+    .default(false)
+    .describe(
+      'Get the actual HTML content of the requested page, without simplification.',
+    ),
+});
+
+export const fetchTool = (userAgent?: string, ignoreRobotsTxt?: boolean) => ({
+  name: 'fetch',
+  description: `Fetches a URL from the internet and optionally extracts its contents as markdown.
+
+This tool grants you internet access. You can fetch the most up-to-date information and let the user know that.`,
+  parameters: fetchToolSchema,
+  execute: async ({
+    url,
+    max_length,
+    start_index,
+    raw,
+  }: z.infer<typeof fetchToolSchema>) => {
+    const ua = userAgent ?? DEFAULT_USER_AGENT_AUTONOMOUS;
+    if (!ignoreRobotsTxt) {
+      await checkRobotsTxt(url, ua);
+    }
+    const [content, prefix] = await processURL(url, ua, raw);
+    return paginate(url, content, prefix, start_index, max_length);
+  },
+});
diff --git a/src/utils/check-robots-txt.ts b/src/utils/check-robots-txt.ts
new file mode 100644
index 0000000..68e6310
--- /dev/null
+++ b/src/utils/check-robots-txt.ts
@@ -0,0 +1,62 @@
+import { URL } from 'url';
+import robotsParser, { Robot } from 'robots-parser';
+
+export class RobotsTxtError extends Error {
+  constructor(message: string, cause?: unknown) {
+    super(message, { cause });
+    this.name = 'RobotsTxtError';
+  }
+}
+
+export async function checkRobotsTxt(
+  targetUrl: string,
+  userAgent: string,
+): Promise<void> {
+  // TODO: check if the targetUrl is a valid URL
+  const { protocol, host } = new URL(targetUrl);
+
+  const robotsTxtUrl = `${protocol}//${host}/robots.txt`;
+
+  try {
+    const response = await fetch(robotsTxtUrl, {
+      headers: { 'User-Agent': userAgent },
+      redirect: 'follow',
+    });
+
+    if (response.status === 401 || response.status === 403) {
+      throw new RobotsTxtError(
+        `When fetching robots.txt (${robotsTxtUrl}), received status ${response.status.toString()}, so assuming that autonomous fetching is not allowed. The user can try manually fetching the page by using the fetch prompt.`,
+      );
+    } else if (response.status >= 400 && response.status < 500) {
+      return;
+    }
+
+    const robotTxt = await response.text();
+
+    const processedRobotTxt = robotTxt
+      .split('\n')
+      .filter((line) => !line.trim().startsWith('#'))
+      .join('\n');
+
+    // @ts-expect-error : bad types
+    const robotsTxt = robotsParser(robotsTxtUrl, processedRobotTxt) as Robot;
+
+    if (robotsTxt.isDisallowed(targetUrl, userAgent)) {
+      throw new RobotsTxtError(
+        `The site's robots.txt (${robotsTxtUrl}) specifies that autonomous fetching of this page is not allowed, ` +
+          `${userAgent}\n` +
+          `${targetUrl}` +
+          `\n${robotTxt}\n\n` +
+          `The assistant must let the user know that it failed to view the page. The assistant may provide further guidance based on the above information.\n` +
+          `The assistant can tell the user that they can try manually fetching the page by using the fetch prompt within their UI.`,
+      );
+    }
+  } catch (error) {
+    if (error instanceof RobotsTxtError) {
+      throw error;
+    }
+    throw new RobotsTxtError(`Failed to verify ${robotsTxtUrl}`, {
+      cause: error,
+    });
+  }
+}
diff --git a/src/utils/extract.ts b/src/utils/extract.ts
new file mode 100644
index 0000000..cfaeab5
--- /dev/null
+++ b/src/utils/extract.ts
@@ -0,0 +1,118 @@
+import { minify } from 'html-minifier';
+import { parseHTML } from 'linkedom';
+import sanitizeHtml from 'sanitize-html';
+
+/* eslint-disable @typescript-eslint/no-unsafe-member-access */
+/* eslint-disable @typescript-eslint/no-unsafe-call */
+/* eslint-disable @typescript-eslint/no-unsafe-assignment */
+/* eslint-disable @typescript-eslint/no-unsafe-return */
+
+export class ExtractError extends Error {
+  constructor(message: string, cause?: unknown) {
+    super(message, { cause });
+    this.name = 'ExtractError';
+  }
+}
+
+export const preProcessHtml = (html: string) => {
+  return html
+    .replace(/<style[^>]*?\/?>([\S\s]*?)<\/style>/gim, '')
+    .replace(/<script[^>]*?\/?>([\S\s]*?)<\/script>/gim, '')
+    .replace(/<template[^>]*?\/?>([\S\s]*?)<\/template>/gim, '');
+};
+
+const nodesToRemove = [
+  'template',
+  'img',
+  'svg',
+  'nav',
+  'footer',
+  'header',
+  'head',
+  'button',
+  'form',
+  'input',
+  'textarea',
+  'select',
+];
+
+export function extract(html: string) {
+  try {
+    // Pre-sanitize the HTML
+    let result = preProcessHtml(html);
+
+    // Sanitize the HTML
+    result = sanitizeHtml(result, {
+      allowedTags: [
+        'html',
+        'body',
+        ...sanitizeHtml.defaults.allowedTags,
+        ...nodesToRemove,
+      ],
+      allowedAttributes: {
+        '*': ['hidden', 'class', 'type', 'aria-hidden', 'href'],
+      },
+      disallowedTagsMode: 'completelyDiscard',
+    });
+
+    // Parse the HTML
+    const { document } = parseHTML(result);
+
+    // Remove unwanted elements
+    document.body
+      .querySelectorAll(
+        [
+          '[hidden]',
+          '[aria-hidden]',
+          '[type="button"]',
+          '.hide-sm',
+          '.sr-only',
+          '.d-none',
+          '.d-sm-none',
+          // TODO check popular CSS frameworks classes
+          ...nodesToRemove,
+        ].join(', '),
+      )
+      ?.forEach((a: any) => a.remove());
+
+    // Remove nav-like lists
+    document.querySelectorAll('ul, table, section').forEach((node: any) => {
+      const list = node.cloneNode(true);
+      list.querySelectorAll('a').forEach((a: any) => {
+        a.innerHTML = '';
+      });
+      const htmlLength = list.innerHTML.length;
+      const textLength = list.innerText.length;
+      if (textLength / htmlLength < 0.2) node.remove();
+    });
+
+    // Remove empty links
+    document.querySelectorAll('a').forEach((a: any) => {
+      if (a.textContent.trim() === '') {
+        a.remove();
+      }
+    });
+
+    // Sanitize again
+    result = sanitizeHtml(document.body.innerHTML as string, {
+      allowedAttributes: { a: ['href'] },
+    });
+
+    // Minify
+    result = minify(result, {
+      collapseWhitespace: true,
+      preserveLineBreaks: false,
+      decodeEntities: true,
+      conservativeCollapse: false,
+      collapseInlineTagWhitespace: false,
+      removeEmptyElements: true,
+    });
+
+    return result;
+  } catch (error) {
+    if (error instanceof ExtractError) {
+      throw error;
+    }
+    throw new ExtractError('Failed to extract content', error);
+  }
+}
diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts
new file mode 100644
index 0000000..aaada6b
--- /dev/null
+++ b/src/utils/fetch.ts
@@ -0,0 +1,32 @@
+export class FetchError extends Error {
+  constructor(message: string, cause?: unknown) {
+    super(message, { cause });
+    this.name = 'FetchError';
+  }
+}
+
+export async function fetch(
+  url: string,
+  userAgent: string,
+): Promise<{ content: string; contentType: string | null }> {
+  try {
+    const response = await global.fetch(url, {
+      redirect: 'follow',
+      headers: { 'User-Agent': userAgent },
+    });
+    if (!response.ok) {
+      throw new FetchError(
+        `Failed to fetch ${url} - status code ${response.status.toString()}`,
+      );
+    }
+    return {
+      content: await response.text(),
+      contentType: response.headers.get('content-type'),
+    };
+  } catch (error) {
+    if (error instanceof FetchError) {
+      throw error;
+    }
+    throw new FetchError(`Failed to fetch ${url}`, error);
+  }
+}
diff --git a/src/utils/format.ts b/src/utils/format.ts
new file mode 100644
index 0000000..e8b1bf3
--- /dev/null
+++ b/src/utils/format.ts
@@ -0,0 +1,49 @@
+import TurndownService from 'turndown';
+// @ts-expect-error : missing types
+import turndownPluginGfm from 'turndown-plugin-gfm';
+
+/* eslint-disable @typescript-eslint/no-unsafe-member-access */
+/* eslint-disable @typescript-eslint/no-unsafe-call */
+/* eslint-disable @typescript-eslint/restrict-template-expressions */
+
+export class FormatError extends Error {
+  constructor(message: string, cause?: unknown) {
+    super(message, { cause });
+    this.name = 'FormatError';
+  }
+}
+
+const turndownService = new TurndownService({
+  headingStyle: 'atx',
+  codeBlockStyle: 'fenced',
+  bulletListMarker: '-',
+  hr: '\n',
+});
+
+const tables = turndownPluginGfm.tables as TurndownService.Plugin;
+
+turndownService.use(tables);
+
+turndownService.addRule('pre', {
+  filter: 'pre',
+  replacement: (content) => {
+    return `\`\`\`\n${content}\n\`\`\``;
+  },
+});
+
+turndownService.addRule('a', {
+  filter: 'a',
+  replacement: (_content, node) => {
+    return node.href && node.innerText.trim()
+      ? `[${node.innerText.trim()}](${node.href})`
+      : '';
+  },
+});
+
+export function format(html: string): string {
+  try {
+    return turndownService.turndown(html);
+  } catch (error) {
+    throw new FormatError('Failed to convert HTML to Markdown', error);
+  }
+}
diff --git a/src/utils/paginate.ts b/src/utils/paginate.ts
new file mode 100644
index 0000000..e9a7a45
--- /dev/null
+++ b/src/utils/paginate.ts
@@ -0,0 +1,26 @@
+export function paginate(
+  url: string,
+  content: string,
+  prefix: string,
+  startIndex: number,
+  maxLength: number,
+) {
+  const originalLength = content.length;
+  let result = content;
+  if (startIndex >= originalLength) {
+    result = 'No more content available.';
+  } else {
+    result = result.slice(startIndex, startIndex + maxLength);
+    if (!result) {
+      result = 'No more content available.';
+    } else {
+      const actualLength = result.length;
+      const remainingLength = originalLength - startIndex - actualLength;
+      if (actualLength === maxLength && remainingLength > 0) {
+        const nextStartIndex = startIndex + actualLength;
+        result += `\n\nContent truncated. Call the fetch tool with a start_index of ${nextStartIndex.toString()} to get more content.`;
+      }
+    }
+  }
+  return [prefix, `Contents of ${url}`, result].join('\n');
+}
diff --git a/src/utils/parse-args.ts b/src/utils/parse-args.ts
new file mode 100644
index 0000000..96cdba0
--- /dev/null
+++ b/src/utils/parse-args.ts
@@ -0,0 +1,22 @@
+type ParsedArgs = Record<string, string | boolean>;
+
+export function parseArgs(args: string[] = process.argv.slice(2)): ParsedArgs {
+  const parsedArgs: ParsedArgs = {};
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+
+    if (arg.startsWith('--')) {
+      const key = arg.slice(2);
+
+      if (i + 1 < args.length && !args[i + 1].startsWith('--')) {
+        parsedArgs[key] = args[i + 1];
+        i++;
+      } else {
+        parsedArgs[key] = true;
+      }
+    }
+  }
+
+  return parsedArgs;
+}
diff --git a/src/utils/process-url.ts b/src/utils/process-url.ts
new file mode 100644
index 0000000..152d828
--- /dev/null
+++ b/src/utils/process-url.ts
@@ -0,0 +1,29 @@
+import { extract } from './extract.js';
+import { format } from './format.js';
+import { fetch } from './fetch.js';
+
+function isHTML(content: string, contentType?: string | null): boolean {
+  return contentType?.includes('text/html') ?? content.includes('<html');
+}
+
+export async function processURL(
+  url: string,
+  userAgent: string,
+  raw: boolean,
+): Promise<[string, string]> {
+  const { content, contentType } = await fetch(url, userAgent);
+
+  if (isHTML(content, contentType) && !raw) {
+    const formatted = format(extract(content));
+    if (!formatted) {
+      return ['<error>Page failed to be simplified from HTML</error>', ''];
+    }
+    return [formatted, ''];
+  }
+
+  if (raw) {
+    return [content, `Here is the raw ${contentType ?? 'unknown'} content:`];
+  }
+
+  return [
+    content,
+    `Content type ${contentType ?? 'unknown'} cannot be simplified to markdown, but here is the raw content:`,
+  ];
+}
diff --git a/tests/index.test.ts b/tests/index.test.ts
new file mode 100644
index 0000000..06ec056
--- /dev/null
+++ b/tests/index.test.ts
@@ -0,0 +1,8 @@
+import assert from 'node:assert';
+import { describe, it } from 'node:test';
+
+describe('Main application', () => {
+  it('dummy test', () => {
+    assert.strictEqual(true, true);
+  });
+});
diff --git a/tsconfig.json b/tsconfig.json
index fcf9a74..528ff8b 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -15,5 +15,5 @@
     "declaration": true
   },
   "include": ["src/**/*"],
-  "exclude": ["node_modules", "dist", "**/*.test.ts"]
+  "exclude": ["node_modules", "dist"]
 }