From 10c5f2814ae8e009359752dd51ca73d703b4145e Mon Sep 17 00:00:00 2001 From: Rohan Pandey Date: Wed, 26 Jul 2023 16:31:42 -0700 Subject: [PATCH 1/4] Content Refresher Agent --- docs/development/workflows.mdx | 21 +- .../workflow/node-block-definitions.ts | 28 +++ platform/poetry.lock | 228 +++++++++++++----- platform/pyproject.toml | 2 + .../blocks/agents/content_refresher_agent.py | 183 ++++++++++++++ platform/reworkd_platform/settings.py | 2 + .../web/api/workflow/blocks/web/__init__.py | 5 + 7 files changed, 401 insertions(+), 68 deletions(-) create mode 100644 platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py diff --git a/docs/development/workflows.mdx b/docs/development/workflows.mdx index 75de6e9562..33fb93109b 100644 --- a/docs/development/workflows.mdx +++ b/docs/development/workflows.mdx @@ -17,4 +17,23 @@ The workflow hierarchy follows a graph-like structure. The frontend models only The backend models represent the mechanisms to actually perform work for a given node. Each frontend `Node` will have an associated `Block`. `Node` represents the frontend view / position while the `Block` represents what will actually happen when that `Node` is run. -For example, a "SlackMessageBlock" is a `Block` that, when executed, would send a user a message on "Slack". \ No newline at end of file +For example, a "SlackMessageBlock" is a `Block` that, when executed, would send a user a message on "Slack". 
+ +## Adding a new block +To add a new block, start by updating the frontend: +- open next/src/services/workflow/node-block-definitions.ts +- add a `NodeBlockDefinition` constant for your block (name, type, description, image_url, icon, input_fields, output_fields) and append it to the array returned by `getNodeBlockDefinitions` +- (soon, block definitions on the frontend will be set from the backend and edits won't be needed here) + +Then update the backend: +- open platform/reworkd_platform/schemas/workflow/blocks + - add a new file for your block + - define the block's input and output types as classes + - add a class for the block with + - attributes: type, description, image_url, and input + - async method: `run` +- install dependencies for your block with `poetry add` +- open platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py + - import your block + - add an `if` branch to `get_block_runner` +- rebuild and restart the containers: `docker compose down; docker compose up --build` \ No newline at end of file diff --git a/next/src/services/workflow/node-block-definitions.ts b/next/src/services/workflow/node-block-definitions.ts index 2b83da6e00..80c053b3ca 100644 --- a/next/src/services/workflow/node-block-definitions.ts +++ b/next/src/services/workflow/node-block-definitions.ts @@ -200,6 +200,33 @@ const WebInteractionAgent: NodeBlockDefinition = { output_fields: [], }; +const ContentRefresherAgent: NodeBlockDefinition = { + name: "Content Refresher Agent", + type: "ContentRefresherAgent", + description: "Refresh the content on an existing page", + image_url: "/tools/web.png", + icon: FaRobot, + input_fields: [ + { + name: "url", + description: "The page whose content the agent will refresh", + type: "string", + }, + ], + output_fields: [ + { + name: "original_content", + description: "The original content of the page", + type: "string", + }, + { + name: "refreshed_content", + description: "The refreshed content for the page", + type: "string", + }, + ], +}; + export const getNodeBlockDefinitions = (): NodeBlockDefinition[] => { return [ UrlStatusCheckBlockDefinition, @@ -209,6 +236,7 @@ export const getNodeBlockDefinitions = (): NodeBlockDefinition[] => { 
TriggerBlockDefinition, SummaryWebhookBlockDefinition, TextInputWebhookBlockDefinition, + ContentRefresherAgent ]; }; diff --git a/platform/poetry.lock b/platform/poetry.lock index 89a383ef7c..b08d7948f8 100644 --- a/platform/poetry.lock +++ b/platform/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -186,6 +186,25 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "anthropic" +version = "0.3.6" +description = "Client library for the anthropic API" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "anthropic-0.3.6-py3-none-any.whl", hash = "sha256:45036a96f38598be82237c12d77d7aefe814a3bceb9da0bc6721a381c29821b1"}, + {file = "anthropic-0.3.6.tar.gz", hash = "sha256:6e644c84ad9375dc12e07b36aab1862ca4db98eb1750e08acfe4847e62afe0dd"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<4" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<2.0.0" +tokenizers = ">=0.13.0" +typing-extensions = ">=4.1.1,<5" + [[package]] name = "anyio" version = "3.7.1" @@ -380,17 +399,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.28.8" +version = "1.28.12" description = "The AWS SDK for Python" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.28.8-py3-none-any.whl", hash = "sha256:7132ac3f3a9c28b84dcc344cfb439d37d2c5ab45f6b577358fc9aeba5d5aab63"}, - {file = "boto3-1.28.8.tar.gz", hash = "sha256:cf88309d9b8cd9a2fb0c8049cb4b217b4e9dcb55bf670d6054b0bbe2eef25e57"}, + {file = "boto3-1.28.12-py3-none-any.whl", hash = "sha256:cfcb20d5784428f31d89889e68b26efeda90f231c3119eef4af8b25ad405c55f"}, + {file = "boto3-1.28.12.tar.gz", hash = "sha256:d5ac6599951fdd519ed26c6fe15c41a7aa4021cb9adce33167344f8ce5cdb07b"}, ] [package.dependencies] -botocore = ">=1.31.8,<1.32.0" +botocore = 
">=1.31.12,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -399,13 +418,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.8" +version = "1.31.12" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.31.8-py3-none-any.whl", hash = "sha256:61ba7efaa6305c1928b9b3fbb6f780cbfbd762e19008d20c11ba52b47f20e1b0"}, - {file = "botocore-1.31.8.tar.gz", hash = "sha256:092baa2168ae78080b0c28011527bfc11d8debd3767aa1e9a4ce8a91fd9943a2"}, + {file = "botocore-1.31.12-py3-none-any.whl", hash = "sha256:86380672151866b5e425636e3ebad74f2b83e7163e36ef5d38d11a04b9cba33b"}, + {file = "botocore-1.31.12.tar.gz", hash = "sha256:7e5db466c762a071bb58c9a39d070f1333ce4f4ba6fdf9820ba21e87bd4c7e29"}, ] [package.dependencies] @@ -418,13 +437,13 @@ crt = ["awscrt (==0.16.26)"] [[package]] name = "certifi" -version = "2023.5.7" +version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"}, - {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"}, + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, ] [[package]] @@ -725,24 +744,31 @@ files = [ {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, ] +[[package]] +name = "distro" +version = "1.8.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.8.0-py3-none-any.whl", hash = "sha256:99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff"}, + {file = "distro-1.8.0.tar.gz", hash = "sha256:02e111d1dc6a50abb8eed6bf31c3e48ed8b0830d1ea2a1b78c61765c2513fdd8"}, +] + [[package]] name = "dnspython" -version = "2.4.0" +version = "2.4.1" description = "DNS toolkit" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "dnspython-2.4.0-py3-none-any.whl", hash = "sha256:46b4052a55b56beea3a3bdd7b30295c292bd6827dd442348bc116f2d35b17f0a"}, - {file = "dnspython-2.4.0.tar.gz", hash = "sha256:758e691dbb454d5ccf4e1b154a19e52847f79e21a42fef17b969144af29a4e6c"}, + {file = "dnspython-2.4.1-py3-none-any.whl", hash = "sha256:5b7488477388b8c0b70a8ce93b227c5603bc7b77f1565afe8e729c36c51447d7"}, + {file = "dnspython-2.4.1.tar.gz", hash = "sha256:c33971c79af5be968bb897e95c2448e11a645ee84d93b265ce0b7aabe5dfdca8"}, ] -[package.dependencies] -httpcore = {version = ">=0.17.3", markers = "python_version >= \"3.8\""} -sniffio = ">=1.1,<2.0" - [package.extras] dnssec = ["cryptography (>=2.6,<42.0)"] -doh = ["h2 (>=4.1.0)", "httpx 
(>=0.24.1)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=0.17.3)", "httpx (>=0.24.1)"] doq = ["aioquic (>=0.9.20)"] idna = ["idna (>=2.1,<4.0)"] trio = ["trio (>=0.14,<0.23)"] @@ -1414,13 +1440,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "identify" -version = "2.5.25" +version = "2.5.26" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.5.25-py2.py3-none-any.whl", hash = "sha256:9df2489842707d431b38ce3410ef8df40da5b10a3e28a3fcac1a42523e956409"}, - {file = "identify-2.5.25.tar.gz", hash = "sha256:db4de0e758c0db8f81996816cd2f3f2f8c5c8d49a7fd02f3b4109aac6fd80e29"}, + {file = "identify-2.5.26-py2.py3-none-any.whl", hash = "sha256:c22a8ead0d4ca11f1edd6c9418c3220669b3b7533ada0a0ffa6cc0ef85cf9b54"}, + {file = "identify-2.5.26.tar.gz", hash = "sha256:7243800bce2f58404ed41b7c002e53d4d22bcf3ae1b7900c2d7aefd95394bf7f"}, ] [package.extras] @@ -2331,47 +2357,47 @@ files = [ [[package]] name = "pydantic" -version = "1.10.11" +version = "1.10.12" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ff44c5e89315b15ff1f7fdaf9853770b810936d6b01a7bcecaa227d2f8fe444f"}, - {file = "pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6c098d4ab5e2d5b3984d3cb2527e2d6099d3de85630c8934efcfdc348a9760e"}, - {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16928fdc9cb273c6af00d9d5045434c39afba5f42325fb990add2c241402d151"}, - {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0588788a9a85f3e5e9ebca14211a496409cb3deca5b6971ff37c556d581854e7"}, - {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e9baf78b31da2dc3d3f346ef18e58ec5f12f5aaa17ac517e2ffd026a92a87588"}, - {file = 
"pydantic-1.10.11-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:373c0840f5c2b5b1ccadd9286782852b901055998136287828731868027a724f"}, - {file = "pydantic-1.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:c3339a46bbe6013ef7bdd2844679bfe500347ac5742cd4019a88312aa58a9847"}, - {file = "pydantic-1.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08a6c32e1c3809fbc49debb96bf833164f3438b3696abf0fbeceb417d123e6eb"}, - {file = "pydantic-1.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a451ccab49971af043ec4e0d207cbc8cbe53dbf148ef9f19599024076fe9c25b"}, - {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b02d24f7b2b365fed586ed73582c20f353a4c50e4be9ba2c57ab96f8091ddae"}, - {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f34739a89260dfa420aa3cbd069fbcc794b25bbe5c0a214f8fb29e363484b66"}, - {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e297897eb4bebde985f72a46a7552a7556a3dd11e7f76acda0c1093e3dbcf216"}, - {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d185819a7a059550ecb85d5134e7d40f2565f3dd94cfd870132c5f91a89cf58c"}, - {file = "pydantic-1.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:4400015f15c9b464c9db2d5d951b6a780102cfa5870f2c036d37c23b56f7fc1b"}, - {file = "pydantic-1.10.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2417de68290434461a266271fc57274a138510dca19982336639484c73a07af6"}, - {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:331c031ba1554b974c98679bd0780d89670d6fd6f53f5d70b10bdc9addee1713"}, - {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8268a735a14c308923e8958363e3a3404f6834bb98c11f5ab43251a4e410170c"}, - {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:44e51ba599c3ef227e168424e220cd3e544288c57829520dc90ea9cb190c3248"}, - {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d7781f1d13b19700b7949c5a639c764a077cbbdd4322ed505b449d3ca8edcb36"}, - {file = "pydantic-1.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:7522a7666157aa22b812ce14c827574ddccc94f361237ca6ea8bb0d5c38f1629"}, - {file = "pydantic-1.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc64eab9b19cd794a380179ac0e6752335e9555d214cfcb755820333c0784cb3"}, - {file = "pydantic-1.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8dc77064471780262b6a68fe67e013298d130414d5aaf9b562c33987dbd2cf4f"}, - {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe429898f2c9dd209bd0632a606bddc06f8bce081bbd03d1c775a45886e2c1cb"}, - {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:192c608ad002a748e4a0bed2ddbcd98f9b56df50a7c24d9a931a8c5dd053bd3d"}, - {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ef55392ec4bb5721f4ded1096241e4b7151ba6d50a50a80a2526c854f42e6a2f"}, - {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e0bb6efe86281623abbeeb0be64eab740c865388ee934cd3e6a358784aca6e"}, - {file = "pydantic-1.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:265a60da42f9f27e0b1014eab8acd3e53bd0bad5c5b4884e98a55f8f596b2c19"}, - {file = "pydantic-1.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:469adf96c8e2c2bbfa655fc7735a2a82f4c543d9fee97bd113a7fb509bf5e622"}, - {file = "pydantic-1.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e6cbfbd010b14c8a905a7b10f9fe090068d1744d46f9e0c021db28daeb8b6de1"}, - {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abade85268cc92dff86d6effcd917893130f0ff516f3d637f50dadc22ae93999"}, - {file = 
"pydantic-1.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9738b0f2e6c70f44ee0de53f2089d6002b10c33264abee07bdb5c7f03038303"}, - {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:787cf23e5a0cde753f2eabac1b2e73ae3844eb873fd1f5bdbff3048d8dbb7604"}, - {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:174899023337b9fc685ac8adaa7b047050616136ccd30e9070627c1aaab53a13"}, - {file = "pydantic-1.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1954f8778489a04b245a1e7b8b22a9d3ea8ef49337285693cf6959e4b757535e"}, - {file = "pydantic-1.10.11-py3-none-any.whl", hash = "sha256:008c5e266c8aada206d0627a011504e14268a62091450210eda7c07fabe6963e"}, - {file = "pydantic-1.10.11.tar.gz", hash = "sha256:f66d479cf7eb331372c470614be6511eae96f1f120344c25f3f9bb59fb1b5528"}, + {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"}, + {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"}, + {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"}, + {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"}, + {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"}, + {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"}, + {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"}, + {file = 
"pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"}, + {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"}, + {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"}, + {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"}, + {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"}, + {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"}, + {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"}, + {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"}, + {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"}, + {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"}, + {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"}, + {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"}, + {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = 
"sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"}, + {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"}, + {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"}, + {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"}, + {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"}, + {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"}, + {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"}, + {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"}, + {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"}, + {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"}, + {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"}, + {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"}, + {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"}, + {file = 
"pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"}, + {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"}, + {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"}, + {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"}, ] [package.dependencies] @@ -2872,6 +2898,20 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "scrapingbee" +version = "1.2.0" +description = "ScrapingBee Python SDK" +optional = false +python-versions = ">=3.7" +files = [ + {file = "scrapingbee-1.2.0-py3-none-any.whl", hash = "sha256:3ba6be481949a6bac3fb1ddc3d25204d27963d9f35bfe5f2871f7ec14da8bb1a"}, + {file = "scrapingbee-1.2.0.tar.gz", hash = "sha256:604e8aedd75ceff82737b5d95b4cfbe65484e8c534d4f14d8f52452faccd09f4"}, +] + +[package.dependencies] +requests = "*" + [[package]] name = "sentry-sdk" version = "1.28.1" @@ -3155,6 +3195,60 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +optional = false +python-versions = "*" +files = [ + {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = 
"tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "tomli" version = "2.0.1" 
@@ -3430,23 +3524,23 @@ docs = ["mkdocs", "pygments", "pymarkdown", "pymdown-extensions"] [[package]] name = "virtualenv" -version = "20.24.1" +version = "20.24.2" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.24.1-py3-none-any.whl", hash = "sha256:01aacf8decd346cf9a865ae85c0cdc7f64c8caa07ff0d8b1dfc1733d10677442"}, - {file = "virtualenv-20.24.1.tar.gz", hash = "sha256:2ef6a237c31629da6442b0bcaa3999748108c7166318d1f55cc9f8d7294e97bd"}, + {file = "virtualenv-20.24.2-py3-none-any.whl", hash = "sha256:43a3052be36080548bdee0b42919c88072037d50d56c28bd3f853cbe92b953ff"}, + {file = "virtualenv-20.24.2.tar.gz", hash = "sha256:fd8a78f46f6b99a67b7ec5cf73f92357891a7b3a40fd97637c27f854aae3b9e0"}, ] [package.dependencies] -distlib = ">=0.3.6,<1" -filelock = ">=3.12,<4" -platformdirs = ">=3.5.1,<4" +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<4" [package.extras] docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.3.1)", "pytest-env (>=0.8.1)", "pytest-freezer (>=0.4.6)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=67.8)", "time-machine (>=2.9)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "watchfiles" @@ -3732,4 +3826,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "9a28c447921f41d7c04eefbe1551131b5f663330cdb48a123c62a6aca750f818" 
+content-hash = "af7847bbf1ce80d04c7aed2dd250a61bdcf2d89c3c76070e191c730a69a19c60" diff --git a/platform/pyproject.toml b/platform/pyproject.toml index 674671de3d..979b58c554 100644 --- a/platform/pyproject.toml +++ b/platform/pyproject.toml @@ -41,6 +41,8 @@ networkx = "^3.1" pusher = "^3.3.2" pypdf2 = "^3.0.1" python-multipart = "^0.0.6" +scrapingbee = "^1.2.0" +anthropic = "^0.3.6" [tool.poetry.dev-dependencies] diff --git a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py new file mode 100644 index 0000000000..78a4343231 --- /dev/null +++ b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py @@ -0,0 +1,183 @@ +from loguru import logger +from reworkd_platform.settings import settings +from reworkd_platform.schemas.workflow.base import Block, BlockIOBase + +import re +import requests +from scrapingbee import ScrapingBeeClient +from bs4 import BeautifulSoup +import anthropic + +class ContentRefresherInput(BlockIOBase): + url: str + + +class ContentRefresherOutput(ContentRefresherInput): + original_content: str + refreshed_content: str + + +class ContentRefresherAgent(Block): + type = "ContentRefresherAgent" + description = "Refresh the content on an existing page" + input: ContentRefresherInput + + async def run(self) -> BlockIOBase: + logger.info(f"Starting {self.type}") + target_url = self.input.url + + target_content = get_page_content(target_url) + logger.info(target_content) + + keywords = find_content_kws(target_content) + logger.info(keywords) + + source_urls = search_results(keywords) + if target_url in source_urls: # TODO: check based on content overlap + source_urls.remove(target_url) + logger.info(source_urls) + + source_contents = [ + get_page_content(url) for url in source_urls[:3] # TODO: remove limit of 3 sources + ] # TODO: async/multithread the LLM calls + source_contents = [ + content for content 
in source_contents if content is not None + ] + logger.info(source_contents) + + new_infos = "\n\n".join( + [ + find_new_info(target_content, source_content) + for source_content in source_contents + ] + ) + logger.info(new_infos) + + updated_target_content = add_info(target_content, new_infos) + logger.info(updated_target_content) + + return ContentRefresherOutput( + **self.input.dict(), + original_content=target_content, + refreshed_content=updated_target_content, + ) + + +scraper = ScrapingBeeClient( + api_key=settings.scrapingbee_api_key, +) +claude = anthropic.Anthropic( + api_key=settings.anthropic_api_key, +) + + +def get_page_content(url): + page = requests.get(url) + if page.status_code != 200: + page = scraper.get(url) + + html = BeautifulSoup(page.content, "html.parser") + + pgraphs = html.find_all("p") + pgraphs = "\n".join( + [ + f"{i+1}. " + re.sub(r"\s+", " ", p.text).strip() + for i, p in enumerate(pgraphs) + ] + ) + + prompt = f"Below is a numbered list of the text in all the

tags on a web page:\n{pgraphs}\nSome of these lines may not be part of the main content of the page (e.g. footer text, ads, etc). Please list the line numbers that *are* part of the main content (i.e. the article's paragraphs) of the page. You can list consecutive line numbers as a range (e.g. 23-27) and separated by a comma." + response = claude.completions.create( + model="claude-2", + prompt=f"\n\nHuman: {prompt}\n\nAssistant: Here are the line numbers of the main content:", + max_tokens_to_sample=500, + temperature=0, + ) + line_nums = response.completion.strip() + if len(line_nums) == 0: + return None + + pgraphs = pgraphs.split("\n") + content = [] + for line_num in line_nums.split(","): + if "-" in line_num: + start, end = line_num.split("-") + start, end = int(start), int(end) + for i in range(start, end + 1): + text = ".".join(pgraphs[i - 1].split(".")[1:]).strip() + content.append(text) + else: + text = ".".join(pgraphs[int(line_num) - 1].split(".")[1:]).strip() + content.append(text) + + content = "\n".join(content) + return content + + +def find_content_kws(content): + # Claude: find search keywords that content focuses on + prompt = f"Below is content from a web article:\n{content}\nPlease list the keywords that best describe the content of the article. Format them so we can use them to query a search engine effectively." 
+ response = claude.completions.create( + model="claude-2", + prompt=f"\n\nHuman: {prompt}\n\nAssistant: Here is a short search query that best matches the content of the article:", + max_tokens_to_sample=20, + temperature=0, + ) + response_message = response.completion.strip() + return response_message + + +def search_results(search_query): + # use SERP API + response = requests.post( + f"https://google.serper.dev/search", + headers={ + "X-API-KEY": settings.serp_api_key, + "Content-Type": "application/json", + }, + params={ + "q": search_query, + }, + ) + response.raise_for_status() + search_results = response.json() + urls = [result["link"] for result in search_results["organic"]] + return urls + + +def find_new_info(target, source): + # Claude: info mentioned in source that is not mentioned in target + prompt = f"Below is the TARGET article:\n{target}\n----------------\nBelow is the SOURCE article:\n{source}\n----------------\nIn a bullet point list, identify all facts, figures, or ideas that are mentioned in the SOURCE article but not in the TARGET article." + response = claude.completions.create( + model="claude-2", + prompt=f"\n\nHuman: {prompt}\n\nAssistant: Here is a list of claims in the SOURCE that are not in the TARGET:", + max_tokens_to_sample=5000, + temperature=0, + ) + response_message = response.completion.strip() + new_info = "\n".join(response_message.split("\n\n")) + return new_info + + +def add_info(target, info): + # Claude: rewrite target to include the info + prompt = f"Below are notes from some SOURCE articles:\n{info}\n----------------\nBelow is the TARGET article:\n{target}\n----------------\nPlease rewrite the TARGET article to include the information from the SOURCE articles." 
+ response = claude.completions.create( + model="claude-2", + prompt=f"\n\nHuman: {prompt}\n\nAssistant: Here is a rewritten version of the target article that incorporates relevant information from the source articles:", + max_tokens_to_sample=5000, + temperature=0, + ) + response_message = response.completion.strip() + return response_message + + +if __name__ == "main": + print('MAIN') + agent = ContentRefresherAgent( + input=ContentRefresherInput( + url="https://www.science.org/content/article/embattled-physicist-files-patent-unprecedented-ambient-superconductor" + ) + ) + output = agent.run() + print(output) \ No newline at end of file diff --git a/platform/reworkd_platform/settings.py b/platform/reworkd_platform/settings.py index 23012843aa..628ccfd126 100644 --- a/platform/reworkd_platform/settings.py +++ b/platform/reworkd_platform/settings.py @@ -60,6 +60,8 @@ class Settings(BaseSettings): replicate_api_key: Optional[str] = None serp_api_key: Optional[str] = None + scrapingbee_api_key: Optional[str] = None + anthropic_api_key: Optional[str] = None # Frontend URL for CORS frontend_url: str = "http://localhost:3000" diff --git a/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py b/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py index b85ce1914f..c3a9055c84 100644 --- a/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py +++ b/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py @@ -12,6 +12,9 @@ from reworkd_platform.schemas.workflow.blocks.url_status_check import ( UrlStatusCheckBlock, ) +from reworkd_platform.schemas.workflow.blocks.agents.content_refresher_agent import ( + ContentRefresherAgent, +) def get_block_runner(block: Block) -> Block: @@ -19,6 +22,8 @@ def get_block_runner(block: Block) -> Block: return IfCondition(**block.dict()) if block.type == "WebInteractionAgent": return WebInteractionAgent(**block.dict()) + if block.type == "ContentRefresherAgent": + return 
ContentRefresherAgent(**block.dict()) if block.type == "ManualTriggerBlock": return ManualTriggerBlock(**block.dict()) if block.type == "UrlStatusCheck": From 0d1752f77bd92df7331628449771463f158941c0 Mon Sep 17 00:00:00 2001 From: Rohan Pandey Date: Wed, 26 Jul 2023 16:57:41 -0700 Subject: [PATCH 2/4] mypy fixes --- .../blocks/agents/content_refresher_agent.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py index 78a4343231..9173d9e041 100644 --- a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py +++ b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py @@ -8,6 +8,7 @@ from bs4 import BeautifulSoup import anthropic + class ContentRefresherInput(BlockIOBase): url: str @@ -38,7 +39,8 @@ async def run(self) -> BlockIOBase: logger.info(source_urls) source_contents = [ - get_page_content(url) for url in source_urls[:3] # TODO: remove limit of 3 sources + get_page_content(url) + for url in source_urls[:3] # TODO: remove limit of 3 sources ] # TODO: async/multithread the LLM calls source_contents = [ content for content in source_contents if content is not None @@ -71,7 +73,7 @@ async def run(self) -> BlockIOBase: ) -def get_page_content(url): +def get_page_content(url: str) -> str: page = requests.get(url) if page.status_code != 200: page = scraper.get(url) @@ -95,14 +97,13 @@ def get_page_content(url): ) line_nums = response.completion.strip() if len(line_nums) == 0: - return None + return '' pgraphs = pgraphs.split("\n") content = [] for line_num in line_nums.split(","): if "-" in line_num: - start, end = line_num.split("-") - start, end = int(start), int(end) + start, end = map(int, line_num.split("-")) for i in range(start, end + 1): text = ".".join(pgraphs[i - 1].split(".")[1:]).strip() 
content.append(text) @@ -110,11 +111,10 @@ def get_page_content(url): text = ".".join(pgraphs[int(line_num) - 1].split(".")[1:]).strip() content.append(text) - content = "\n".join(content) - return content + return "\n".join(content) -def find_content_kws(content): +def find_content_kws(content: str) -> str: # Claude: find search keywords that content focuses on prompt = f"Below is content from a web article:\n{content}\nPlease list the keywords that best describe the content of the article. Format them so we can use them to query a search engine effectively." response = claude.completions.create( @@ -127,12 +127,12 @@ def find_content_kws(content): return response_message -def search_results(search_query): +def search_results(search_query: str) -> list[str]: # use SERP API response = requests.post( f"https://google.serper.dev/search", headers={ - "X-API-KEY": settings.serp_api_key, + "X-API-KEY": settings.serp_api_key or '', "Content-Type": "application/json", }, params={ @@ -145,7 +145,7 @@ def search_results(search_query): return urls -def find_new_info(target, source): +def find_new_info(target: str, source: str) -> str: # Claude: info mentioned in source that is not mentioned in target prompt = f"Below is the TARGET article:\n{target}\n----------------\nBelow is the SOURCE article:\n{source}\n----------------\nIn a bullet point list, identify all facts, figures, or ideas that are mentioned in the SOURCE article but not in the TARGET article." response = claude.completions.create( @@ -159,7 +159,7 @@ def find_new_info(target, source): return new_info -def add_info(target, info): +def add_info(target: str, info: str) -> str: # Claude: rewrite target to include the info prompt = f"Below are notes from some SOURCE articles:\n{info}\n----------------\nBelow is the TARGET article:\n{target}\n----------------\nPlease rewrite the TARGET article to include the information from the SOURCE articles." 
response = claude.completions.create( @@ -173,11 +173,13 @@ def add_info(target, info): if __name__ == "main": - print('MAIN') + print("MAIN") agent = ContentRefresherAgent( + id="test", + type="ContentRefresherAgent", input=ContentRefresherInput( url="https://www.science.org/content/article/embattled-physicist-files-patent-unprecedented-ambient-superconductor" ) ) output = agent.run() - print(output) \ No newline at end of file + print(output) From ff3d52d4a7abd848b6691af75e115f477db3d4f9 Mon Sep 17 00:00:00 2001 From: Rohan Pandey Date: Wed, 26 Jul 2023 17:06:58 -0700 Subject: [PATCH 3/4] content refresher main script removed --- .../blocks/agents/content_refresher_agent.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py index 9173d9e041..180e3e83ea 100644 --- a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py +++ b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py @@ -170,16 +170,3 @@ def add_info(target: str, info: str) -> str: ) response_message = response.completion.strip() return response_message - - -if __name__ == "main": - print("MAIN") - agent = ContentRefresherAgent( - id="test", - type="ContentRefresherAgent", - input=ContentRefresherInput( - url="https://www.science.org/content/article/embattled-physicist-files-patent-unprecedented-ambient-superconductor" - ) - ) - output = agent.run() - print(output) From b93cf710d9ceafb8cc992469ad0ead8214d9c4e5 Mon Sep 17 00:00:00 2001 From: asim-shrestha Date: Thu, 27 Jul 2023 17:15:17 -0700 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=92=BB=20Update=20poetry=20lock?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- platform/poetry.lock | 96 ++++++++++++++++++- .../blocks/agents/content_refresher_agent.py | 21 ++-- 
.../web/api/workflow/blocks/web/__init__.py | 6 +- 3 files changed, 107 insertions(+), 16 deletions(-) diff --git a/platform/poetry.lock b/platform/poetry.lock index 54efcbd905..8575c342d8 100644 --- a/platform/poetry.lock +++ b/platform/poetry.lock @@ -190,6 +190,26 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "anthropic" +version = "0.3.6" +description = "Client library for the anthropic API" +category = "main" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "anthropic-0.3.6-py3-none-any.whl", hash = "sha256:45036a96f38598be82237c12d77d7aefe814a3bceb9da0bc6721a381c29821b1"}, + {file = "anthropic-0.3.6.tar.gz", hash = "sha256:6e644c84ad9375dc12e07b36aab1862ca4db98eb1750e08acfe4847e62afe0dd"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<4" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<2.0.0" +tokenizers = ">=0.13.0" +typing-extensions = ">=4.1.1,<5" + [[package]] name = "anyio" version = "3.7.1" @@ -1488,7 +1508,7 @@ files = [ name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1564,7 +1584,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "httpx" version = "0.24.1" description = "The next generation HTTP client." 
-category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3357,6 +3377,21 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "scrapingbee" +version = "1.2.0" +description = "ScrapingBee Python SDK" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "scrapingbee-1.2.0-py3-none-any.whl", hash = "sha256:3ba6be481949a6bac3fb1ddc3d25204d27963d9f35bfe5f2871f7ec14da8bb1a"}, + {file = "scrapingbee-1.2.0.tar.gz", hash = "sha256:604e8aedd75ceff82737b5d95b4cfbe65484e8c534d4f14d8f52452faccd09f4"}, +] + +[package.dependencies] +requests = "*" + [[package]] name = "sentry-sdk" version = "1.28.1" @@ -3720,6 +3755,61 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = 
"tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = 
"tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "tomli" version = "2.0.1" @@ -4308,4 +4398,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "b5db2bb84a1157f09e3f2043a72df8dbb93f339a52f4852de0bbe41181b9ef1a" +content-hash = "4db8006ac7719ad9759d90e6222f132ca10a10cc671a58580b2c75f21ca29630" diff --git a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py 
b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py index 180e3e83ea..43d824084c 100644 --- a/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py +++ b/platform/reworkd_platform/schemas/workflow/blocks/agents/content_refresher_agent.py @@ -1,12 +1,13 @@ -from loguru import logger -from reworkd_platform.settings import settings -from reworkd_platform.schemas.workflow.base import Block, BlockIOBase - import re + +import anthropic import requests -from scrapingbee import ScrapingBeeClient from bs4 import BeautifulSoup -import anthropic +from loguru import logger +from scrapingbee import ScrapingBeeClient + +from reworkd_platform.schemas.workflow.base import Block, BlockIOBase +from reworkd_platform.settings import settings class ContentRefresherInput(BlockIOBase): @@ -23,7 +24,7 @@ class ContentRefresherAgent(Block): description = "Refresh the content on an existing page" input: ContentRefresherInput - async def run(self) -> BlockIOBase: + async def run(self, workflow_id: str) -> ContentRefresherOutput: logger.info(f"Starting {self.type}") target_url = self.input.url @@ -83,7 +84,7 @@ def get_page_content(url: str) -> str: pgraphs = html.find_all("p") pgraphs = "\n".join( [ - f"{i+1}. " + re.sub(r"\s+", " ", p.text).strip() + f"{i + 1}. 
" + re.sub(r"\s+", " ", p.text).strip() for i, p in enumerate(pgraphs) ] ) @@ -97,7 +98,7 @@ def get_page_content(url: str) -> str: ) line_nums = response.completion.strip() if len(line_nums) == 0: - return '' + return "" pgraphs = pgraphs.split("\n") content = [] @@ -132,7 +133,7 @@ def search_results(search_query: str) -> list[str]: response = requests.post( f"https://google.serper.dev/search", headers={ - "X-API-KEY": settings.serp_api_key or '', + "X-API-KEY": settings.serp_api_key or "", "Content-Type": "application/json", }, params={ diff --git a/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py b/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py index 23d813b432..291627162a 100644 --- a/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py +++ b/platform/reworkd_platform/web/api/workflow/blocks/web/__init__.py @@ -1,4 +1,7 @@ from reworkd_platform.schemas.workflow.base import Block +from reworkd_platform.schemas.workflow.blocks.agents.content_refresher_agent import ( + ContentRefresherAgent, +) from reworkd_platform.schemas.workflow.blocks.agents.web_interaction_agent import ( WebInteractionAgent, ) @@ -21,9 +24,6 @@ from reworkd_platform.schemas.workflow.blocks.url_status_check import ( UrlStatusCheckBlock, ) -from reworkd_platform.schemas.workflow.blocks.agents.content_refresher_agent import ( - ContentRefresherAgent, -) def get_block_runner(block: Block) -> Block: