In [1]:
# Install the packages below into the kernel's environment via the %pip magic.
# NOTE(review): no visible cell imports `wikipedia`, while the `langchain` /
# `langchain_community` imports used in a later cell are not installed here —
# confirm the intended dependency list (and consider pinning versions).
%pip install boto3
%pip install wikipedia

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import boto3
from botocore.config import Config

# AWS region name; passed to init_bedrock_client() when the shared client is created.
region = 'us-east-1'
def init_bedrock_client(region: str):
    """Build a boto3 ``bedrock-runtime`` client for the given region.

    The client is configured with botocore's ``"standard"`` retry mode and
    ``max_attempts`` set to 10.

    Args:
        region: AWS region name. It is applied both to the botocore ``Config``
            and to the ``boto3.client`` call.

    Returns:
        The object returned by ``boto3.client("bedrock-runtime", ...)``.
    """
    retry_settings = {"max_attempts": 10, "mode": "standard"}
    client_config = Config(region_name=region, retries=retry_settings)
    return boto3.client(
        "bedrock-runtime",
        region_name=region,
        config=client_config,
    )

def converse_with_bedrock_tools(
    sys_prompt,
    usr_prompt,
    tool_config,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    client=None,
):
    """Send a single ``converse`` request that includes tool definitions.

    Args:
        sys_prompt: Value forwarded as the ``system`` argument.
        usr_prompt: Value forwarded as the ``messages`` argument.
        tool_config: Value forwarded as the ``toolConfig`` argument.
        model_id: Model identifier forwarded as ``modelId``. Defaults to the
            Claude 3 Sonnet id this function previously hard-coded.
        client: Object exposing ``converse(**kwargs)``. When ``None`` the
            module-level ``boto3_client`` is used, as before.

    Returns:
        Whatever ``client.converse`` returns, unmodified.
    """
    if client is None:
        # Looked up at call time, so the global may be assigned after this
        # function is defined (but must exist before the first call).
        client = boto3_client

    # Sampling settings are fixed: temperature 0.0 and topP 0.1 go through
    # inferenceConfig; top_k is sent via additionalModelRequestFields.
    inference_config = {"temperature": 0.0, "topP": 0.1}
    additional_model_fields = {"top_k": 1}

    return client.converse(
        modelId=model_id,
        messages=usr_prompt,
        system=sys_prompt,
        inferenceConfig=inference_config,
        additionalModelRequestFields=additional_model_fields,
        toolConfig=tool_config,
    )

def create_prompt(sys_template, user_template, **kwargs):
    """Fill both templates with ``kwargs`` and wrap them as prompt structures.

    Each template is rendered with ``str.format(**kwargs)``, so literal braces
    in a template are interpreted as replacement fields.

    Args:
        sys_template: Format string for the system prompt.
        user_template: Format string for the user message.
        **kwargs: Values substituted into both templates.

    Returns:
        A ``(sys_prompt, usr_prompt)`` tuple: a one-element list holding a
        ``{"text": ...}`` block, and a one-element list holding a ``"user"``
        role message whose content is a single ``{"text": ...}`` block.
    """
    rendered_system = sys_template.format(**kwargs)
    rendered_user = user_template.format(**kwargs)
    user_message = {"role": "user", "content": [{"text": rendered_user}]}
    return [{"text": rendered_system}], [user_message]

# Shared client for this notebook; converse_with_bedrock_tools() reads this module-level name.
boto3_client = init_bedrock_client(region)

In [3]:
# System prompt for knowledge-graph extraction. It is rendered through
# str.format() by create_prompt(), so it must not contain literal braces.
# Stray quote characters in the coreference section (a leading ' and a " before
# "knowledge graph") were removed; the rest of the text is unchanged.
system_prompt = """
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
Try to capture as much information from the text as possible without sacrificing accuracy. 
Do not add any information that is not explicitly mentioned in the text.
- **Nodes** represent entities and concepts.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.

## 2. Labeling Nodes
- **Consistency**: Ensure you use available types for node labels.
Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person, always label it as **'person'**. Avoid using more specific terms like 'mathematician' or 'scientist'.
- **Node IDs**: Never utilize integers as node IDs. 
Node IDs should be names or human-readable identifiers found in the text.
- **Relationships** represent connections between entities or concepts.
Ensure consistency and generality in relationship types when constructing knowledge graphs. Instead of using specific and momentary types such as 'BECAME_PROFESSOR', use more general and timeless relationship types like 'PROFESSOR'. Make sure to use general and timeless relationship types!

## 3. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"), always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.

## 4. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
"""

# User-message template. The {input} field is filled by create_prompt(), which
# renders the template with str.format(**kwargs) (called with input=text).
user_prompt_template = """
Tip: Make sure to answer in the correct format and do not include any explanations. Use the given format to extract information from the following input: 

{input}
"""


In [4]:
# Allowed node labels; used as the `enum` (and listed in the description) of
# every node-type field in tool_config.
node_type = ['Person', 'Country', 'Organization']
# Allowed relationship types; used as the `enum` of the relationship `type` field.
rel_type = ['NATIONALITY', 'LOCATED_IN', 'WORKED_AT', 'SPOUSE']
# Node property names advertised in the tool schema; each is declared as a string.
properties = ['born_year', 'occupation']

def list_to_string(lst):
    """Render ``lst`` as a JSON array string, e.g. ``["Person", "Country"]``.

    The previous implementation took ``str(lst)`` and replaced every ``'``
    with ``"``, which corrupted items containing an apostrophe or a double
    quote. ``json.dumps`` yields the same text for plain string lists and
    escapes special characters correctly.

    Args:
        lst: A JSON-serializable sequence (typically a list of strings).

    Returns:
        The JSON text for ``lst``; non-ASCII characters are kept as-is.

    Raises:
        TypeError: If ``lst`` contains a value ``json.dumps`` cannot serialize.
    """
    import json  # local import so the function does not depend on another cell

    return json.dumps(lst, ensure_ascii=False)

# Tool definition sent as `toolConfig`: a single "DynamicGraph" tool whose input
# schema describes a list of nodes and a list of relationships. Node and
# relationship types are restricted to node_type / rel_type via `enum`.
# Changes from the earlier version: the doubled period in four descriptions is
# fixed, and `nodes` / `relationships` are declared required at the top level so
# the schema states explicitly where those keys belong.
tool_config = {
  "tools": [
    {
      "toolSpec": {
        "name": "DynamicGraph",
        "description": "Represents a graph document consisting of nodes and relationships.",
        "inputSchema": {
          "json": {
            "type": "object",
            "properties": {
              "nodes": {
                "description": "List of nodes",
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "id": {
                      "description": "Name or human-readable unique identifier.",
                      "type": "string"
                    },
                    "type": {
                      "description": f"The type or label of the node. Available options are {list_to_string(node_type)}",
                      "enum": node_type,
                      "type": "string"
                    },
                    "properties": {
                      "type": "object",
                      "description": f"Optional properties for the node. Available properties are {list_to_string(properties)}",
                      # One string-typed entry per name in `properties`.
                      "properties": {prop: {"type": "string"} for prop in properties},
                      "additionalProperties": True
                    }
                  },
                  "required": ["id", "type"]
                }
              },
              "relationships": {
                "description": "List of relationships",
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "source_node_id": {
                      "description": "Name or human-readable unique identifier of source node",
                      "type": "string"
                    },
                    "source_node_type": {
                      "description": f"The type or label of the source node. Available options are {list_to_string(node_type)}",
                      "enum": node_type,
                      "type": "string"
                    },
                    "target_node_id": {
                      "description": "Name or human-readable unique identifier of target node",
                      "type": "string"
                    },
                    "target_node_type": {
                      "description": f"The type or label of the target node. Available options are {list_to_string(node_type)}",
                      "enum": node_type,
                      "type": "string"
                    },
                    "type": {
                      "description": f"The type of the relationship. Available options are {list_to_string(rel_type)}",
                      "enum": rel_type,
                      "type": "string"
                    }
                  },
                  "required": [
                    "source_node_id",
                    "source_node_type",
                    "target_node_id",
                    "target_node_type",
                    "type"
                  ]
                }
              }
            },
            "required": ["nodes", "relationships"]
          }
        }
      }
    }
  ]
}

In [5]:
import logging

class StationNotFoundError(Exception):
    """Signals that a requested radio station could not be found."""
# Logger named after this module (`__main__` in the notebook); basicConfig sets
# the root level to INFO so the logger.info() calls further down are emitted.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Sample passage to extract a graph from.
text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""

# Render both prompts (the passage fills the {input} field of the user template),
# then send them together with the DynamicGraph tool definition.
sys_prompt, user_prompt = create_prompt(system_prompt, user_prompt_template, input=text)
response = converse_with_bedrock_tools(sys_prompt, user_prompt, tool_config=tool_config)

In [6]:
# Dump the raw response dict to inspect its structure (response metadata and the
# assistant message, whose content carries the `toolUse` payload).
print(response)

{'ResponseMetadata': {'RequestId': '9ce42dec-d8b6-4863-a68c-686eb5c1cddc', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sun, 25 Aug 2024 12:48:20 GMT', 'content-type': 'application/json', 'content-length': '1164', 'connection': 'keep-alive', 'x-amzn-requestid': '9ce42dec-d8b6-4863-a68c-686eb5c1cddc'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'toolUse': {'toolUseId': 'tooluse_LMoAoCR8SS-4lHqVhR3JEA', 'name': 'DynamicGraph', 'input': {'properties': {'nodes': [{'id': 'Marie Curie', 'type': 'Person', 'properties': {'born_year': '1867', 'occupation': 'physicist'}}, {'id': 'Pierre Curie', 'type': 'Person', 'properties': {'occupation': 'physicist'}}, {'id': 'Poland', 'type': 'Country'}, {'id': 'France', 'type': 'Country'}, {'id': 'University of Paris', 'type': 'Organization'}], 'relationships': [{'source_node_id': 'Marie Curie', 'source_node_type': 'Person', 'target_node_id': 'Poland', 'target_node_type': 'Country', 'type': 'NATIONALITY'}, {'source_node

In [7]:
from typing import Dict, List
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain.schema import Document

def create_graph_document(input_data: Dict) -> List[GraphDocument]:
    """Convert a DynamicGraph tool payload into a single ``GraphDocument``.

    Two payload layouts are accepted: ``nodes`` / ``relationships`` at the top
    level, or the same two keys nested under a top-level ``properties`` key.

    A relationship may reference a node id that is missing from ``nodes``.
    Previously that raised ``KeyError``; now the missing node is created from
    the relationship's ``source_node_type`` / ``target_node_type`` field and
    appended to the node list.

    Args:
        input_data: The tool input dict. An optional top-level ``source`` key
            supplies the page content of the source document (default ``''``).

    Returns:
        A one-element list containing the built ``GraphDocument``.

    Raises:
        ValueError: If neither accepted layout is present.
        KeyError: If a node lacks ``id`` / ``type`` or a relationship lacks
            ``source_node_id`` / ``target_node_id`` / ``type``.
    """
    if 'nodes' in input_data and 'relationships' in input_data:
        payload = input_data
    elif 'properties' in input_data and 'nodes' in input_data['properties'] and 'relationships' in input_data['properties']:
        payload = input_data['properties']
    else:
        raise ValueError("Invalid input data structure. Expected 'nodes' and 'relationships' keys.")

    nodes = [
        Node(id=node['id'], type=node['type'], properties=node.get('properties', {}))
        for node in payload['nodes']
    ]
    # With duplicate ids the last occurrence wins, as before.
    node_dict = {node.id: node for node in nodes}

    def _resolve_node(node_id, declared_type):
        """Return the node for ``node_id``, creating and registering it if absent."""
        if node_id not in node_dict:
            # Fall back to the generic "Node" label when the relationship
            # carries no type for this endpoint.
            created = Node(id=node_id, type=declared_type or "Node")
            node_dict[node_id] = created
            nodes.append(created)
        return node_dict[node_id]

    relationships = [
        Relationship(
            source=_resolve_node(rel['source_node_id'], rel.get('source_node_type')),
            target=_resolve_node(rel['target_node_id'], rel.get('target_node_type')),
            type=rel['type']
        ) for rel in payload['relationships']
    ]

    # `source` is read from the top level of the input, not from the nested layout.
    source_content = input_data.get('source', '')
    source_doc = Document(page_content=source_content)

    graph_doc = GraphDocument(nodes=nodes, relationships=relationships, source=source_doc)
    return [graph_doc]

In [8]:
# Inspect the response: when the model stopped in order to call a tool, walk the
# content blocks of the assistant message and turn each DynamicGraph request
# into graph documents, printing their nodes and relationships.
output_message = response['output']['message']
stop_reason = response['stopReason']

if stop_reason == 'tool_use':
    tool_requests = output_message['content']
    for tool_request in tool_requests:
        # Content blocks without a `toolUse` entry are ignored.
        if 'toolUse' not in tool_request:
            continue

        tool = tool_request['toolUse']
        logger.info("Requesting tool %s. Request: %s",
                    tool['name'], tool['toolUseId'])

        # Only the DynamicGraph tool is handled here.
        if tool['name'] != 'DynamicGraph':
            continue

        tool_result = {}
        try:
            graph_documents = create_graph_document(tool['input'])
            for graph_document in graph_documents:
                print(f"Nodes: {graph_document.nodes}")
                print(f"Relationships: {graph_document.relationships}")
        except Exception as err:
            # Any failure is reported as an error-status tool result and
            # printed; it is not re-raised.
            tool_result = {
                "toolUseId": tool['toolUseId'],
                "content": [{"text":  str(err)}],
                "status": 'error'
            }
            print(f"Error: {tool_result}")

INFO:__main__:Requesting tool DynamicGraph. Request: tooluse_LMoAoCR8SS-4lHqVhR3JEA


Nodes: [Node(id='Marie Curie', type='Person', properties={'born_year': '1867', 'occupation': 'physicist'}), Node(id='Pierre Curie', type='Person', properties={'occupation': 'physicist'}), Node(id='Poland', type='Country'), Node(id='France', type='Country'), Node(id='University of Paris', type='Organization')]
Relationships: [Relationship(source=Node(id='Marie Curie', type='Person', properties={'born_year': '1867', 'occupation': 'physicist'}), target=Node(id='Poland', type='Country'), type='NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person', properties={'born_year': '1867', 'occupation': 'physicist'}), target=Node(id='France', type='Country'), type='NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person', properties={'born_year': '1867', 'occupation': 'physicist'}), target=Node(id='Pierre Curie', type='Person', properties={'occupation': 'physicist'}), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person', properties={'born_year': '