In [1]:
!pip install -r ../requirements.txt



In [2]:
import dotenv
assert dotenv.load_dotenv()

In [3]:
# Import required libraries
import os
from langchain_aws import ChatBedrock

# Set up the model ID for Claude
MODEL_ID3 = "meta.llama3-8b-instruct-v1:0"
MODEL_ID5 = "meta.llama3-70b-instruct-v1:0"
#MODEL_ID = "mistral.mistral-7b-instruct-v0:2"
MODEL_ID4 = "mistral.mixtral-8x7b-instruct-v0:1"
MODEL_ID2 = "anthropic.claude-3-haiku-20240307-v1:0"
MODEL_ID = "anthropic.claude-3-5-sonnet-20240620-v1:0"

HEADERS = {
    "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15",
    "Content-Type": "application/json"
}

# Initialize the ChatBedrock instance
llm = ChatBedrock(model_id=MODEL_ID, model_kwargs={'temperature': 0, "max_tokens": 8192})
llm2 = ChatBedrock(model_id=MODEL_ID2, model_kwargs={'temperature': 0})
llm3 = ChatBedrock(model_id=MODEL_ID4, model_kwargs={'temperature': 0})
llm4 = ChatBedrock(model_id=MODEL_ID, model_kwargs={'temperature': 0.7})

In [5]:
from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
    ToolMessage,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langgraph.graph import END, StateGraph, START


def create_agent(llm, tools, system_message: str):
    """Create an agent."""
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a helpful AI assistant, collaborating with other assistants."
                " Use the provided tools to progress towards answering the question."
                " If you are unable to fully answer, that's OK, another assistant with different tools "
                " will help where you left off. Execute what you can to make progress."
                " If you or any of the other assistants have the final answer or deliverable,"
                " prefix your response with FINAL ANSWER so the team knows to stop."
                " You have access to the following tools: {tool_names}.\n{system_message}",
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    prompt = prompt.partial(system_message=system_message)
    prompt = prompt.partial(tool_names=", ".join([tool.name for tool in tools]))
    return prompt | llm.bind_tools(tools)

In [5]:
from typing import Annotated
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL


# Warning: This executes code locally, which can be unsafe when not sandboxed

repl = PythonREPL()


@tool
def python_repl(
    code: Annotated[str, "The python code to execute to generate your chart."],
):
    """Use this to execute python code. If you want to see the output of a value,
    you should print it out with `print(...)`. This is visible to the user."""
    try:
        result = repl.run(code)
    except BaseException as e:
        return f"Failed to execute. Error: {repr(e)}"
    result_str = f"Successfully executed:\n\`\`\`python\n{code}\n\`\`\`\nStdout: {result}"
    return (
        result_str + "\n\nIf you have completed all tasks, respond with FINAL ANSWER."
    )

In [6]:
import operator
from typing import Annotated, Sequence
from typing_extensions import TypedDict

from langchain_openai import ChatOpenAI


# This defines the object that is passed between each node
# in the graph. We will create different nodes for each agent and tool
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]
    sender: str

In [7]:
import tools.csv_handling as csv_tools
import tools.database as db_tools
import tools.web_crawl as web_tools
import tools.data_viz as viz_tools

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [8]:
import functools

from langchain_core.messages import AIMessage


# Helper function to create a node for a given agent
def agent_node(state, agent, name):
    result = agent.invoke(state)
    # We convert the agent output into a format that is suitable to append to the global state
    if isinstance(result, ToolMessage):
        pass
    else:
        result = AIMessage(**result.dict(exclude={"type", "name"}), name=name)
    return {
        "messages": [result],
        # Since we have a strict workflow, we can
        # track the sender so we know who to pass to next.
        "sender": name,
    }

# # chart_generator
# chart_agent = create_agent(
#     llm,
#     [python_repl],
#     system_message="""
#     You create seaborn charts based on the data you receive from the researcher
#     """,
# )
# chart_node = functools.partial(agent_node, agent=chart_agent, name="chart_generator")

# chart_generator
research_agent = create_agent(
    llm,
    [viz_tools.custom_plot_from_string_to_s3],
    system_message="""
    You are an expert for seaborn code and pride yourself in knowing the code to create the most stunning visuals.
    To get inspiration you query https://seaborn.pydata.org/examples.
    Your goal is to create a beautiful seaborn plot based on the user question.
    You generate data from the user question to provide a fictitious data set for your code.
    ALWAYS store your plot in a variable "fig".
    
    
    ## Examples
    '''
    import seaborn as sns
    sns.set_theme(style="white")

    # Load the example mpg dataset
    mpg = sns.load_dataset("mpg")

    # Plot miles per gallon against horsepower with other semantics
    # Set the theme
    sns.set_theme(style="white")

    # Load the example mpg dataset
    mpg = sns.load_dataset("mpg")

    # Plot miles per gallon against horsepower with other semantics
    fig = sns.relplot(x="horsepower", y="mpg", hue="origin", size="weight",
                      sizes=(40, 400), alpha=.5, palette="muted",
                      height=6, data=mpg)
    '''
    """,
)
research_node = functools.partial(agent_node, agent=research_agent, name="research_agent")

In [17]:
from langgraph.prebuilt import ToolNode

tools = [viz_tools.custom_plot_from_string_to_s3]
tool_node = ToolNode(tools)

In [18]:
# Either agent can decide to end
from typing import Literal


def router(state):
    # This is the router
    messages = state["messages"]
    last_message = messages[-1]
    if last_message.tool_calls:
        # The previous agent is invoking a tool
        return "call_tool"
    if "FINAL ANSWER" in last_message.content:
        # Any agent decided the work is done
        return END
    return "continue"

In [19]:
workflow = StateGraph(AgentState)

# workflow.add_node("chart_generator", chart_node)
workflow.add_node("research_agent", research_node)
workflow.add_node("call_tool", tool_node)
workflow.add_conditional_edges(
    "research_agent",
    router,
    {"continue": "research_agent", "call_tool": "call_tool", END: END},
)
workflow.add_edge(START, "research_agent")
workflow.add_edge("research_agent", END)
graph2 = workflow.compile()

In [20]:
events = graph2.stream(
    {
        "messages": [
            HumanMessage(
                content="Show a plot of the correlation of a countries GDP and its readings skills according to the PIRLS 2021 study"
            )
        ],
    },
    # Maximum number of steps to take in the graph
    {"recursion_limit": 150},
)
for s in events:
    print(s)

{'research_agent': {'messages': [AIMessage(content='', additional_kwargs={'usage': {'prompt_tokens': 1235, 'completion_tokens': 632, 'total_tokens': 1867}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-5-sonnet-20240620-v1:0'}, response_metadata={'usage': {'prompt_tokens': 1235, 'completion_tokens': 632, 'total_tokens': 1867}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-5-sonnet-20240620-v1:0'}, name='research_agent', id='run-4c178428-337e-4810-be2f-c6b9382598b8-0', tool_calls=[{'name': 'custom_plot_from_string_to_s3', 'args': {'plot_code_string': 'import seaborn as sns\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Set the seed for reproducibility\nnp.random.seed(42)\n\n# Generate fictitious data\nn_countries = 30\ncountries = [f"Country {i}" for i in range(1, n_countries + 1)]\ngdp_per_capita = np.random.uniform(5000, 80000, n_countries)\nreading_score = 400 + 0.001 * gdp_per_capita + np.random.normal(0, 20, n_countries)\n\n

### Create ASCII plot

## Create seaborn chart

In [9]:
import seaborn as sns
import matplotlib.pyplot as plt
import json

def json_string_to_seaborn_plot(json_string):
    """
    Generates a seaborn plot from a JSON string input.

    JSON string Input format:
    {
        "data": {
            "x": [...],  # list of x values
            "y": [...]   # list of y values
        },
        "plot_type": "line"  # You can specify different plot types if needed
    }

    Example usage:
    json_string = '{"data": {"x": [...], "y": [...]}, "plot_type": "line"}'
    """
    # Parse the JSON string into a dictionary
    json_input = json.loads(json_string)

    # Extract data
    x = json_input["data"]["x"]
    y = json_input["data"]["y"]
    
    # Choose the type of plot (extendable for other plot types like scatter, bar, etc.)
    plot_type = json_input.get("plot_type", "line")
    
    # Generate plot based on the type
    plt.figure(figsize=(8, 6))
    
    if plot_type == "line":
        sns.lineplot(x=x, y=y)
    # Add more plot types if needed
    else:
        raise ValueError("Unsupported plot type.")
    
    plt.title('Seaborn Plot from JSON Input')
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    
    # Show the plot
    plt.show()

# Example usage with JSON string
json_string_input = json.dumps({
    "data": {
        "Factor_1": {"Factor_1": 1.0, "Factor_2": -0.108348, "Factor_3": 0.071725, "Factor_4": -0.076555, "Average_Reading_Score": -0.118946},
        "Factor_2": {"Factor_1": -0.108348, "Factor_2": 1.0, "Factor_3": -0.182089, "Factor_4": 0.188899, "Average_Reading_Score": 0.131942},
        "Factor_3": {"Factor_1": 0.071725, "Factor_2": -0.182089, "Factor_3": 1.0, "Factor_4": -0.020478, "Average_Reading_Score": 0.008728},
        "Factor_4": {"Factor_1": -0.076555, "Factor_2": 0.188899, "Factor_3": -0.020478, "Factor_4": 1.0, "Average_Reading_Score": 0.289095},
        "Average_Reading_Score": {"Factor_1": -0.118946, "Factor_2": 0.131942, "Factor_3": 0.008728, "Factor_4": 0.289095, "Average_Reading_Score": 1.0}
    },
    "plot_type": "heatmap"
})

# Call the function
json_string_to_seaborn_plot(json_string_input)

KeyError: 'x'

### Excel reading

In [13]:
import pandas as pd

In [28]:
# Load the Excel file
xls = pd.ExcelFile("https://pirls2021.org/wp-content/uploads/2022/files/A-1_students-assessed.xlsx")

# Get all sheet names
sheet_names = xls.sheet_names
sheet_names

['Exhibit A.1']

In [None]:
sheet_names = ['Sheet1', 'Sheet2', 2]
dfs = pd.read_excel('file_path.xlsx', sheet_name=sheet_names)
combined_df = pd.concat(dfs, ignore_index=True)

In [29]:
xls = pd.ExcelFile("name")
sheet_names = xls.sheet_names
dfs = pd.read_excel("name", sheet_name=sheet_names)
combined_df = pd.concat(dfs, ignore_index=True)
combined_df.fillna('').to_json(orient="values")

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,Exhibit A.1: Information About the Students As...,,,,,,,,,
1,,,Assessed Fourth Grade Students at the End of t...,,,,,,,
2,,,⋈ Assessed one year later than originally sc...,,,,,,,
3,,,Delayed Assessment of Fourth Grade Cohort at ...,,,,,,,
4,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
79,,,,,,,,,,
80,,,,,,,,,,
81,,,,,,,,,,
82,,SOURCE: IEA's Progress in International Readi...,,,,,,,,


'[["Exhibit A.1: Information About the Students Assessed in PIRLS 2021","","","","","","","","",""],["","","Assessed Fourth Grade Students at the End of the School Year","","","","","","",""],["","","\\u22c8   Assessed one year later than originally scheduled","","","","","","",""],["",""," Delayed Assessment of Fourth Grade Cohort at the Beginning of Fifth Grade","","","","","","",""],["","","","","","","","","",""],["The PIRLS target population is the grade that represents four years of schooling counting from the first year of ISCED Level 1. In 6 countries, delayed test administration due to COVID-19 occurred a full year later at the end of the fourth year of schooling for the next cohort (\\u22c8). In 14 Northern Hemisphere countries, delayed test administration occurred half a year later at the beginning of the fifth year of schooling (\\u25fc).\\n\\nIEA has a policy that students do not fall under the minimum average age of 9.5 years old at the time of testing, so England, Malta,