# Package Installer with Streamlit

Streamlit turns data scripts into shareable web apps in minutes.

We can use LLMs to generate data scripts that use Streamlit, but there is one issue with this approach.

Oftentimes, LLMs use packages that are not installed, and these then have to be installed manually. This is where the package installer comes in.

Let's say that we want to convert natural language data queries into Streamlit data scripts.


In [34]:
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain_core.messages import SystemMessage
from dotenv import load_dotenv
import pandas as pd
import io

load_dotenv()

True

In [128]:
# prompt = ChatPromptTemplate.from_messages(
#     [
#         SystemMessagePromptTemplate.from_template(
#             template_format="jinja2",
#             template="""Your are a data analyst. Your only role is to provide code snippets in Python tailored to various data analysis queries using streamlit.io. Do not explain anything and provide only code as output. You will also invoke the provided tools to install packages imported in the code.
#
# The following libraries are pre-imported and installed in this environment:
# ```
# import streamlit as st
# import pandas as pd
# import numpy as np
# ```
#
# The dataset has been already imported:
# ```python
# df = pd.read_csv("path_to_your_csv_file.csv")
# ```
#
# Dataset Information:{{dataset_info}}"""
#         ),
#         HumanMessagePromptTemplate.from_template("{query}"),
#         MessagesPlaceholder(variable_name="agent_scratchpad")
#     ]
# )

In [134]:
import subprocess
import sys
from typing import List, Optional, Type, Union

from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import BaseTool, Tool


# Argument schema for the package installer tool (it is wired in through
# `args_schema` on PackageInstallTool).
class PackageInstallInput(BaseModel):
    """Arguments for the PackageInstallTool."""

    # Either one package name or a list of names. `...` as the default marks
    # the field as required; `description` and `examples` are schema metadata.
    package_names: Union[str, List[str]] = Field(
        ...,
        description="List of package name(s) to install",
        examples=["matplotlib", ["pandas", "numpy"]],
    )


class PackageInstallTool(BaseTool):
    """Tool that installs Python packages in runtime.

    Packages are installed by running ``pip install`` with the interpreter
    that executes this code (``sys.executable -m pip``). Only synchronous
    execution is implemented.
    """

    name: str = "package_install"
    args_schema: Type[BaseModel] = PackageInstallInput
    description: str = "Install Python packages dynamically during runtime as they are imported"

    def _run(
            self,
            package_names: Union[str, List[str]],
            run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> bool:
        """Install the requested package(s) with pip.

        Args:
            package_names: A single package name or a list of names.
            run_manager: Callback manager passed by the tool runner; unused.

        Returns:
            True when pip exits successfully. False when the request is
            rejected or the installation fails; the error is printed
            instead of being raised.
        """
        try:
            if isinstance(package_names, str):
                package_names = [package_names]
            # The names are chosen by the LLM, so treat them as untrusted:
            # refuse an empty request, blank names, and anything pip would
            # parse as a command-line option (e.g. "--index-url=..." or "-r").
            if not package_names or any(
                    not name.strip() or name.lstrip().startswith("-")
                    for name in package_names
            ):
                raise ValueError(f"invalid package name(s): {package_names!r}")
            # Argument list (no shell) so names are never shell-interpreted.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", *package_names]
            )
            print(f"Packages successfully installed: {', '.join(package_names)}.")
            return True
        except Exception as e:
            # Best effort: report the failure and return False.
            print("Error: " + str(e))
            return False

    async def _arun(
            self,
            package_names: Union[str, List[str]],
            run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> bool:
        """Async counterpart of ``_run``; not implemented.

        The signature mirrors ``_run``.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(f"{self.name} does not support async")

    def as_tool(self) -> Tool:
        """Wrap ``_run`` in a ``Tool`` with this tool's name, description and schema."""
        return Tool.from_function(
            func=self._run,
            name=self.name,
            description=self.description,
            args_schema=self.args_schema,
        )

In [158]:
# Prompt for the agent: a system message, the user's query, and a slot for the
# agent scratchpad messages.
prompt = ChatPromptTemplate.from_messages(
    [
        # jinja2 template, so the dataset summary is injected via {{dataset_info}}.
        SystemMessagePromptTemplate.from_template(
            template_format="jinja2",
            template="""You are a data analyst. Your role is to provide code snippets in Python tailored to various data analysis queries using streamlit.io, and install any imported packages using the provided tools.

Dataset Information:{{dataset_info}}"""
        ),
        # The user's question, filled in from the "query" input.
        HumanMessagePromptTemplate.from_template("{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ]
)

In [159]:
# Chat model that drives the agent; temperature=0 asks for the least random output.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

In [160]:
# Load the California housing training CSV; the path is relative to the
# current working directory.
df = pd.read_csv("../california_housing_train.csv")

In [161]:
def dataset_info(frame: Optional[pd.DataFrame] = None) -> str:
    """Return the ``DataFrame.info()`` summary of a dataset as a string.

    Args:
        frame: DataFrame to describe. When omitted, the notebook-level ``df``
            is used, so existing ``dataset_info()`` calls keep working.

    Returns:
        The text ``info()`` writes, captured from an in-memory buffer.
    """
    if frame is None:
        frame = df  # fall back to the dataset loaded earlier in the notebook
    # info() writes its report to a buffer instead of returning it.
    buf = io.StringIO()
    frame.info(buf=buf)
    return buf.getvalue()

In [162]:
# The agent gets a single tool: the package installer, wrapped via as_tool().
tools = [PackageInstallTool().as_tool()]

In [170]:
from langchain.agents import AgentExecutor, create_openai_functions_agent
# Build an OpenAI-functions agent from the chat model, the tool list and the prompt.
agent = create_openai_functions_agent(llm, tools, prompt)

In [171]:
# from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
# llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

In [172]:
# def function(x):
#     print(x)
#     return format_to_openai_function_messages(x["intermediate_steps"])

In [173]:
# from langchain.agents.format_scratchpad import format_to_openai_function_messages
# from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
#
# agent = (
#         {
#             "dataset_info": lambda x: x["dataset_info"],
#             "query": lambda x: x["query"],
#             "agent_scratchpad": lambda x: function(x),
#         }
#         | prompt
#         | llm_with_tools
#         | OpenAIFunctionsAgentOutputParser()
# )

In [174]:
# NOTE: AgentExecutor is also imported in the cell that builds `agent`.
from langchain.agents import AgentExecutor

# Executor that runs the agent with the same tool list it was built with.
# verbose=True prints the chain trace shown in the cell output below;
# return_intermediate_steps=True asks for the intermediate steps to be
# returned along with the final output.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    return_intermediate_steps=True
)

In [175]:
# Inputs for the prompt: the keys match its {{dataset_info}} and {query}
# variables. "agent_scratchpad" is not supplied here.
params = {
    "dataset_info": dataset_info(),
    "query": "How can I plot population against long, latitude ?",
    # "agent_scratchpad": "Generate  the code. Then use Package Installer to install any packages imported in the code."
}

In [176]:
# Run the agent; the final answer is read from result['output'] further down.
result = agent_executor.invoke(params)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `package_install` with `{'package_names': 'matplotlib'}`


Packages successfully installed: matplotlib.
[36;1m[1;3mTrue[0m[32;1m[1;3mYou can use the following code snippet to plot population against longitude and latitude using streamlit.io:

```python
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd

# Load the dataset
data = pd.read_csv('path_to_your_dataset.csv')

# Create a scatter plot
fig, ax = plt.subplots()
scatter = ax.scatter(data['longitude'], data['latitude'], s=data['population']/100, alpha=0.5)

# Customize the plot
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Population Distribution')

# Show the plot using Streamlit
st.pyplot(fig)
```[0m

[1m> Finished chain.[0m


In [177]:
print(result['output'])

You can use the following code snippet to plot population against longitude and latitude using streamlit.io:

```python
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd

# Load the dataset
data = pd.read_csv('path_to_your_dataset.csv')

# Create a scatter plot
fig, ax = plt.subplots()
scatter = ax.scatter(data['longitude'], data['latitude'], s=data['population']/100, alpha=0.5)

# Customize the plot
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Population Distribution')

# Show the plot using Streamlit
st.pyplot(fig)
```


In [70]:
result

{'dataset_info': "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 17000 entries, 0 to 16999\nData columns (total 9 columns):\n #   Column              Non-Null Count  Dtype  \n---  ------              --------------  -----  \n 0   longitude           17000 non-null  float64\n 1   latitude            17000 non-null  float64\n 2   housing_median_age  17000 non-null  float64\n 3   total_rooms         17000 non-null  float64\n 4   total_bedrooms      17000 non-null  float64\n 5   population          17000 non-null  float64\n 6   households          17000 non-null  float64\n 7   median_income       17000 non-null  float64\n 8   median_house_value  17000 non-null  float64\ndtypes: float64(9)\nmemory usage: 1.2 MB\n",
 'query': 'How can I plot population against long, latitude ?',
 'output': "```python\nimport matplotlib.pyplot as plt\n\nst.map(df[['latitude', 'longitude', 'population']])\n\nplt.figure(figsize=(10,6))\nplt.scatter(df['longitude'], df['latitude'], c=df['population'], cmap='