# Package Installer with Streamlit

Streamlit turns data scripts into shareable web apps in minutes.

We can use LLMs to generate Streamlit data scripts, but there is one issue with this approach.

Often, LLM-generated code uses packages that are not installed, so they have to be installed manually. This is where the package installer comes in.

Let's say that we want to convert natural-language data queries into Streamlit data scripts.


In [1]:
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain_core.messages import SystemMessage
from dotenv import load_dotenv
import pandas as pd
import io
load_dotenv()

True

In [2]:
# System message: rendered with jinja2, so `{{dataset_info}}` is the only
# template variable and any single braces in the text stay literal.
_system_prompt = SystemMessagePromptTemplate.from_template(
    template="""Your are a data analyst. Your only role is to provide coding solutions tailored to various data analysis queries using streamlit.io. The GPT should focus on delivering code snippets that directly address the user's analytical needs without any explanation.

The programming language should be Python and framework should be Streamlit. The output should not be in markdown format and only contain Python code without any explanation.

For your convenience, the following libraries are pre-imported in this environment:
```
import streamlit as st
import pandas as pd
import numpy as np
```

The dataset has been already imported:
```python
df = pd.read_csv("path_to_your_csv_file.csv")
```

Dataset Information:{{dataset_info}}""",
    template_format="jinja2",
)

# Human message: default template format, filled from the `query` variable.
_human_prompt = HumanMessagePromptTemplate.from_template("{query}")

# Slot for the messages supplied under the `agent_scratchpad` variable.
_scratchpad_slot = MessagesPlaceholder(variable_name="agent_scratchpad")

chat_template = ChatPromptTemplate.from_messages(
    [_system_prompt, _human_prompt, _scratchpad_slot]
)

In [3]:
# Chat model that generates the Streamlit code; temperature=0 keeps the
# completions as repeatable as possible.
llm = ChatOpenAI(model="gpt-4", temperature=0)

In [4]:
# chain = LLMChain(llm=llm, prompt=chat_template)

In [5]:
# Load the dataset that the queries are about. The path is relative to the
# notebook's working directory (one level up).
df = pd.read_csv("../california_housing_train.csv")

In [6]:
def dataset_info():
    """Return the summary produced by ``df.info()`` as a string.

    Reads the module-level ``df``. ``DataFrame.info`` is given an in-memory
    text buffer through ``buf`` and the buffer's contents are returned.
    """
    with io.StringIO() as report:
        df.info(buf=report)
        return report.getvalue()

In [7]:
# params = {"dataset_info": dataset_info(), "query": "How can I plot population against long, latitude ?"}

In [8]:
# ans = chain.run(params)

In [9]:
# print(ans.replace("`", "").replace("python", ""))

In [10]:
import subprocess
import sys
from typing import List, Optional, Type, Union

from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import BaseTool, Tool

class PackageInstallInput(BaseModel):
    """Arguments for the PackageInstallTool."""

    # Either a single package name or a list of names. The leading `...`
    # marks the field as required (no default value).
    # `description` and `examples` are metadata attached to the field.
    package_names: Union[str, List[str]] = Field(
        ...,
        description="List of package name(s) to install",
        examples=["pandas", ["pandas", "numpy"]],
    )

class PackageInstallTool(BaseTool):
    """Tool that installs Python packages in runtime."""

    name: str = "package_install"
    args_schema: Type[BaseModel] = PackageInstallInput
    description: str = "Install Python packages during run time."

    def _run(
        self,
        package_names: Union[str, List[str]],
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> bool:
        """Install the requested packages with pip for the running interpreter.

        Args:
            package_names: A single requirement string or a list of them.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            True when pip exits successfully, False otherwise. Failures are
            printed and reported as False rather than raised.
        """
        try:
            if isinstance(package_names, str):
                package_names = [package_names]
            if not package_names:
                raise ValueError("no package names were provided")
            for package_name in package_names:
                if not package_name.strip():
                    raise ValueError("package names must not be blank")
                # The names come from the model. Anything that looks like a
                # command-line option (e.g. "--index-url=...") would be
                # parsed by pip as an option rather than a requirement.
                if package_name.lstrip().startswith("-"):
                    raise ValueError(
                        f"refusing option-like package name: {package_name!r}"
                    )
            # Use the current interpreter's pip so the packages land in the
            # environment this process is running in.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", *package_names]
            )
            print(f"Packages successfully installed: {', '.join(package_names)}.")
            return True
        except Exception as e:
            # Deliberately broad: any failure is reported as False instead of
            # propagating to the caller.
            print("Error: " + str(e))
            return False

    async def _arun(
        self,
        package_names: Union[str, List[str]],
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> bool:
        """Async entry point; not supported.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(f"{self.name} does not support async")

    def as_tool(self) -> Tool:
        """Wrap this tool's ``_run`` in a ``Tool`` built by ``Tool.from_function``.

        The wrapper reuses this instance's name, description and args schema.
        """
        return Tool.from_function(
            func=self._run,
            name=self.name,
            description=self.description,
            args_schema=self.args_schema,
        )

In [11]:
# The only tool in this notebook: the package installer, wrapped as a `Tool`
# via `PackageInstallTool.as_tool()`.
tools = [PackageInstallTool().as_tool()]

In [12]:
tools

[Tool(name='package_install', description='Install Python packages during run time.', args_schema=<class '__main__.PackageInstallInput'>, func=<bound method PackageInstallTool._run of PackageInstallTool()>)]

In [13]:
from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
# Convert each tool to an OpenAI function definition and bind the list to the
# model as the `functions` argument.
llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

In [14]:
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

# Maps the incoming input dict onto the three variables the prompt expects.
# The scratchpad is rebuilt from `intermediate_steps` each time it is evaluated.
_agent_inputs = {
    "dataset_info": lambda inputs: inputs["dataset_info"],
    "query": lambda inputs: inputs["query"],
    "agent_scratchpad": lambda inputs: format_to_openai_function_messages(
        inputs["intermediate_steps"]
    ),
}

# Pipeline: input mapping -> prompt -> model with functions bound -> parser
# for the model's output.
agent = (
    _agent_inputs
    | chat_template
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

In [15]:
from langchain.agents import AgentExecutor

# Build the executor from the agent and its tools; verbose=True prints the
# chain trace while it runs.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [16]:
# Inputs for one run: the keys match the `dataset_info` and `query` variables
# used by the prompt. `dataset_info()` is evaluated once, here.
params = {"dataset_info": dataset_info(), "query": "How can I plot population against long, latitude ?"}

In [17]:
# Run the agent; the final answer is read back from result['output'] below.
result = agent_executor.invoke(params)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```python
import matplotlib.pyplot as plt

st.title('Population Distribution')

# Create a scatter plot
fig, ax = plt.subplots()
scatter = ax.scatter(df['longitude'], df['latitude'], c=df['population'], cmap='viridis')

# Add a colorbar
plt.colorbar(scatter)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Population Distribution')

st.pyplot(fig)
```[0m

[1m> Finished chain.[0m


In [18]:
print(result['output'])

```python
import matplotlib.pyplot as plt

st.title('Population Distribution')

# Create a scatter plot
fig, ax = plt.subplots()
scatter = ax.scatter(df['longitude'], df['latitude'], c=df['population'], cmap='viridis')

# Add a colorbar
plt.colorbar(scatter)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Population Distribution')

st.pyplot(fig)
```
