In [None]:
"""
Conduct Data Analysis using OpenAI + Streamlit
"""

In [8]:
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain.chains import LLMChain
from langchain_core.messages import SystemMessage
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by ChatOpenAI — confirm.
load_dotenv()

True

In [27]:
# Two-message prompt: a system message that fixes the assistant's role and
# embeds the dataset summary, followed by a human message carrying the user's
# question. The system template is rendered with jinja2 (``{{dataset_info}}``),
# while the human template uses the single-brace ``{query}`` placeholder, so
# callers must supply both ``dataset_info`` and ``query``.
chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(
            template_format="jinja2",
            template="""You are a data analyst. Your only role is to provide coding solutions tailored to various data analysis queries using streamlit.io. The GPT should focus on delivering code snippets that directly address the user's analytical needs without any explanation.

The programming language should be Python and framework should be Streamlit. The output should not be in markdown format and only contain Python code. Consider that streamlit and pandas have been imported. Any other libraries should be installed first.

Dataset Information:{{dataset_info}}"""
        ),
        HumanMessagePromptTemplate.from_template("{query}"),
    ]
)

In [28]:
import pandas as pd

In [29]:
# Chat model shared by the chain built further down; temperature=0 requests
# the least random sampling so repeated runs stay as stable as possible.
llm = ChatOpenAI(model="gpt-4", temperature=0)

In [30]:
# Load the housing training data. The path is relative to the notebook's
# working directory, and `df` is read as a global by dataset_info().
df = pd.read_csv("../california_housing_train.csv")

In [40]:
"""
- Conditional - Does it need code + text, only code, only text ?
- Pass Dataset Info
- Ask for a query
- Get code, install all packages that are needed, or use the tools provided to install packages
- Get the relevant streamlit docs context to the model.
- Print the code using Streamlit.
"""

'\n- Conditional - Does it need code + text, only code, only text ?\n- Pass Dataset Info\n- Ask for a query\n- Get code, install all packages that are needed, or use the tools provided to install packages\n- Get the relevant streamlit docs context to the model.\n- Print the code using Streamlit.\n\n'

In [32]:
# Bind the chat model to the prompt template; the chain is later run with a
# dict providing the template variables (`dataset_info` and `query`).
chain = LLMChain(llm=llm, prompt=chat_template)

In [33]:
def dataset_info(frame=None):
    """Return the text summary that ``DataFrame.info()`` produces.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to summarise. When omitted, the notebook-level ``df`` is used,
        so existing ``dataset_info()`` calls keep working unchanged.

    Returns
    -------
    str
        Everything ``frame.info()`` wrote to the in-memory buffer.

    Raises
    ------
    NameError
        If ``frame`` is omitted and no global ``df`` has been defined yet
        (e.g. the data-loading cell has not been run).
    """
    import io

    if frame is None:
        # Fall back to the notebook global for backward compatibility.
        frame = df

    # ``info()`` prints to stdout by default; redirect it into a buffer so the
    # summary can be passed to the prompt as a string.
    with io.StringIO() as buf:
        frame.info(buf=buf)
        return buf.getvalue()

In [42]:
import subprocess
import sys
def install_package(package_name):
    """Install ``package_name`` with pip, run through ``sys.executable``.

    Invoking pip as ``<sys.executable> -m pip`` targets the same interpreter
    that is executing this code. ``subprocess.check_call`` raises
    ``subprocess.CalledProcessError`` if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)

In [43]:
# Example of installing the 'scikit-learn' package
install_package('scikit-learn')

Collecting scikit-learn
  Using cached scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting scipy>=1.5.0 (from scikit-learn)
  Using cached scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl.metadata (112 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Using cached threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)
Using cached scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl (9.5 MB)
Using cached joblib-1.3.2-py3-none-any.whl (302 kB)
Using cached scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl (29.8 MB)
Using cached threadpoolctl-3.2.0-py3-none-any.whl (15 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.3.2 scikit-learn-1.3.2 scipy-1.11.4 threadpoolctl-3.2.0


In [37]:
# Values for the two prompt-template variables: the dataset summary and the
# user's analysis question.
params = {
    "dataset_info": dataset_info(),
    "query": "How can I plot population against long, latitude ?",
}

In [38]:
# Run the chain with the prompt inputs; the result is treated as a string
# below (the model's generated Streamlit code).
ans = chain.run(params)

In [39]:
# Strip only the Markdown code fence the model may wrap around its answer.
# Blanket-removing every "`" and every "python" substring would also corrupt
# legitimate code (e.g. an identifier such as `python_version`).
generated_code = ans.strip()
if generated_code.startswith("```"):
    _fence_line, _newline, _rest = generated_code.partition("\n")
    # The opening fence is "```" plus an optional language tag on its own
    # line; if everything is on a single line, drop just the three backticks.
    generated_code = _rest if _newline else _fence_line[3:]
if generated_code.endswith("```"):
    generated_code = generated_code[:-3]
print(generated_code.strip())


import streamlit as st
import pandas as pd
import pydeck as pdk

# Assuming that 'df' is your DataFrame
# and 'longitude', 'latitude' and 'population' are columns in 'df'

st.pydeck_chart(pdk.Deck(
    map_style='mapbox://styles/mapbox/light-v9',
    initial_view_state=pdk.ViewState(
        latitude=df['latitude'].mean(),
        longitude=df['longitude'].mean(),
        zoom=11,
        pitch=50,
    ),
    layers=[
        pdk.Layer(
            'HexagonLayer',
            data=df,
            get_position='[longitude, latitude]',
            auto_highlight=True,
            elevation_scale=50,
            pickable=True,
            elevation_range=[0, 3000],
            extruded=True,         
            get_fill_color='[200, 30, 0, 160]',
            get_radius="population",
        ),
    ],
))

