In [None]:
"""
Conduct Data Analysis using OpenAI + Streamlit
"""

In [8]:
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain.chains import LLMChain
from langchain_core.messages import SystemMessage
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [113]:
# Prompt for the code-generation chain: a system message that fixes the
# assistant's role and output format, followed by the user's query.
chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(
            # The system message is rendered as a Jinja2 template, so its only
            # placeholder is the double-braced {{dataset_info}} at the end.
            template_format="jinja2",
            template="""You are a data analyst. Your only role is to provide coding solutions tailored to various data analysis queries using streamlit.io. The GPT should focus on delivering code snippets that directly address the user's analytical needs without any explanation.

The programming language should be Python and framework should be Streamlit. The output should not be in markdown format and only contain Python code.

For your convenience, the following libraries are pre-imported in this environment:
```
import streamlit as st
import pandas as pd
import numpy as np
```

The dataset has been already imported:
```python
df = pd.read_csv("path_to_your_csv_file.csv")
```

Dataset Information:{{dataset_info}}"""
        ),
        # The human message uses the default single-brace placeholder syntax.
        HumanMessagePromptTemplate.from_template("{query}"),
    ]
)

In [114]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

In [115]:
# Chat model used by the chain below: GPT-4 with temperature set to 0.
# NOTE(review): temperature=0 is presumably chosen to keep generated code as
# repeatable as possible — confirm this is the intent.
llm = ChatOpenAI(model="gpt-4", temperature=0)

In [116]:
# CSV location, relative to the notebook's working directory.
HOUSING_CSV = "../california_housing_train.csv"

# Dataset the generated Streamlit code is expected to operate on.
df = pd.read_csv(HOUSING_CSV)

In [118]:
# Bind the prompt template to the chat model so that a single call renders
# the prompt and sends it to the LLM.
chain = LLMChain(
    prompt=chat_template,
    llm=llm,
)

In [119]:
def dataset_info(frame: "pd.DataFrame | None" = None) -> str:
    """Return the report produced by ``DataFrame.info()`` as a string.

    Args:
        frame: DataFrame to describe. When omitted, the notebook-level ``df``
            is used, which matches the original zero-argument behaviour.

    Returns:
        The text that ``info()`` would otherwise print (class, index range,
        per-column non-null counts and dtypes, memory usage).
    """
    import io  # local import: `io` is not imported at notebook level

    target = df if frame is None else frame
    buf = io.StringIO()
    # Passing a buffer makes info() write its report there instead of printing it.
    target.info(buf=buf)
    return buf.getvalue()

In [125]:
# Show the same column / dtype summary that dataset_info() captures as text.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17000 entries, 0 to 16999
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           17000 non-null  float64
 1   latitude            17000 non-null  float64
 2   housing_median_age  17000 non-null  float64
 3   total_rooms         17000 non-null  float64
 4   total_bedrooms      17000 non-null  float64
 5   population          17000 non-null  float64
 6   households          17000 non-null  float64
 7   median_income       17000 non-null  float64
 8   median_house_value  17000 non-null  float64
dtypes: float64(9)
memory usage: 1.2 MB


In [120]:
import subprocess
import sys
def install_package(package_name: str = "pandas"):
    """Install a package into the running kernel's environment with pip.

    pip is run through ``sys.executable -m pip`` so that the package is
    installed for the interpreter that is executing this notebook.

    Args:
        package_name: Requirement to install (for example ``"scikit-learn"``).

    Raises:
        TypeError: If ``package_name`` is not a string.
        ValueError: If ``package_name`` is empty or starts with ``-``. Such a
            value would be parsed by pip as a command-line option rather than
            as a package, which matters when names come from generated code.
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    if not isinstance(package_name, str):
        raise TypeError(f"package_name must be a string, got {type(package_name).__name__}")

    requirement = package_name.strip()
    if not requirement:
        raise ValueError("package_name must not be empty")
    if requirement.startswith("-"):
        raise ValueError(f"package_name must not be a pip option: {package_name!r}")

    subprocess.check_call([sys.executable, "-m", "pip", "install", requirement])

In [121]:
# Example of installing the 'scikit-learn' package
install_package('scikit-learn')



In [122]:
# Template variables: `dataset_info` fills the system prompt placeholder,
# `query` fills the human message.
params = dict(
    dataset_info=dataset_info(),
    query="How can I plot population against long, latitude ?",
)

In [123]:
# Render the prompt with `params`, send it to the model, and keep the reply.
# NOTE(review): `Chain.run` is deprecated in recent LangChain releases in
# favour of `invoke` — confirm against the installed version.
ans = chain.run(params)

In [124]:
def _strip_code_fences(text: str) -> str:
    """Remove Markdown code-fence lines (``` or ```python) and keep the rest.

    Only whole fence lines are dropped, so backticks and the word "python"
    inside the generated code itself are left untouched.
    """
    kept = [line for line in text.splitlines() if not line.lstrip().startswith("```")]
    return "\n".join(kept)


# Print the generated Streamlit code without any Markdown fencing.
print(_strip_code_fences(ans))


import matplotlib.pyplot as plt

st.title('Population Distribution')

# Create a scatter plot
fig, ax = plt.subplots()
scatter = ax.scatter(df['longitude'], df['latitude'], c=df['population'], cmap='viridis')

# Add a colorbar
plt.colorbar(scatter)

# Set the title and labels
plt.title('Population Distribution')
plt.xlabel('Longitude')
plt.ylabel('Latitude')

# Show the plot
st.pyplot(fig)



In [None]:
"""
To utilize a package that isn't already imported in your script, you can easily install it using the `install_package` function. Replace `"your_package_name_here"` with the name of the package you wish to use.

```python
install_package("your_package_name_here")
```
"""

## Detect the packages that need to be installed, and install them

In [None]:
"""
To utilize a package that isn't already imported in your script, you can easily install it using the `install_package` function. Replace `"your_package_name_here"` with the name of the package you wish to use.

```python
install_package("your_package_name_here")
```
"""

In [None]:
# LLMSDAChain