In [34]:
import openai
import panel as pn
from sklearn import datasets
import numpy as np
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns

In [13]:
# Read the OpenAI credential from the environment; the result is None when
# OPENAI_KEY is not set.
openai.api_key = os.getenv("OPENAI_KEY")

In [14]:
# Load the iris dataset that ships with scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [15]:
# Map each integer class index to its name, in the order of iris['target_names'].
type_map = dict(enumerate(iris['target_names']))

In [16]:
# Assemble one frame holding the feature columns followed by a 'target' column,
# then replace the numeric class codes in 'target' with their names via type_map.
df = pd.DataFrame(
    np.column_stack((iris['data'], iris['target'])),
    columns=[*iris['feature_names'], 'target'],
).assign(target=lambda frame: frame['target'].map(type_map))

In [17]:
# Split the columns into numeric and non-numeric ones.
# select_dtypes(include="number") matches every numeric dtype; comparing the
# dtypes against `float` only matches float64, so integer or float32 columns
# would end up in `other_vars` and be treated as categorical downstream.
numeric_vars = list(df.select_dtypes(include="number").columns)
# Everything that is not numeric, kept in the frame's original column order.
other_vars = [col for col in df.columns if col not in numeric_vars]

In [18]:
# For every non-numeric column, record the distinct values it takes
# (in order of first appearance, as returned by Series.unique()).
cat_levels = {column: list(df[column].unique()) for column in other_vars}

In [19]:
def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    """Request a chat completion and return the text of its first choice.

    Args:
        messages: Conversation history passed unchanged as the ``messages``
            argument of ``openai.ChatCompletion.create``.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    request_args = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = openai.ChatCompletion.create(**request_args)

    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [86]:
def _run_generated_chart(raw_reply):
    """Execute the chart code carried by an assistant reply and return its PNG filename.

    Args:
        raw_reply: Assistant reply text, expected to be a JSON object with a
            ``codegen`` string (Python source) and a ``filename`` string.

    Returns:
        The value of the reply's ``filename`` entry.

    Raises:
        ValueError: If the reply is not valid JSON, is not a JSON object, flags
            ``error``, or lacks a string ``codegen`` / ``filename`` entry.
        Exception: Whatever the generated code itself raises.
    """
    # strict=False accepts raw newlines inside the "codegen" string, which the
    # model may emit instead of the escaped form.
    reply = json.loads(raw_reply, strict=False)
    if not isinstance(reply, dict):
        raise ValueError("The assistant reply is not a JSON object.")
    if reply.get("error"):
        raise ValueError("The request could not be turned into a visualization.")

    code = reply.get("codegen")
    filename = reply.get("filename")
    if not isinstance(code, str) or not isinstance(filename, str):
        raise ValueError("The assistant reply is missing a usable 'codegen' or 'filename' entry.")

    figures_before = set(plt.get_fignums())
    try:
        # SECURITY: this runs model-generated code with full access to the
        # kernel. Only use it in a trusted or sandboxed environment.
        # A single namespace (a copy of the module globals) is passed so that
        # comprehensions and lambdas in the generated code can see names the
        # code itself defines, without leaking those names into the notebook.
        exec(code, dict(globals()))
    finally:
        # Close figures the generated code opened so later charts start from a
        # clean canvas and figures do not pile up in memory.
        for fig_num in set(plt.get_fignums()) - figures_before:
            plt.close(fig_num)
    return filename


def collect_messages(_):
    """Send the typed prompt to the model and append the exchange to the chat view.

    Reads the prompt from ``inp``, clears the input, records the user message
    and the assistant reply in ``context``, and appends one row per speaker to
    ``panels``. The assistant row shows the generated PNG, or the error text if
    the reply could not be parsed or its code failed.

    Args:
        _: Unused; the value supplied by the widget this callback is bound to.

    Returns:
        A ``pn.Column`` containing every row collected so far.
    """
    prompt = inp.value_input
    inp.value = ''

    context.append({'role': 'user', 'content': f"{prompt}"})
    response = get_completion_from_messages(context)
    reply_text = f"{response}"
    context.append({'role': 'assistant', 'content': reply_text})

    panels.append(
        pn.Row('User:', pn.pane.Markdown(prompt, width=600))
    )
    try:
        # Parse the reply once, run its code, then load the exported image.
        reply_pane = pn.pane.PNG(_run_generated_chart(reply_text))
    except Exception as e:
        # Best effort: show the failure in the chat instead of breaking the dashboard.
        reply_pane = pn.pane.Markdown(str(e), width=600, style={'background-color': '#F6F6F6'})
    panels.append(pn.Row('Assistant:', reply_pane))

    return pn.Column(*panels)

In [87]:
# Worked examples shown to the model. They are serialized with json.dumps so
# each example reply is guaranteed to be valid JSON (quotes are closed and the
# newlines inside the code string are escaped).
_SCATTER_PNG = "85994439-444c-43ed-b9fd-cfc05a356eac.png"
_COUNT_PNG = "11606cf3-360e-4f4e-9417-504b561e7a02.png"

_scatter_example = json.dumps({
    "codegen": (
        f"p = sns.scatterplot(data=df, x='{numeric_vars[0]}', y='{numeric_vars[1]}')\n"
        "fig = p.get_figure()\n"
        f"fig.savefig('{_SCATTER_PNG}')"
    ),
    "filename": _SCATTER_PNG,
})
_count_example = json.dumps({
    "codegen": (
        "p = sns.countplot(data=df, x='target')\n"
        "fig = p.get_figure()\n"
        f"fig.savefig('{_COUNT_PNG}')"
    ),
    "filename": _COUNT_PNG,
})

# System prompt: describes the bot's role, the dataset, the required JSON
# reply format, and a handful of request/reply examples.
# Every fragment ends with a space so adjacent sentences do not run together.
content = (
    "You are Exploratory Data Analysis Bot, an automated service that solely generates Python code so that users "
    "can visually explore their dataset interactively. You are blunt and only know how to communicate using JSON and "
    "cannot use normal prose. You are a developer assistant where you only provide the code for a question. "
    "No explanation required. "
    "You have access to a pandas DataFrame loaded into memory as ```df``` and a limited "
    "set of Python data science tools. You can use pandas, matplotlib, and seaborn. You can assume "
    "that pandas is already imported as pd and matplotlib is already imported as plt and "
    "seaborn is already imported as sns. "
    "This is a strict requirement. Here is what we know about the dataset. "
    f"There are {len(df.columns)} variables. The following array of variables are numeric "
    f"```{numeric_vars}```. Next is a JSON object that contains all "
    "categorical variables as keys and the valid categories of each categorical variable as values "
    # Emit real JSON, as the sentence above promises, rather than a Python repr.
    f"{json.dumps(cat_levels, default=str)}. That is it! "
    "The output must be a JSON object with ```codegen``` as a key and Python code that generates a visualization "
    "as the corresponding value. Make sure that the code exports the chart as a PNG with a UUID. The second key "
    "in the JSON should be ```filename``` and contain the name of the exported PNG as the value. "
    "Do not output any text other than the JSON object. The user only wants the JSON and no additional commentary. "
    "If for any reason the request cannot be handled giving the constraints or more information is needed, "
    "return the following JSON object ```{\"error\": true}```. "

    "Some examples: "

    f"- generate a scatterplot of  {numeric_vars[0]} versus {numeric_vars[1]}: {_scatter_example} "

    f"- plot the values of the target varible: {_count_example} "

    "- what is the square root of 16?: {\"error\": true} "

    "- hello: {\"error\": true} "

    "- bgdbgbbgn d: {\"error\": true} "

    # Requests that cannot be turned into a chart must use the error object.
    "- help!: {\"error\": true} "

    "Make sure that the output is valid JSON before responding."
)

In [88]:
# Display the assembled system prompt so its final wording can be checked.
content

'You are Exploratory Data Analysis Bot, an automated service that solely generates Python code so that users can visually explore their dataset interactively. You are blunt and only know how to communicate using JSON and     cannot use normal prose. You are a developer assistant where you only provide the code for a question. No explanation required.You have access to a pandas DataFrame loaded into memory as ```df``` and a limited set of Python data science tools. You can use pandas, matplotlib, and seaborn. You can assume that pandas is already imported as pd and matplotlib is already imported as plt and seaborn is already imported as sns. This is a strict requirement. Here is what we know about the dataset. There are 5 variables. The following array of variables are numeric ```[\'sepal length (cm)\', \'sepal width (cm)\', \'petal length (cm)\', \'petal width (cm)\']```. Next is a JSON object that contains all categorical variables as keys and the valid categories of each categorical 

In [89]:
pn.extension()

# Conversation state read and updated by collect_messages.
panels = []  # chat rows rendered so far
context = [dict(role='system', content=content)]  # message history, seeded with the system prompt

# Widgets: a text box for the request and a button bound to the callback.
inp = pn.widgets.TextInput(placeholder='Enter text here…', value="Hi")
button_conversation = pn.widgets.Button(name='Chat!')
interactive_conversation = pn.bind(collect_messages, button_conversation)

# Layout: input on top, the button below it, then the conversation output.
dashboard = pn.Column(
    inp,
    pn.Row(button_conversation),
    pn.panel(interactive_conversation, height=300, loading_indicator=True),
)

dashboard

In [None]:
# NOTE(review): none of the cells visible here assigns `fig` at notebook scope
# (it only appears inside generated-code strings), so this cell presumably
# raises NameError on a fresh kernel — confirm it is still needed or remove it.
fig