# Chat with a CSV file

This application allows you to upload a `.csv` file and ask questions about its data in plain English.

Sample questions:

- What is the max value of the column {{column}}?
- Count the NAs per column
- Plot a histogram/boxplot of {{column}}

Built with OpenAI, [JupySQL](https://github.com/ploomber/jupysql), [DuckDB](https://github.com/duckdb/duckdb), and [Voilà](https://github.com/voila-dashboards/voila)

## 1. Upload `.csv` file

In [1]:
# Debug flag for the notebook.
# NOTE(review): no visible cell reads this flag — confirm whether it is still needed.
DEBUG = True

In [2]:
# Load the `sql` extension (JupySQL), which provides the %sql / %sqlcmd magics
# used by the cells below.
%load_ext sql
# Set SqlMagic's `feedback` option to 0 (presumably silences per-query status
# messages — confirm against the JupySQL configuration docs).
%config SqlMagic.feedback = 0
# Connect to DuckDB without a database path (presumably an in-memory database — confirm).
%sql duckdb://

In [3]:
import uuid
from functools import partial

import openai
import ipywidgets as widgets
from IPython.display import display, clear_output
import requests
from sql.plot import boxplot, histogram
import matplotlib.pyplot as plt


def chat_with_gpt3(prompt):
    """Send ``prompt`` to the chat model and return the text of its reply.

    The conversation is seeded with the global ``INITIAL_PROMPT`` as the
    system message and one worked example (a request for the first five rows
    and the SQL that answers it) before the user's own prompt is appended.
    """
    conversation = [
        {"role": "system", "content": INITIAL_PROMPT},
        {"role": "user", "content": "Show me the first 5 rows"},
        {"role": "assistant", "content": "SELECT * FROM my_data LIMIT 5"},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    # Only the first returned choice is used.
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def gen_name():
    """Return a random CSV file name: eight hex characters plus ``.csv``."""
    # The first eight hex digits of a UUID4 are the same characters as the
    # first eight characters of its string form (the first dash comes later).
    prefix = uuid.uuid4().hex[:8]
    return f"{prefix}.csv"

In [4]:
def handle_api_key_input(_):
    """Store the OpenAI API key typed by the user and unlock the chat widgets.

    Registered as the callback for both the key text box and the "Save"
    button, so the single argument (whatever the widget passes) is ignored.

    The text box is always cleared so the key does not stay visible. A blank
    or whitespace-only submission is rejected: the key is left unset, the chat
    widgets stay disabled and ``api_key_info`` explains what went wrong.
    """
    # Surrounding whitespace is never part of a key; drop it before storing.
    api_input = api_key_input.value.strip()
    api_key_input.value = ''

    if not api_input:
        api_key_info.value = 'The API key cannot be empty. Please enter your OpenAI API key.'
        return

    openai.api_key = api_input
    submit_button.disabled = False
    user_input.disabled = False
    api_key_info.value = 'API key saved successfully. Now you are ready to chat!'

In [5]:
# text input
# Submitting this box (on_submit) saves the key via handle_api_key_input.
# The widget is not displayed here; set_initial_prompt displays it when no
# API key is configured.
# NOTE(review): the notebook output echoes the on_submit line as a warning;
# on_submit may be deprecated in the installed ipywidgets — confirm.
api_key_input = widgets.Text(placeholder='Enter your OpenAI API Key...')
api_key_input.on_submit(handle_api_key_input)

# submit button
# Clicking "Save" runs the same handler as submitting the text box.
save_api_button = widgets.Button(description='Save')
save_api_button.on_click(handle_api_key_input)

  api_key_input.on_submit(handle_api_key_input)


In [6]:
# System prompt sent to the model. It stays None until set_initial_prompt()
# has loaded a dataset; handle_submit_button refuses to chat while it is None.
INITIAL_PROMPT = None

# Load the CSV file `name` into the `my_data` table, rebuild INITIAL_PROMPT
# from that table's columns, ask for an API key if none is set, and show a
# preview of the data.
def set_initial_prompt(name):
    # Drop the table left over from any previously loaded dataset.
    %sql drop table if exists my_data
    # '{{name}}' is a template placeholder; presumably JupySQL fills it from
    # the local variable `name` — keep the parameter name in sync with it.
    %sql create table my_data as (select * from '{{name}}')
    # Column listing of my_data, embedded in the prompt below as the schema.
    cols = %sqlcmd columns -t my_data
    global INITIAL_PROMPT
    INITIAL_PROMPT = f"""
This is the schema for the my_data table:

{cols}

I'll start prompting you and I want you to return SQL code.

If you're asked to plot a histogram, you can return: %sqlplot histogram NAME
If you're asked to plot a boxplot, you can return: %sqlplot boxplot NAME

And replace NAME with the column name, do not include the table  name
"""
    # Without a key the chat stays disabled, so show the key entry widgets.
    if not openai.api_key:
        display(api_key_input)
        display(save_api_button)
    # preview_data is defined in a later cell; it must exist by the time an
    # upload or sample download triggers this function.
    preview_data()

In [7]:
# Create an upload button widget
upload_button = widgets.FileUpload(
    accept='.csv',  # Specify accepted file type(s)
    multiple=False  # Allow only a single file to be uploaded
)
display(upload_button)

# Create an output widget to display the status of the uploaded file.
# It is only created here; the next cell displays it (below the
# "Use sample data" button) and download_sample writes to it as well.
upload_status_output = widgets.Output()

# Callback for the upload button: persist the uploaded CSV and load it
def handle_file_upload(change):
    """Save the uploaded file under a generated name and load it for chatting.

    ``change`` is the notification passed by the widget's observer; it is not
    inspected — the current value of ``upload_button`` is read instead.
    Status messages are written to ``upload_status_output``, which is cleared
    first.
    """
    name = gen_name()

    with upload_status_output:
        clear_output()
        files = upload_button.value

        # Nothing selected: tell the user and stop.
        if not files:
            print('Please upload a CSV file.')
            return

        first_file = files[0]
        with open(name, 'wb') as csv_file:
            csv_file.write(first_file['content'])

        set_initial_prompt(name)

        print('File has been uploaded and saved locally.')


# Run the callback whenever the upload button's value changes
upload_button.observe(handle_file_upload, names='value')

FileUpload(value=(), accept='.csv', description='Upload')

In [8]:
# Create a button widget that loads a sample dataset instead of an upload
fetch_button = widgets.Button(description="Use sample data")
display(fetch_button)

# Show the status area shared by the upload handler and the sample-data
# handler; it was created in the previous cell.
display(upload_status_output)

# Define a function to fetch and store the data
def download_sample(_):
    """Download the sample penguins CSV, save it locally and load it.

    Used as the click callback of the "Use sample data" button; the argument
    (the clicked button) is ignored. Status messages are written to
    ``upload_status_output``.

    The file is saved under a freshly generated name, and the success message
    reports that name. Network failures (including a timeout) are reported
    with the same message as a non-200 response instead of raising inside the
    widget callback.
    """
    name = gen_name()
    with upload_status_output:
        url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
        try:
            # Without a timeout, requests may wait indefinitely on a stalled connection.
            response = requests.get(url, timeout=30)
        except requests.RequestException:
            response = None

        if response is not None and response.status_code == 200:
            with open(name, "wb") as f:
                f.write(response.content)
            print(f"Data has been fetched and stored as {name}.")
            set_initial_prompt(name)
        else:
            print("Failed to fetch the data. Check the URL and try again.")

# Attach the function to the button's click event
fetch_button.on_click(download_sample)

Button(description='Use sample data', style=ButtonStyle())

Output()

## 2. Ask questions

In [9]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Show the first five rows of the my_data table in the chat output area.
# Called by set_initial_prompt after a dataset has been loaded.
def preview_data():
    output = %sql select * from my_data limit 5

    # chat_output is created in a later cell and must exist before this runs.
    with chat_output:
        print("These are the first 5 rows:")
        display(output)  

def handle_submit_button(_):
    if INITIAL_PROMPT is None:
        with chat_output:
            print("You must upload data first")
        return
    
    user_message = user_input.value
    user_input.value = ''
    
    with chat_output:
        print(f'You: {user_message}')
    
    command = chat_with_gpt3(user_message)
    print(command)
    

    if command.startswith("%sqlplot"):
        _, name, column = command.split(" ")
        
        fn_map = {"histogram": partial(histogram, bins=50),
                  "boxplot": boxplot}
        fn = fn_map[name]

        with chat_output:
            plt.show(fn("my_data", column))
    else:
        sql_output = %sql {{command}}

        with chat_output:
            display(sql_output)

# Define a function to handle user input when Enter is pressed
def handle_enter_key(event):
    """Submit the current message when ``event`` is a key-down of Enter.

    ``event`` is a mapping with a ``'type'`` entry and, for key-down events,
    a ``'key'`` entry. Any other event is ignored.
    """
    # Check the event type first so 'key' is only read for key-down events.
    if event['type'] != 'keydown':
        return
    if event['key'] != 'Enter':
        return
    handle_submit_button(None)  # same path as clicking the Submit button

In [10]:
# The chat widgets start disabled when no OpenAI API key is configured;
# handle_api_key_input enables them once a key has been saved.
chat_disabled = not openai.api_key

# chat output
chat_output = widgets.Output()
display(chat_output)

# text input
user_input = widgets.Text(
    placeholder='Type your message...',
    disabled=chat_disabled,
)
user_input.on_submit(handle_submit_button)
display(user_input)

# submit button
submit_button = widgets.Button(
    description='Submit',
    disabled=chat_disabled,
)
submit_button.on_click(handle_submit_button)
display(submit_button)

Output()

  user_input.on_submit(handle_submit_button)


Text(value='', placeholder='Type your message...')

Button(description='Submit', style=ButtonStyle())

##### Powered by [Ploomber Cloud](https://www.platform.ploomber.io)

In [11]:
# Read-only text area explaining that no OpenAI API key is configured.
# handle_api_key_input later overwrites its value with a confirmation message
# once a key has been saved.
api_key_info = widgets.Textarea(
    value="No OpenAI API key was found.\nEnter your OpenAI API key when prompted while uploading data or using sample data.\n\nRefer the documentation for setting environment variable when deploying: https://docs.cloud.ploomber.io/en/latest/user-guide/env-vars.html.",
    placeholder='',
    disabled=True,
    layout={'height': '110px', 'width': '650px'}
    )
# Only show the notice when a key is actually missing.
if not openai.api_key:
    display(api_key_info)