In [1]:
import os
import mercury as mr
import html2text
import tiktoken
import requests
import openai
import pandas as pd
import numpy as np
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from dotenv import load_dotenv

In [2]:
# Read variables from a local .env file into the process environment.
# The result is bound to a name only to keep it out of the cell output.
_dotenv_loaded = load_dotenv()

In [3]:
# Take the OpenAI credential from the environment (None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Mercury app metadata: title and description for the app.
app = mr.App(
    title="GPT predicts on tabular data",
    description="Pass tabular training data to GPT and ask for prediction",
)

In [5]:
# Upload widget for the CSV that supplies both the training and the hold-out rows.
data_file = mr.File(
    label="Upload CSV with training data",
    max_file_size="1MB",
)

mercury.File

In [6]:
# Nothing below can run without a file, so prompt the user and halt the notebook here.
no_file_uploaded = data_file.filepath is None
if no_file_uploaded:
    mr.Markdown("Please upload data")
    mr.Stop()

Please upload data

In [None]:

# Load the uploaded CSV into a DataFrame.
uploaded_csv_path = data_file.filepath
df = pd.read_csv(uploaded_csv_path)

In [None]:
# Hold out 30% of the rows for evaluation.
# A fixed seed keeps the partition identical across runs, so the prompt sent to
# the model and the reported accuracy are reproducible for a given CSV.
train, test = train_test_split(df, test_size=0.3, random_state=42)

In [None]:
# Feature picker: every column is selectable; all but the last are preselected.
all_columns = list(df.columns)
x_columns = mr.MultiSelect(
    label="Input features",
    value=all_columns[:-1],
    choices=all_columns,
)

In [None]:
# Target picker: every column is selectable; the last one is preselected.
column_names = list(df.columns)
y_column = mr.Select(
    label="Target",
    value=column_names[-1],
    choices=column_names,
)

In [None]:
# Validate the widget selection before any request is built.
if x_columns.value is None or len(x_columns.value) == 0 or y_column.value is None:
    mr.Markdown("Please select input features and target column")
    mr.Stop()
elif y_column.value in x_columns.value:
    # The test rows sent to the model are test[x_columns.value]; if the target
    # is also selected as a feature, the answers would be included in the prompt.
    mr.Markdown("Target column cannot be used as an input feature")
    mr.Stop()

In [None]:
# Button that triggers the ChatGPT request; its `clicked` flag is checked later.
ask_chat = mr.Button(
    label="Ask ChatGPT",
)

In [None]:
# Query the chat model only after the button has been clicked; otherwise halt.
# The conversation is four user messages: task description, training CSV,
# prediction instructions, and the test rows (features only).
if ask_chat.clicked:
    # Send only the selected features plus the target, so the training table
    # matches the column list stated in the prompt and deselected columns are
    # not sent. dict.fromkeys de-duplicates while preserving order.
    train_columns = list(dict.fromkeys([*x_columns.value, y_column.value]))
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", 
                                          temperature=0,
                                          messages=[{
                                                  "role": "user", 
                                                  "content": f"""
                                                  Below is a training CSV table. 
                                                  Input features are columns: {x_columns.value}.
                                                  Target feature is column {y_column.value}."""
                                              },
                                              {
                                                  "role": "user", 
                                                  "content": train[train_columns].to_csv(index=False)
                                              },
                                              {
                                                  "role": "user", 
                                                  "content": """
                                                  Predict target value for each sample from table below. 
                                                  Write predicted target for each sample in separate row.
                                                  Dont add header. Just predicted targets."""
                                              },
                                              {
                                                  "role": "user", 
                                                  "content": test[x_columns.value].to_csv(index=False)
                                              }
                                          ])
else:
    mr.Stop()

In [None]:
# Pull the text of the first returned choice and echo it for inspection.
first_choice = completion.choices[0]
chat_response = first_choice.message.content
print(chat_response)

In [None]:
# Parse the reply into one label per non-empty line.
# splitlines() handles "\r\n" as well as "\n"; stripping removes stray padding,
# and blank lines are dropped so they do not shift predictions against test rows.
predicted_lines = [line.strip() for line in chat_response.splitlines()]
y_predicted = np.array([line for line in predicted_lines if line], dtype=str)

In [None]:
# Show how many predictions were parsed from the reply.
np.shape(y_predicted)

In [None]:
# Ground-truth values of the selected target column for the held-out rows.
target_name = y_column.value
test_target = test[target_name]

In [None]:
# Show how many held-out rows there are, for comparison with the predictions.
np.shape(test_target)

In [None]:
# The reply may contain more or fewer rows than the test set; score only the overlap.
n_predicted = len(y_predicted)
n_expected = len(test_target)
shortest = min(n_predicted, n_expected)

In [None]:
# Score the overlapping rows.
# Predictions come back as text, while test_target keeps whatever dtype pandas
# inferred from the CSV, so both sides are compared as stripped strings to
# avoid mixed-type labels. .iloc makes the truncation explicitly positional.
# NOTE(review): float targets render as e.g. "1.0"; a reply of "1" would not match.
y_true_labels = test_target.iloc[:shortest].astype(str).str.strip().to_numpy()
y_pred_labels = np.char.strip(y_predicted[:shortest].astype(str))
acc = np.round(accuracy_score(y_true_labels, y_pred_labels), 4)

In [None]:
# Report the rounded accuracy.
accuracy_line = f"Accuracy: {acc}"
print(accuracy_line)

In [None]:
# Display the parsed predictions as the cell output.
y_predicted

In [None]:
# Display the held-out target values as the cell output, for comparison with the predictions.
test_target