In [33]:
import argilla as rg
import json
import os
import time
from datasets import load_dataset, load_from_disk, Dataset, load_metric, concatenate_datasets
import pandas as pd
import openai
from datetime import datetime
import pickle
openai.api_key = os.environ["OPENAI_API_KEY"]

In [26]:
def complete_chatgpt(messages, model="gpt-3.5-turbo", temperature=0, max_tokens=1024, sleep_seconds=1.2):
    """Send a chat conversation to the OpenAI chat-completion endpoint.

    Args:
        messages: List of chat message dicts (see `create_chatgpt_message`).
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.
        sleep_seconds: Pause applied after a successful request; skipped
            when the value is not greater than zero.

    Returns:
        The whitespace-stripped content of the first returned choice, or an
        empty string if anything raised along the way (the exception is
        printed, not re-raised).
    """
    # Sampling knobs other than temperature / max_tokens are pinned.
    request_kwargs = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    try:
        completion = openai.ChatCompletion.create(**request_kwargs)

        # Pause only after a request that went through.
        if sleep_seconds > 0:
            time.sleep(sleep_seconds)

        first_choice = completion["choices"][0]
        return first_choice["message"]["content"].strip()
    except Exception as err:
        # Callers treat "" as the failure signal, so report and swallow.
        print(err)
        return ""

def create_chatgpt_message(role: str, content: str) -> dict:
    """Build a single chat message dict in the shape the chat API expects.

    Args:
        role: Who is speaking. Possible roles: system, assistant, user.
        content: The text of the message.

    Returns:
        A dict with the keys "role" and "content".
    """
    return {"role": role, "content": content}

In [17]:
# System prompt shared by every financial-advice request.
system_message = create_chatgpt_message(
    "system",
    "You are a financial advisor AI that aims to provide support for personal finance-related issues.",
)


def wrap_finadv_submission_call(text):
    """Ask GPT-4 to answer one submission text under the financial-advisor system prompt.

    Args:
        text: The submission text, sent as the single user message.

    Returns:
        Whatever `complete_chatgpt` returns for the two-message conversation
        (system prompt followed by the user message).
    """
    conversation = [system_message, create_chatgpt_message("user", text)]
    return complete_chatgpt(conversation, model="gpt-4", temperature=0.7)

In [9]:
# messages = [create_chatgpt_message("user", "what is the square root of 33?")]
# complete_chatgpt(messages, model="gpt-4")

In [11]:
# Load the submissions table and make sure every row has a distinct id,
# since ids are used as dictionary keys for the answers later on.
df = pd.read_csv("./outdata/submissions.csv")
assert df["id"].is_unique
display(df.sample())
len(df)

Unnamed: 0,id,title,created_utc,edited,selftext
78792,s2cap2,Need Feedback on my First Condo Financing,1642009819,-1.0,I recently had an offer accepted for a Condo i...


84944

In [12]:
# Preview the first rows of the submissions table.
df.head()

Unnamed: 0,id,title,created_utc,edited,selftext
0,1006ury,Are financial advisors mainly for people who a...,1672530374,-1.0,I grew up as an only child with my dad working...
1,1005xmw,Looking for Thoughts from Strangers,1672527497,-1.0,"Couple w/ Gross Income ~$127,000\n\nTake Home ..."
2,1005hzw,Budgeting buying a vehicle for the first time,1672526170,-1.0,I'm in the military and am moving to Californi...
3,1005gb0,Questions about T-bill auction day,1672526027,-1.0,"&amp;#x200B;\n\n \nFor t-bill 912796YN3, anno..."
4,1005azk,SLS Mortgage Pulled funds on the 30th of this ...,1672525594,-1.0,I woke up yesterday surprised that *Specialize...


In [18]:
# In-memory store of ChatGPT answers, keyed by submission id.
# The dict is only created when it does not exist yet, so re-running this cell
# on a live kernel cannot discard answers that have not been saved to disk.
# To deliberately start from scratch, run `id_answer_map.clear()` instead.
if "id_answer_map" not in globals():
    id_answer_map = {}

In [24]:
def retrieve_chatgpt_answers_to_submissions(limit=10):
    """Fetch answers for submissions in `df` that are not in `id_answer_map` yet.

    Walks `df` row by row, skips ids that already have an answer, and stores
    each newly retrieved answer in `id_answer_map`. Stops once `limit` new
    answers were stored, or returns early as soon as a call yields an empty
    string (the error signal of the completion helper).

    Args:
        limit: Maximum number of new answers to retrieve in this call.
    """
    fetched = 0

    for _, submission in df.iterrows():
        if fetched >= limit:
            print(f"limit of {limit} reached")
            break

        submission_id = submission["id"]
        # Only rows without a stored answer trigger a request.
        if submission_id not in id_answer_map:
            print(f"[{submission_id}]🏃‍♂️ retrieving answer...")
            answer_text = wrap_finadv_submission_call(submission["selftext"])
            if answer_text == "":
                print("chatgpt seems to have errored out, aborting")
                return

            print(f"[{submission_id}]✅ retrieved answer")
            id_answer_map[submission_id] = answer_text
            fetched += 1

In [25]:
# Trial run: retrieve answers for at most two submissions that have none yet.
retrieve_chatgpt_answers_to_submissions(2)

[1006ury]🏃‍♂️ retrieving answer...
SXYKD
[1006ury]✅ retrieved answer
[1005xmw]🏃‍♂️ retrieving answer...
SXYKD
[1005xmw]✅ retrieved answer
limit of 2 reached


In [38]:
# Retrieve up to 40 more answers; ids already in id_answer_map are skipped.
retrieve_chatgpt_answers_to_submissions(40)

[1004q1u]🏃‍♂️ retrieving answer...
[1004q1u]✅ retrieved answer
[1004k51]🏃‍♂️ retrieving answer...
[1004k51]✅ retrieved answer
[1004isk]🏃‍♂️ retrieving answer...
[1004isk]✅ retrieved answer
[1004djv]🏃‍♂️ retrieving answer...
[1004djv]✅ retrieved answer
[1004cue]🏃‍♂️ retrieving answer...
[1004cue]✅ retrieved answer
[10049g4]🏃‍♂️ retrieving answer...
[10049g4]✅ retrieved answer
[10048b4]🏃‍♂️ retrieving answer...
[10048b4]✅ retrieved answer
[10043f4]🏃‍♂️ retrieving answer...
[10043f4]✅ retrieved answer
[1003u24]🏃‍♂️ retrieving answer...
[1003u24]✅ retrieved answer
[1003rme]🏃‍♂️ retrieving answer...
[1003rme]✅ retrieved answer
[1003on7]🏃‍♂️ retrieving answer...
[1003on7]✅ retrieved answer
[1003o86]🏃‍♂️ retrieving answer...
[1003o86]✅ retrieved answer
[1003c7h]🏃‍♂️ retrieving answer...
[1003c7h]✅ retrieved answer
[1003955]🏃‍♂️ retrieving answer...
[1003955]✅ retrieved answer
[1002pwp]🏃‍♂️ retrieving answer...
[1002pwp]✅ retrieved answer
[1002h3v]🏃‍♂️ retrieving answer...
[1002h3v]✅ retrieved

In [40]:
# Number of answers currently held in the in-memory map.
len(id_answer_map)

50

In [45]:
# Build one question/answer record per submission that has a stored answer.
# Rows are selected by id membership instead of looping until the first
# unanswered row, so answered submissions are exported wherever they sit in
# `df`. The comprehension also keeps loop variables out of the notebook's
# global namespace (a module-level variable named `id` would shadow the builtin).
answered_df = df.loc[df["id"].isin(list(id_answer_map)), ["id", "selftext"]]

new_records = [
    {
        "id": submission_id,
        "question": question_text,
        "answer": id_answer_map[submission_id],
        "text": f"### Human\n{question_text}\n\n### Assistant\n{id_answer_map[submission_id]}",
    }
    for submission_id, question_text in zip(answered_df["id"], answered_df["selftext"])
]

# Explicit columns keep the CSV header stable even when no answers exist yet.
qa_df = pd.DataFrame(new_records, columns=["id", "question", "answer", "text"])
qa_df.to_csv("./outdata/finadv50.csv", index=False)

In [46]:
# Spot-check one randomly chosen question/answer record.
qa_df.sample()

Unnamed: 0,id,question,answer,text
38,1001sp3,401k wellness check\n\nI’m sure this has been ...,"Based on the information you've provided, here...",### Human\n401k wellness check\n\nI’m sure thi...


In [47]:
# Convert the question/answer frame into a `datasets.Dataset` and upload it
# to the Hub under "nihiluis/finadv50-001" as a private dataset.
ds = Dataset.from_pandas(qa_df)
ds.push_to_hub("nihiluis/finadv50-001", private=True)

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 162.73ba/s]
Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


In [39]:
# Persist the in-memory answer map to a timestamped pickle file.
now = datetime.now()
now_str = f"{now:%m-%d-%Y_%Hh%Mm%Ss}"

with open(f"./outdata/id_answer_map-{now_str}.pickle", mode="wb") as handle:
    pickle.dump(id_answer_map, file=handle, protocol=pickle.HIGHEST_PROTOCOL)


In [41]:
# Sanity check: reload one saved snapshot and print how many answers it holds.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load snapshot files that this notebook wrote itself.
with open("./outdata/id_answer_map-08-22-2023_12h02m53s.pickle", "rb") as handle:
    test_map = pickle.load(handle)
    print(len(test_map))
    #display(test_map)

50


{'1006ury': "Your approach to money management and investing has clearly worked well for you and your family. As an AI financial advisor, I can tell you that a financial advisor isn't necessary for everyone. \n\nFinancial advisors can provide a range of services, from investment advice to comprehensive financial planning. Individuals with straightforward financial situations, like yours, may not need the full range of services that a financial advisor provides. \n\nHowever, there are some situations where a financial advisor could be useful:\n\n1. **Complex Financial Planning:** If you have a complex financial situation, such as owning multiple businesses or properties, having a large family with different financial needs, or planning for a special-needs family member, a financial advisor could be beneficial.\n\n2. **Tax Planning:** Advisors can provide advice on minimizing tax liability, especially when it comes to estate planning and inheritance tax.\n\n3. **Retirement Planning:** If