In [1]:
def set_jupyter_widescreen():
    """Inject a <style> block that overrides the widths of the notebook,
    menubar and main-toolbar containers, and render it in the cell output."""
    from IPython.display import HTML, display

    # CSS overrides keyed on the container element ids of the notebook page.
    widescreen_css = """
    <style>
        div#notebook-container    {width: 95%; }
        div#menubar-container     {width: 65%; }
        div#maintoolbar-container {width: 99%; }
    </style>
    """
    display(HTML(data=widescreen_css))


set_jupyter_widescreen()

In [2]:
import os
import re
import time
import json
import openai
from dotenv import load_dotenv, find_dotenv
import pandas as pd
# Lift pandas' display limits so frames are shown without row/column truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# Turn off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None

# Locate a .env file and load its variables into the environment;
# the return value is discarded on purpose.
_ = load_dotenv(find_dotenv())

# Read the API key from the environment instead of hard-coding it.
# A missing OPENAI_API_KEY raises KeyError here, before any request is made.
openai.api_key = os.environ["OPENAI_API_KEY"]

- Summarize key points in the client metadata

In [3]:
import pickle
# Load the client records from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
with open("./clients_data.pkl", 'rb') as f:
    clients = pickle.load(f) 

# popitem() removes one entry from `clients` and returns it as a (key, value) pair,
# so the record shown in this cell's output is no longer in `clients` afterwards.
# NOTE(review): presumably intentional, since the displayed record has only a
# 'description' field while the later loop also reads 'docs' - confirm.
clients.popitem()

('Zeus Manly',
 {'description': 'is a professional athlete that won 3 Olympic gold medals for discus throwing. \n                    He lives in Montana with his two pitbulls - Ares and Apollo.'})

In [4]:
def get_completion_from_messages(messages, model="gpt-3.5-turbo", verbose=False, **kwargs):
    """
    Send a chat conversation to the model and return the text of its reply.

        messages - list of prompts; each prompt is a dictionary with a
            "role" and a "content" entry
        model - name of the chat model to query
        verbose - when True, print the complete API response before
            returning; the return value is the reply text either way
        **kwargs - forwarded unchanged to openai.ChatCompletion.create
            (this is how sampling options such as temperature are passed)

    Returns the "content" of the first choice in the response.
    """
    response = openai.ChatCompletion.create(model=model, messages=messages, **kwargs)

    if verbose:
        print(f"full response:\n{response}")

    # Only the first choice is used.
    first_choice = response.choices[0]
    return first_choice.message["content"]

- Create profile prompt

In [5]:
# Output schema:
# Example metadata layout, keyed by source-document suffix (e.g. "_linkedin.html").
# Each suffix maps to a dict of field name -> sample value (one level deeper for
# "_pitchbook.html" and "_google.html"). The whole dict is interpolated into the
# system prompt of the profile-summary cell, so every key and value below is
# prompt text: editing a string here changes what the model is shown.
metadata_schema = {"_linkedin.html": {
                  "Employment 1": "Big Bank, Senior Data Scientist",
                  "Employment 2": "Small Bank, Data Scientist",
                  "Education": "UC Berkeley, PhD Astronomy",
                  "Board Member": "Girls and Boys Club, Chairman",
                  "Bio": "biography of up to 100 words that is consistent with the above information",
                 },

                 "_wealthx.html": {
                    "Estimated Net Worth": "At least $19.5 million",
                    "Estimated Liquid Assets": "At least $11.5 million",
                    "Estimated Household Wealth": "At least $31.5 million",
                    "Estimated Household Liquid Assets": "At least $11.5 million",
                    "Estimated Family Net Worth": "At least $226.1 billion",
                    "Interests, Passions, Hobbies" : "Tennis and golf",
                 },

                 "_relsci.html": {
                     "Boards & Committees (Corporate)": "Paramount Global, Board Director",
                     "Boards & Committees (Nonprofit)": "Brentwood School - California, Trustee",
                     "Former/Prior Boards & Committees (Corporate)": "The Walt Disney Company, Board Director",
                     "Former/Prior Boards & Committees (Nonprofit)": "The Paley Center for Media, Trustee",
                     "Top donations (Nonprofit)": "Greenpeace - $2M, UCSF - $1M, Kiva - $500K",
                     "Top donations (Political parties)": "Kamala Harris - $100K, CA Democratic Committee - $250K",
                 },

                 "_equilar.html": {
                    "Stock sold - Equity Transactions (Last 36 Months)": "$215.3 million",
                    "New Equity Grants - Equity Transactions (Last 36 Months)": "$93 million",
                    "Options Exercised - Equity Transactions (Last 36 Months)": "$17.5 million",
                    "Equity Holdings - Equity Transactions": "$167.5 million",
                    "Annual Compensation": "$11.9 million",
                    "Stock Sold": "CEO and Chairperson, $1.6M at Cerevel Therapeutics Holdings, Inc. (50,000 shares), June 5 2023 (SEC) | June 1 2023 (Effective)",
                 },

                 "_zoominfo.html": {
                     "Personal Email": "email@domain.com",
                 },

                 "_pitchbook.html": {
                     "Lead partner on deals": {
                          "Company": "Harvey (Business/Productivity Software)",
                          "Deal Date": "April 26 2023",
                          "Deal Type": "Early Stage VC",
                          "Deal Size": "$21M",
                          "Deal Status": "Completed",
                          "Location": "Los Angeles, CA",
                          "Representing": "Sequoia Capital",
                          "Other Partners": "Rich Dude I, Rich Guy II, Notso Rich III, Rich Wannabe",
                          #"Other Partners emails": "richdude@aristocracy.com, richguy@aristocracy.com",
                     },
#                     "Investor bio": "A brief summary of the investor, his peers, and his investment deals"
                 },

                 "_google.html": {
                     "Article 1": {
                         "Title": 'John Smith granted key to the city',
                         'Date': 'January 1, 2023',
                         "Abstract": '''In an expensive public ceremony, the mayor granted John Smith the key to the city. The mayor then spoke for 20 minutes on how great of a person is John Smith and how luck we are to be his contemporaries.'''
                         },
                 },
                 # TODO: open question - should the schema also cover the following?
                 ## Should we add info on:
                     # criminal background
                     # if existing client, current relationship depth (e.g. CRB info)
                }

In [6]:
# User-message template for the profile summary. The {client_description} and
# {client_metadata} placeholders are filled with str.format once per client.
# The backslash at the end of the second line is a line continuation inside the
# string, so no newline is emitted before "<Your repsonse here>".
# NOTE(review): "repsonse" is misspelled, but it is part of the prompt sent to
# the model; changing it changes the request text.
message_1_content = """***Client description: {client_description}
metadata: {client_metadata}*** \
<Your repsonse here> """

# Two-message conversation: a fixed system instruction plus a user slot.
# The system content is an f-string, so `metadata_schema` is rendered into it
# right here, at definition time. The user content starts as None and is
# overwritten for each client before the request is sent.
messages =  [  
{"role":"system",
 "content":f"""Given the following detailed meta data about an individual, \
generate a concise summary that captures the key details and any other significant \
aspects from the meta data and is consistent with the provided information:

 metadata: {metadata_schema}
 """},    
{"role":"user", "content": None},    
]

In [8]:
# Request options shared by every profile-summary call; 'verbose' is consumed by
# get_completion_from_messages itself, the rest is forwarded to the API.
kwargs = dict(
    verbose=False,
    temperature=0,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# Build one profile summary per client and store it back on the client's record.
for name, record in clients.items():
    print(name)
    client_description = ' '.join((name, record['description']))
    client_metadata = record['docs']
    # Fill the user slot of the shared conversation for this client.
    messages[1]['content'] = message_1_content.format(
        client_description=client_description,
        client_metadata=client_metadata,
    )
    profile_prompt = get_completion_from_messages(messages, model="gpt-3.5-turbo", **kwargs)
    print('profile prompt created')
    record['profile_prompt'] = profile_prompt

Robert King
profile prompt created
Julia Harpman
profile prompt created
Hanna Smith
profile prompt created
Jerry Smith
profile prompt created
Mariann Avocado
profile prompt created
Velvet Throat
profile prompt created
Jared Livinglife
profile prompt created
Aphrodite Greek
profile prompt created
Helen Troy
profile prompt created


- create banking prompt

In [29]:
# User-message template for the banking summary. This rebinds `message_1_content`,
# replacing the profile-summary template defined in the earlier cell.
# The trailing backslash is a line continuation inside the string, so the two
# source lines form a single line of prompt text.
message_1_content = """***Client description: {client_description}*** \
<Your repsonse here> """

# Conversation for the banking summary: a system instruction containing one
# worked example, plus a user slot that starts as None and is filled per client.
# This also rebinds `messages`, so the profile-summary conversation is gone after
# this cell runs.
messages =  [  
{
  "role": "system",
  "content": """You are an expert in banking. Given a brief description of an individual, generate a concise summary of their \
  banking relationship with up to three accounts, tailored to their lifestyle and interests. Focus solely on the account types \
  and their primary features, avoiding any introductory or concluding remarks. Be direct and succinct. Consider the client's \
  financial activity level (highly active, moderate, inactive) when detailing the account types and usage.

  Example:
  Client Description: Robert King is a highly successful finance professional... [additional details]
  
  Output: Robert King, as CEO of Hedge Fund A, oversees significant financial activities with the bank. His banking engagement includes:
  - Authority on two corporate accounts of Hedge Fund A for company transactions.
  - A personal checking account used mainly for his $3 million mortgage.
  - Another checking account with dual debit cards: one for personal use and another for his child’s daily expenses.
  - Financial activity: highly active with engagement in online banking."""
},
{"role": "user", "content": None} 
]


# Request options for the banking-summary calls; 'verbose' is consumed by
# get_completion_from_messages itself, the rest is forwarded to the API.
kwargs = dict(
    verbose=False,
    temperature=0,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# Derive a banking-relationship summary from each client's stored profile summary.
for name, record in clients.items():
    print(name)
    client_description = record['profile_prompt']
    # Fill the user slot of the shared conversation for this client.
    messages[1]['content'] = message_1_content.format(client_description=client_description)
    banking_prompt = get_completion_from_messages(messages, model="gpt-3.5-turbo", **kwargs)
    print('banking prompt created')
    record['banking_prompt'] = banking_prompt

Robert King
banking prompt created
Julia Harpman
banking prompt created
Hanna Smith
banking prompt created
Jerry Smith
banking prompt created
Mariann Avocado
banking prompt created
Velvet Throat
banking prompt created
Jared Livinglife
banking prompt created
Aphrodite Greek
banking prompt created
Helen Troy
banking prompt created


- prompt template for transaction structure

In [31]:
# Allowed values for the 'Transaction_Purpose' column, kept in sorted order.
# These names are sent verbatim to the model and come back in the generated
# transactions, so spelling matters: 'Groceries' was previously misspelled
# 'Grocieries' and the typo would have propagated into the output data.
categories = sorted([
    'Automotive',
    'Bills & utilities',
    'Education',
    'Entertainment',
    'Fees & adjustments',
    'Food & drink',
    'Gas',
    'Gift & Donations',
    'Groceries',
    'Health & Wellness',
    'Home',
    'Misc',
    'Personal',
    'Professional Services',
    'Shopping',
    'Travel',
    'Employment',
    'Benefits',
    'Investment',
    'Loan',
    'Tax',
])

# Comma-separated form that is substituted into the system prompt.
transaction_purposes = ', '.join(categories)

In [32]:
# Allowed values for the 'Transaction_Type' column, flattened into the
# comma-separated text that is substituted into the system prompt.
transaction_types = ', '.join((
    'Branch TLR', 'Cashiers Check', 'ACH', 'Wire', 'Check',
    'Credit Card', 'Transfer', 'Debit Card', 'Zelle', 'Bill Pay',
    'Venmo', 'Apple Pay', 'Google Wallet', 'PayPal',
))

# Long catalogue of statement-description formats, one per line.
# The {...} fragments are literal text in this string: nothing in this cell
# calls .format on it.
# NOTE(review): a later cell rebinds `transaction_statement_examples` to a much
# shorter comma-joined list, so when the cells run top to bottom this value
# never reaches the prompt - confirm which version is intended.
transaction_statement_examples = """
POS PURCHASE #2034 STARBUCKS SF CASTRO ST CA
POS PURCHASE #0765 COSTCO WHOLESALE SF
POS PURCHASE #6567 VENMO *UBER EATS 855-981-2 NY
POS CREDIT POS REFUND TERMIL 4435353 WEGMANS ITHACA NY
ACH CREDIT VANGUARD INVEST FUND DIVIDEND {name of the client}
ACH DEBIT VENMO PAYMENT SENT TO SARAH CONNOR
INCOMING WIRE REFUND - EMERSON RESORT & SPA
INCOMING WIRE ALPHABET INC CLASS A SOLIUM MORGA
DOMESTIC ONLINE WIRE FLYWIRE PAYMENTS CORPORATION 
ATM WITHDRAWAL #1734 BAY ATM LOCATOR SAN FRANCISCO CA
ATM DEPOSIT #6763 POST OFFICE SQ BOSTON MA TERMIL 3324Q
PAYROLL CREDIT ACH PACIFIC GATEWAY PAYROLL {name of the client}
ONLINE BANKING TRANSFER TO CHECKING ACCOUNT {client's account number}
ACH DEBIT CHASE AUTO LOAN BILL PAY {client's name}
ACH DEBIT PAYCHEX-HRS 401(K) {client's company name}
ACH DEBIT PERSHING BROKERAGE {client's name}
ACH DEBIT CITIZENS BANK LOAN PAYMENT {client's name}
ACH DEBIT USBANK HOME MTG PYMT {client's name}
AUTO TRANSFER TO LN TRANSFER TO SCHEDUELED LOAN PAYMENT {mask the first four client's account number digits with XXXX}
ACH DEBIT IRS USATAXPYMT QUARTERLY TAX 2023 {client's name}
RETURN ITEM PAYPAL INST XFFER {client's name} LINKEDIN
INTERNET TRANSFER FROM DDA {client's account number} ON 11/03 AT 08.10
MOBILE DEPOSIT
DEPOSIT TRANSFER TLR 18 BR 76 XFER FROM {client's name}
ZELLE CREDIT PAYMENT FROM: MARGARET'S BOUTIQUE L TD ID:2324424A01
ZELLE CREDIT PAYMENT FROM: TAX ARMOUT INC.
INTEREST CREDIT
DIRECT S/C WIRE TRANSFER FEE
CHECK WITHDRAWAL CK # {last four digits of checking account number}
WITHDRAWAL TLR 23 BR 90 REAL ESTATE INVESTMENT"""

In [36]:
# Short set of sample statement descriptions, joined with a bare comma (no
# space) into the single string that is substituted into the system prompt.
transaction_statement_examples = ','.join((
    'POS PURCHASE #2034 STARBUCKS SF',
    'ACH CREDIT VANGUARD INVEST FUND DIVIDEND',
    'ACH DEBIT VENMO PAYMENT SENT TO SARAH CONNOR',
    'ATM WITHDRAWAL #1734 BAY ATM LOCATOR SF',
    'INCOMING WIRE ALPHABET INC CLASS A SOLIUM MORGA',
    'BILL PAY AUTO DEBIT COMCAST SAN FRANCISCO UTILITY BILL',
))

In [41]:
# Empty two-slot conversation (system, then user); both contents are filled in
# by later cells.
messages = [{"role": role, "content": None} for role in ("system", "user")]

# User-message template; {profile_prompt} and {banking_prompt} are filled per
# client with str.format.
message_1_content = (
    "- Profile Summary: [{profile_prompt}]\n"
    "- Banking Relationship:[{banking_prompt}]\n"
    "<Your repsonse here>\n"
)

In [103]:
# System prompt for the transaction generator, written into the system slot of
# `messages`. The three placeholders are filled immediately by .format:
#   {transaction_categories}         <- transaction_purposes (allowed purposes)
#   {transaction_types}              <- transaction_types (allowed types)
#   {transaction_statement_examples} <- sample statement descriptions
# Two wording fixes relative to the earlier draft of this prompt:
#   * the second "Use following ..." line now says "transaction types"; it used
#     to repeat "transaction purposes" although it lists the types;
#   * the timeline now says "three months", matching the 2023-08-01 to
#     2023-11-01 date range it gives.
messages[0]["content"] = """Assume the role of an expert assistant in generating financial bank transactions. Create transaction records for an individual, reflecting their profile and banking relationships. Guidelines:
- Format: CSV-like, single-line transactions with headers 'Date', 'Account_Number', 'Transaction_Type', 'Transaction_Purpose', 'Transaction_Amount', 'Transaction_Description'.
- Data Types: 'Account_Number' as integer, 'Transaction_Amount' as float, with outgoing amounts as negative and incoming as positive.
- Timeline: Over three months from 2023-08-01 to 2023-11-01.
- Activity Levels: Generate 10-40 transactions for inactive, 30-100 for moderate, 100-270 for highly active profiles.
- Recurring Transactions: Include monthly recurring (e.g., utilities, mortgage) and frequent (e.g., daily POS) transactions.
- Multiple Daily Transactions: Include at least a few examples.
- Use following transaction purposes: {transaction_categories}
- Use following transaction types: {transaction_types}
- Transaction Descriptions: Follow standard formats, avoid fictitious entities. Here are a few examples:{transaction_statement_examples}. Extrapolate similar formats for other transactions.
- Profile and Banking Details: [Provided by the user].

Note: make sure to avoid any introductory or concluding remarks
""".format(transaction_types=transaction_types, transaction_categories=transaction_purposes, transaction_statement_examples=transaction_statement_examples)

In [104]:
# Print the formatted system prompt to check that every placeholder was filled in.
print(messages[0]["content"])

Assume the role of an expert assistant in generating financial bank transactions. Create transaction records for an individual, reflecting their profile and banking relationships. Guidelines:
- Format: CSV-like, single-line transactions with headers 'Date', 'Account_Number', 'Transaction_Type', 'Transaction_Purpose', 'Transaction_Amount', 'Transaction_Description'.
- Data Types: 'Account_Number' as integer, 'Transaction_Amount' as float, with outgoing amounts as negative and incoming as positive.
- Timeline: Over two months from 2023-08-01 to 2023-11-01.
- Activity Levels: Generate 10-40 transactions for inactive, 30-100 for moderate, 100-270 for highly active profiles.
- Recurring Transactions: Include monthly recurring (e.g., utilities, mortgage) and frequent (e.g., daily POS) transactions.
- Multiple Daily Transactions: Include at least a few examples.
- Use following transaction purposes: Automotive, Benefits, Bills & utilities, Education, Employment, Entertainment, Fees & adjustme

In [105]:
def _parse_transactions_csv(raw_text):
    """Turn the model's CSV-like reply into a DataFrame of string cells.

    raw_text - the full reply; the first non-blank line is taken as the header.

    Compared with splitting every line on ',', this
      * skips blank lines and Markdown code-fence lines,
      * honours quoted fields through the csv module,
      * folds surplus fields into the last column, so an unquoted comma inside
        the trailing description no longer makes the DataFrame constructor fail,
      * pads short rows with None so every row matches the header width.

    Returns an empty DataFrame when the reply has no usable lines.
    """
    import csv  # imported here so the cell runs without touching the imports cell

    csv_lines = [
        line for line in raw_text.splitlines()
        if line.strip() and not line.lstrip().startswith("```")
    ]
    parsed = list(csv.reader(csv_lines))
    if not parsed:
        return pd.DataFrame()

    header, body = parsed[0], parsed[1:]
    width = len(header)
    records = []
    for fields in body:
        if len(fields) > width:
            # Surplus fields are merged into the last column.
            fields = fields[:width - 1] + [",".join(fields[width - 1:])]
        elif len(fields) < width:
            fields = fields + [None] * (width - len(fields))
        records.append(fields)
    return pd.DataFrame(records, columns=header)


# Request options; 'verbose' is consumed by get_completion_from_messages itself,
# the rest is forwarded to the API.
kwargs = {
    'verbose':False,
    'temperature':0,
    'top_p':1,
    'frequency_penalty':0,
    'presence_penalty':0,
}

# Generate transactions for an explicit subset of clients.
# NOTE(review): the first clients of the earlier loops are not listed here -
# presumably they were processed in a previous run; confirm.
for name in ['Hanna Smith', 'Jerry Smith', 'Mariann Avocado', 'Velvet Throat', 'Jared Livinglife', 'Aphrodite Greek', 'Helen Troy']:
    print(name)
    pprompt = clients[name]['profile_prompt']
    bprompt = clients[name]['banking_prompt']
    print(f'\n {bprompt} \n')
    messages[1]['content'] = message_1_content.format(profile_prompt=pprompt, banking_prompt=bprompt)
    transactions = get_completion_from_messages(messages, model="gpt-4-1106-preview", **kwargs)
    print('transactions created')
    # Keep the raw reply on the client record, then export a parsed copy.
    clients[name]['transactions'] = transactions

    df = _parse_transactions_csv(transactions)
    df.to_excel(f'{name.replace(" ","_").upper()}.xlsx')


Hanna Smith

 Hanna Smith, the owner and baker at Hanna's French Pastries, has a banking relationship that supports her entrepreneurial endeavors. Her banking engagement includes:
- A business checking account for managing her bakery's finances and processing customer payments.
- A savings account to set aside funds for future business expansion or unexpected expenses.
- A personal checking account for her personal expenses and managing her household finances.
- Financial activity: moderate, with regular deposits from her bakery sales and occasional withdrawals for personal expenses. 

transactions created
Jerry Smith

 Jerry Smith, the race car driver and philanthropist, has a banking relationship that aligns with his active lifestyle and charitable endeavors. His banking engagement includes:
- A high-yield savings account to maximize his wealth and savings.
- A premium credit card with exclusive benefits for his frequent travel and racing expenses.
- A business account for his motors

In [9]:
# Hand-wrapped banking summary for Helen Troy; the trailing spaces before two of
# the line breaks are part of the text.
helen = "\n".join([
    "Helen Troy, a successful lead model and city council member, maintains a banking ",
    "relationship that aligns with her lifestyle and interests.",
    "Her banking engagement includes:",
    "- A high-yield savings account to maximize her wealth accumulation.",
    "- A premium credit card with exclusive fashion-related perks and rewards.",
    "- A philanthropic account for managing donations to various nonprofits ",
    "supporting domestic abuse victims.",
    "- Financial activity: highly active with frequent transactions and online banking.",
])

In [10]:
# print (rather than a bare expression) so the line breaks are rendered instead of shown as \n.
print(helen)

Helen Troy, a successful lead model and city council member, maintains a banking 
relationship that aligns with her lifestyle and interests.
Her banking engagement includes:
- A high-yield savings account to maximize her wealth accumulation.
- A premium credit card with exclusive fashion-related perks and rewards.
- A philanthropic account for managing donations to various nonprofits 
supporting domestic abuse victims.
- Financial activity: highly active with frequent transactions and online banking.


In [11]:
# Sample statement descriptions, one per line, with a leading newline and a
# blank line at the end.
trans_examples = "\n".join([
    "",
    "TRANSFER TO TD AMERITRADE INVESTMENT ACCOUNT",
    "POS PURCHASE #1903 BLOOMINGDALE'S GLENDALE",
    "ACH CREDIT MODELLING AGENCY PAYMENT RECEIVED",
    "OUTGOING WIRE TO DOMESTIC ABUSE FOUNDATION DONATION",
    "",
    "",
])

"\nPOS PURCHASE #2034 STARBUCKS SF CASTRO ST CA\nPOS PURCHASE #0765 COSTCO WHOLESALE SF\nPOS PURCHASE #6567 VENMO *UBER EATS 855-981-2 NY\nPOS CREDIT POS REFUND TERMIL 4435353 WEGMANS ITHACA NY\nACH CREDIT VANGUARD INVEST FUND DIVIDEND {name of the client}\nACH DEBIT VENMO PAYMENT SENT TO SARAH CONNOR\nINCOMING WIRE REFUND - EMERSON RESORT & SPA\nINCOMING WIRE ALPHABET INC CLASS A SOLIUM MORGA\nDOMESTIC ONLINE WIRE FLYWIRE PAYMENTS CORPORATION \nATM WITHDRAWAL #1734 BAY ATM LOCATOR SAN FRANCISCO CA\nATM DEPOSIT #6763 POST OFFICE SQ BOSTON MA TERMIL 3324Q\nPAYROLL CREDIT ACH PACIFIC GATEWAY PAYROLL {name of the client}\nONLINE BANKING TRANSFER TO CHECKING ACCOUNT {client's account number}\nACH DEBIT CHASE AUTO LOAN BILL PAY {client's name}\nACH DEBIT PAYCHEX-HRS 401(K) {client's company name}\nACH DEBIT PERSHING BROKERAGE {client's name}\nACH DEBIT CITIZENS BANK LOAN PAYMENT {client's name}\nACH DEBIT USBANK HOME MTG PYMT {client's name}\nAUTO TRANSFER TO LN TRANSFER TO SCHEDUELED LOAN 