# Notebook Purpose

This notebook generates synthetic data for the Investment Advisor multi-agent use cases:

- Customer Demographic Data
- Investment Services and Products

In [0]:
%pip install openai -q -U
%pip install Faker -q
# NOTE(review): neither package is version-pinned (openai is additionally upgraded via -U),
# so the installed versions can differ between runs.
# Restart the Python process so the packages installed above are picked up by the cells that follow.
dbutils.library.restartPython()

In [0]:
# Declare the notebook parameters as text widgets: (name, label, default value).
for _widget_name, _widget_label, _widget_default in (
    ("catalog", "Catalog", "fins_genai"),
    ("schema", "Schema", "agents"),
):
    dbutils.widgets.text(
        name=_widget_name,
        label=_widget_label,
        defaultValue=_widget_default,
    )

In [0]:
# Read the current widget values; both are used below to select the target catalog and schema.
catalog, schema = (
    dbutils.widgets.get(widget_name) for widget_name in ("catalog", "schema")
)

In [0]:
%load_ext autoreload
# Mode 2: reload all imported modules before each cell executes, so edits to local
# helper modules take effect without restarting the kernel.
%autoreload 2

In [0]:
import sys
import os
# Make modules that live next to this notebook importable.
# Guarded so that re-running the cell does not keep appending duplicate entries.
_notebook_dir = os.path.abspath('.')
if _notebook_dir not in sys.path:
    sys.path.append(_notebook_dir)


def _quote_identifier(identifier):
    """Return ``identifier`` as a backtick-quoted SQL identifier.

    Surrounding whitespace is dropped, one pair of wrapping backticks (if the
    value was entered already quoted) is removed, and any backtick inside the
    name is doubled so the value is always treated as a single identifier.
    """
    name = identifier.strip()
    if len(name) >= 2 and name[0] == "`" and name[-1] == "`":
        name = name[1:-1].replace("``", "`")
    return "`" + name.replace("`", "``") + "`"


# The widget values are free text, so they are quoted before being placed in
# the statement: names with special characters (e.g. hyphens) are accepted and
# the value cannot change the shape of the SQL.
spark.sql(f"USE CATALOG {_quote_identifier(catalog)}")
spark.sql(f"USE SCHEMA {_quote_identifier(schema)}")

# Create Demographic Data

In [0]:
from faker_utils import FakeDemographicDataGenerator

# Field specification handed to the generator. Key order is kept as authored.
# NOTE(review): fields mapped to None carry no options here — presumably the
# generator applies its own defaults for them; confirm in faker_utils.
config = dict(
    name=None,
    age=dict(min=18, max=90),
    gender=["Male", "Female", "Non-binary"],
    email=None,
    phone=None,
    address=None,
    city=None,
    country=None,
    income_level=["Low", "Middle", "High"],
    investment_experience=["Beginner", "Intermediate", "Expert"],
    risk_aversion=["Low", "Medium", "High"],
    investment_preference=[
        "Stocks",
        "Bonds",
        "Real Estate",
        "Cryptocurrency",
        "Mutual Funds",
    ],
)

# Build 100 synthetic customer records and preview the first rows.
demographic_gen = FakeDemographicDataGenerator(num_records=100, config=config)
df_demographic = demographic_gen.generate()
df_demographic.head()

In [0]:
# Persist the generated customers as a table in the current catalog/schema,
# replacing any previous contents.
(
    spark.createDataFrame(df_demographic)
    .write
    .mode("overwrite")
    .saveAsTable("investment_customer_demographics")
)


# Create Investment Products

# Create Investment Product Offering

In [0]:
from openai import OpenAI
import os

# Resolve both the credential and the workspace URL from the notebook's own
# context, so the client always targets the workspace this notebook is running
# in rather than a hard-coded host the token may not be valid for.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
DATABRICKS_TOKEN = _notebook_context.apiToken().get()
DATABRICKS_HOST = _notebook_context.apiUrl().get().rstrip("/")

# OpenAI-compatible client pointed at the workspace's model serving endpoints.
CLIENT = OpenAI(
    api_key=DATABRICKS_TOKEN,
    base_url=f"{DATABRICKS_HOST}/serving-endpoints",
)

In [0]:
import re
import json

def oneshot_prompt(
    prompt,
    model="databricks-meta-llama-3-3-70b-instruct",
    max_tokens=2000,
    system_prompt="You are a helpful assistant.",
):
    """Send a single user prompt to a chat model and return the reply text.

    Args:
        prompt: Text sent as the user message.
        model: Name of the serving endpoint / model to query.
        max_tokens: Upper bound on the number of tokens in the completion.
        system_prompt: Text sent as the system message ahead of the prompt.

    Returns:
        The content of the first returned choice as a string. An empty string
        is returned when the model sends no text content, so callers can
        always treat the result as ``str``.
    """
    response = CLIENT.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    # The content field may be None; normalise so string operations downstream do not fail.
    return content if content is not None else ""


def create_product(model="databricks-meta-llama-3-3-70b-instruct", k=10):
    """Ask the LLM for ``k`` investment products/services and decode its JSON reply.

    The reply is searched for JSON in this order: the body of the first
    Markdown code fence (with or without a ``json`` tag), the whole reply, and
    finally the text between the first ``{`` and the last ``}``. The first
    candidate that decodes successfully is returned.

    Args:
        model: Name of the serving endpoint / model to query.
        k: Number of products or services requested in the prompt.

    Returns:
        The decoded JSON value (the prompt asks for a mapping of service name
        to service description), or an empty dict when no candidate in the
        reply is valid JSON.
    """
    product_prompt = \
    f"""
    Generate a list {k} finacial products or services a investment management company would offer to customer of different risk aversion and income levels. Return JSON key value pairs where key is the service name and value is the service description.
    """
    # Guard against a missing reply so the regex/search calls below always get a string.
    reply = oneshot_prompt(product_prompt, model) or ""

    candidates = []
    fenced = re.search(r'```(?:json)?(.*?)```', reply, re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))
    candidates.append(reply)
    first_brace, last_brace = reply.find("{"), reply.rfind("}")
    if 0 <= first_brace < last_brace:
        # Covers replies that wrap the JSON object in explanatory prose.
        candidates.append(reply[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            return json.loads(candidate.strip())
        except json.JSONDecodeError:
            continue
    # Nothing decodable: return the same empty mapping regardless of how the reply was shaped.
    return {}

In [0]:
# Ask the LLM for the product catalogue using create_product's default model and k.
products = create_product()
# Bare expression so the notebook renders the result for a visual check before it is used downstream.
products

In [0]:
from faker_utils import generate_product_data

# Service tiers passed to the product-data generator together with the LLM-generated products.
products_tiers = [
    "self_managed",
    "digital_advisor",
    "personal_advisor",
]

df_products = generate_product_data(products, products_tiers)
df_products

In [0]:
# Persist the product frame as a table in the current catalog/schema, replacing any previous contents.
(
    spark.createDataFrame(df_products)
    .write
    .mode("overwrite")
    .saveAsTable("investment_products")
)

# Marketing Condition

LLM-based report generator (free-form; does not follow a general template)

* Look at customer returns
* Write an email to the customer with recommendations based on current market conditions
  * summary of top movers
  * Snapshot of their portfolio performance (less important)


Scenario:
* Tech stocks
* customer [stocks ...] [%gain ....]
* Get headline from internet search
  * stock symbol, news chunks, changes in price of stocks, detect the reason why the stock price changes
