In [None]:
import os
from openai import OpenAI

In [None]:
# Read the API key from the environment so the secret is never hard-coded in
# the notebook. Raises KeyError if the OPENAI_API_KEY variable is not set.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
# User prompt: names every field each synthetic record must contain and asks
# for the result as a list of dictionaries.
prompt = (
    "Generate 50 samples of synthetic data with the following fields:\n"
    + ", ".join([
        "Age", "Gender", "Occupation", "Location", "Marital Status", "Family Size",
        "Values", "Interests", "Desires", "Goals", "Lifestyle Choices",
        "Social Status", "Attitudes", "Fears", "Income", "Net Worth",
        "Credit Score", "Debt-to-Income Ratio", "Investment Portfolio",
        "Product Usage", "Purchase History", "Customer Loyalty",
        "Digital Channel Preference/Usage", "Online Activity",
    ])
    + ".\n\n"
    + "Format the output as a list of dictionaries."
)

In [None]:
# Send the system instruction and the user prompt to the chat model, then
# print the text of the first returned choice.
MODEL = "gpt-4o-mini"

completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that helps me with creating synthetic bank customer data!",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
    model=MODEL,
    # NOTE(review): 1500 tokens may be too few for 50 records of 24 fields,
    # in which case the reply would be cut off - confirm.
    max_tokens=1500,
    temperature=0.7,
)
print(completion.choices[0].message.content)

In [1]:
# The function below was generated by OpenAI.

import random
def generate_synthetic_data(num_samples, seed=None):
    """Generate random synthetic bank-customer records.

    Every field is drawn independently of all the others, so a record is not
    internally consistent (for example, 'Income' and 'Social Status' are
    unrelated draws).

    Args:
        num_samples: Number of records to generate.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used, so
            existing callers, including ones that call ``random.seed``
            themselves, see unchanged behaviour. When given, a private
            generator is used and the global random state is left untouched.

    Returns:
        A list of ``num_samples`` dictionaries, all sharing the same 24 keys
        in the same order.
    """
    rng = random if seed is None else random.Random(seed)

    def pick(options):
        # One independent draw (with replacement) per record.
        return rng.choices(options, k=num_samples)

    def draw_ints(low, high):
        # Uniform integers in the inclusive range [low, high].
        return [rng.randint(low, high) for _ in range(num_samples)]

    # Columns are generated one after another, in this exact order, so the
    # sequence of random draws stays the same for a given generator state.
    columns = {
        'Age': pick(range(18, 70)),
        'Gender': pick(['Male', 'Female', 'Non-binary']),
        'Occupation': pick(['Engineer', 'Teacher', 'Doctor', 'Nurse', 'Artist', 'Sales', 'Manager', 'Scientist', 'Lawyer', 'Technician']),
        'Location': pick(['Sydney', 'Melbourne', 'Brisbane', 'Perth', 'Adelaide', 'Gold Coast', 'Canberra', 'Newcastle', 'Wollongong', 'Logan City']),
        'Marital Status': pick(['Single', 'Married', 'Divorced', 'Widowed']),
        'Family Size': pick(range(1, 6)),
        'Values': pick(['Integrity', 'Innovation', 'Community', 'Sustainability', 'Family']),
        'Interests': pick(['Technology', 'Sports', 'Arts', 'Travel', 'Cooking', 'Reading', 'Gaming']),
        'Desires': pick(['Health', 'Wealth', 'Happiness', 'Knowledge', 'Adventure']),
        'Goals': pick(['Career Advancement', 'Financial Independence', 'Fitness', 'Education', 'Travel']),
        'Lifestyle Choices': pick(['Minimalist', 'Luxury', 'Eco-friendly', 'Traditional', 'Urban']),
        'Social Status': pick(['Lower Class', 'Middle Class', 'Upper Middle Class', 'Upper Class']),
        'Attitudes': pick(['Optimistic', 'Pessimistic', 'Realistic', 'Idealistic']),
        'Fears': pick(['Failure', 'Rejection', 'Loneliness', 'Financial Instability']),
        'Income': draw_ints(30000, 150000),
        'Net Worth': draw_ints(10000, 1000000),
        'Credit Score': draw_ints(300, 850),
        'Debt-to-Income Ratio': [round(rng.uniform(0.0, 0.5), 2) for _ in range(num_samples)],
        'Investment Portfolio': pick(['Stocks', 'Bonds', 'Real Estate', 'Mutual Funds', 'Cryptocurrency']),
        'Product Usage': pick(['Frequent', 'Occasional', 'Rare', 'Never']),
        'Purchase History': pick(['Electronics', 'Clothing', 'Groceries', 'Home Goods', 'Toys']),
        'Customer Loyalty': pick(['High', 'Medium', 'Low']),
        'Digital Channel Preference/Usage': pick(['Email', 'Social Media', 'Website', 'Mobile App']),
        'Online Activity': pick(['Shopping', 'Gaming', 'Social Networking', 'Learning', 'Streaming']),
    }

    # Transpose the per-field columns into one dictionary per record.
    field_names = list(columns)
    return [dict(zip(field_names, record)) for record in zip(*columns.values())]

In [2]:
# Build 50 random synthetic bank-customer records with generate_synthetic_data
synthetic_data_samples = generate_synthetic_data(num_samples=50)

# Show every generated record
print(synthetic_data_samples)

[{'Age': 37, 'Gender': 'Female', 'Occupation': 'Nurse', 'Location': 'Newcastle', 'Marital Status': 'Married', 'Family Size': 3, 'Values': 'Integrity', 'Interests': 'Travel', 'Desires': 'Wealth', 'Goals': 'Education', 'Lifestyle Choices': 'Urban', 'Social Status': 'Middle Class', 'Attitudes': 'Pessimistic', 'Fears': 'Financial Instability', 'Income': 105532, 'Net Worth': 702330, 'Credit Score': 383, 'Debt-to-Income Ratio': 0.14, 'Investment Portfolio': 'Real Estate', 'Product Usage': 'Frequent', 'Purchase History': 'Groceries', 'Customer Loyalty': 'High', 'Digital Channel Preference/Usage': 'Social Media', 'Online Activity': 'Social Networking'}, {'Age': 69, 'Gender': 'Non-binary', 'Occupation': 'Nurse', 'Location': 'Adelaide', 'Marital Status': 'Divorced', 'Family Size': 3, 'Values': 'Family', 'Interests': 'Technology', 'Desires': 'Adventure', 'Goals': 'Education', 'Lifestyle Choices': 'Eco-friendly', 'Social Status': 'Upper Middle Class', 'Attitudes': 'Pessimistic', 'Fears': 'Failure'

In [None]:
# Write the generated list of dictionaries to a CSV file, one column per key.
import csv

# Column order follows the key order of the first record. Indexing [0] raises
# IndexError if synthetic_data_samples is empty.
fieldnames = list(synthetic_data_samples[0].keys())

# newline='' lets the csv module control line endings; the explicit encoding
# keeps the file identical across platforms instead of depending on the
# locale's default encoding.
with open('customer_data_temp.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(synthetic_data_samples)