In [1]:
# Notebook to create a set of products for a fictitious company
# Ensure you have created a .env (from sample.env) and updated the service details

In [2]:
import base64
import json
import os
import pickle
import random
import re
import time
import uuid
from copy import deepcopy

import openai
from dotenv import load_dotenv
from openai import AzureOpenAI, OpenAIError

load_dotenv()

# Read the endpoint once and test for "missing" before doing the substring check;
# running `in` against None would raise TypeError instead of printing the hint.
api_base = os.getenv("openai_gpt_api_base")
if not api_base or '<redacted' in api_base:
    print ('Update .env with your Azure OpenAI Service details. Refer to sample.env for example.')

In [3]:
# Size of the generated catalogue: categories -> subcategories -> products
category_count = 3
subcategories_per_category = 2
products_per_subcategories = 3

# Sampling settings passed to every chat-completion request
openai_temperature = 0.9
max_tokens = 4096

# Azure OpenAI client; all connection details are read from environment variables
gpt_client = AzureOpenAI(
    api_key=os.getenv('openai_gpt_api_key'),
    azure_endpoint=os.getenv('openai_gpt_api_base'),
    api_version=os.getenv('openai_gpt_api_version'),
)


In [4]:
def _gpt4_chat_request(content, json_mode, max_attempts=6, max_backoff=60):
    """Send one user prompt to the chat deployment, retrying on failure.

    Args:
        content: User prompt text.
        json_mode: When True, ask for a JSON-object response and return it parsed.
        max_attempts: Total number of calls made before giving up.
        max_backoff: Upper bound, in seconds, for the wait after a throttling error.

    Returns:
        The parsed JSON (json_mode=True) or the raw message content
        (json_mode=False). Returns "" when the API reports a content_filter
        error or when every attempt has failed.
    """
    system_prompt = """
    You are an intelligent assistant.
    """

    request_args = {
        "model": os.getenv('openai_gpt_deployment_name'),
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": content},
        ],
        "temperature": openai_temperature,
        "max_tokens": max_tokens,
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stop": None,
        "stream": False,
    }
    if json_mode:
        request_args["response_format"] = {"type": "json_object"}

    incremental_backoff = 1   # seconds to wait on throttling - grows on every 429
    # Every failure, whatever its kind, consumes one attempt so the loop is always bounded.
    for attempt in range(1, max_attempts + 1):
        try:
            response = gpt_client.chat.completions.create(**request_args)
            answer = response.choices[0].message.content
            # A malformed JSON payload raises here and is retried by the generic handler below.
            return json.loads(answer) if json_mode else answer
        except openai.APIError as ex:
            error_code = str(getattr(ex, "code", None))
            status_code = str(getattr(ex, "status_code", None))
            if error_code == "content_filter":
                # Re-sending the same prompt would be rejected again, so give up immediately.
                print ('API Error', ex.code)
                return ""
            if "429" in (error_code, status_code):
                # Throttled: wait with an increasing (capped) delay before the next attempt.
                incremental_backoff = min(max_backoff, incremental_backoff * 1.5)
                if attempt < max_attempts:
                    print ('Waiting to retry after', incremental_backoff, 'seconds...')
                    time.sleep(incremental_backoff)
            else:
                print ('API Error - Retry count:', attempt, ex)
        except Exception as ex:
            print ('Error - Retry count:', attempt, ex)

    # Only reached once all attempts have been used up.
    return ""


def gpt4_request_json(content):
    """Ask the chat deployment for a JSON object and return it parsed.

    Args:
        content: User prompt text.

    Returns:
        The decoded JSON response, or "" if the request could not be completed.
    """
    return _gpt4_chat_request(content, json_mode=True)


def gpt4_request_text(content):
    """Ask the chat deployment for a free-text answer.

    Args:
        content: User prompt text.

    Returns:
        The message content of the first choice, or "" if the request could
        not be completed.
    """
    return _gpt4_chat_request(content, json_mode=False)

def remove_periods(input_string):
    """Return input_string with every period after the first one removed.

    The first period (if any) is kept in place; text without a period is
    returned unchanged.
    """
    head, first_period, tail = input_string.partition('.')
    return head + first_period + tail.replace('.', '')


In [5]:
# Create the categories and subcategories
categories_prompt = """
Please create a set of """ + str(category_count) + """ categories and subcategories for products from a bigbox retail store called 'Contoso Retail'.
For each category, there should be """ + str(subcategories_per_category) + """ subcategories.
Respond in JSON format with an array of objects titled "Category" and "Subcategories".
For example:
{'Categories': [{'Category': 'Electronics',
   'Subcategories': ['Mobile Phones',
    'Laptops',
    'Tablets',
    'Televisions',
    'Cameras']},
  {'Category': 'Home Appliances',
   'Subcategories': ['Refrigerators',
    'Washing Machines',
    'Microwaves',
    'Dishwashers',
    'Air Conditioners']},
    ...
}
Ensure at least one of the Categories is titled "Books".
"""
category_subcategories = gpt4_request_json(categories_prompt)

# gpt4_request_json returns "" when the request fails, and the model may also omit
# the expected key; stop here with a clear message rather than failing on the
# indexing below.
if not isinstance(category_subcategories, dict) or not category_subcategories.get('Categories'):
    raise RuntimeError(
        "Category generation failed: expected a JSON object with a non-empty "
        "'Categories' list, got: " + repr(category_subcategories)
    )

total_categories = len(category_subcategories['Categories'])
print ('Total Categories:', total_categories, '\n')

print ('Sample Category')
print ('='*14)
print (json.dumps(category_subcategories['Categories'][0], indent=4))

Total Categories: 3 

Sample Category
{
    "Category": "Electronics",
    "Subcategories": [
        "Mobile Phones",
        "Laptops"
    ]
}


In [6]:
# Create product titles for all categories / subcategories
category_subcategory_titles = []
failed = []
for c in category_subcategories['Categories']:
    category = c['Category']
    for subcategory in c['Subcategories']:
        print (category, subcategory)
        gen_product_titles_prompt = """
        Please create a list of """ + str(products_per_subcategories) + """ product titles for a fictitions bigbox retailer store called 'Contoso Retail'. 
        The product titles should be within a Category: """ + category + """ and Subcategory: """ + subcategory + """
        
        Respond in JSON format with an object titled "titles" that contains the list of product titles
        """
        product_titles_json = gpt4_request_json(gen_product_titles_prompt)
        # The helper returns "" on failure, so only accept a dict carrying a non-empty list.
        titles = product_titles_json.get('titles') if isinstance(product_titles_json, dict) else None
        if isinstance(titles, list) and titles:
            category_subcategory_titles.append({'category': category, 'subcategory': subcategory, 'titles': titles})
        else:
            print ('Failed:', category, subcategory)
            failed.append({'category': category, 'subcategory': subcategory})

    # Save the product information after each category so partial progress is kept
    with open('category_subcategory_titles.pkl', 'wb') as pkl_out:
        pickle.dump(category_subcategory_titles, pkl_out)
        
print ('Product title creation complete and saved to category_subcategory_titles.pkl\n')

# Surface the failures that were collected instead of leaving them unreported
if failed:
    print ('Subcategories without titles:', len(failed))
    print (json.dumps(failed, indent=4), '\n')

total_products = sum(len(entry['titles']) for entry in category_subcategory_titles)
print ('Total Products:', total_products, '\n')

print ('Sample Product Titles')
print ('='*21)
if category_subcategory_titles:
    print (json.dumps(category_subcategory_titles[0], indent=4))
else:
    print ('No product titles were generated.')

Electronics Mobile Phones
Electronics Laptops
Home Appliances Refrigerators
Home Appliances Washing Machines
Books Fiction
Books Non-Fiction
Product title creation complete and saved to category_subcategory_titles.pkl

Total Products: 18 

Sample Product Titles
{
    "category": "Electronics",
    "subcategory": "Mobile Phones",
    "titles": [
        "ContosoMax Pro 5G Smartphone",
        "ContosoLite X1 Dual-SIM Mobile",
        "ContosoNova Ultra HD Camera Phone"
    ]
}


In [7]:
# Create the product details - log any failures
products = []
failed = []
max_price_attempts = 3   # bound the price re-requests so an empty reply cannot hang the cell

for c in category_subcategory_titles:
    # Read the keys before entering the try block so the failure handler can always use them
    category = c['category']
    subcategory = c['subcategory']
    for title in c['titles']:
        print (category, subcategory, title)
        # One try per product: a single bad title no longer aborts the rest of the subcategory
        try:
            product_gen_prompt = """
            Please create a textual product description for a product called '""" + title + """' from a fictitions bigbox retailer store called 'Contoso Retail'. 
            The product is within a category: """ + category + """ and subcategory: """ + subcategory + """.
            """
            product_json = {}
            product_json['Category'] = category
            product_json['Subcategory'] = subcategory
            product_json['Title'] = title
            product_json['Description'] = str(gpt4_request_text(product_gen_prompt))

            price_gen_prompt = """
            Please create a price in USD for a product called '""" + title + """' from a fictitions bigbox retailer store called 'Contoso Retail'. 
            Always create a price, even if you have to guess a potential price.
            Only return the price.
            """
            price = None
            for _ in range(max_price_attempts):
                price = gpt4_request_text(price_gen_prompt)
                if price:
                    break
            if not price:
                print ('Warning: no price returned for', title)

            # Clean up price to make sure it is of correct format
            string_price = str(price or '').replace(',', '')
            # Remove all alphabetic characters, spaces and slashes
            string_price = re.sub(r'[a-zA-Z]', '', string_price).replace(' ', '').replace('/', '')
            string_price = remove_periods(string_price)
            product_json['Price'] = string_price

            products.append(product_json)
        except Exception as ex:
            print ('Failed:', category, subcategory, title, '-', ex)
            failed.append({"category": category, "subcategory": subcategory, "title": title, "error": str(ex)})

    # Checkpoint after every subcategory so partial progress is kept
    with open('products.pkl', 'wb') as pkl_out:
        pickle.dump(products, pkl_out)

print ('Saving to products.pkl')
with open('products.pkl', 'wb') as pkl_out:
    pickle.dump(products, pkl_out)

total_products = len(products)
print ('Total Products:', total_products)
if failed:
    print ('Failed Products:', len(failed))

Electronics Mobile Phones ContosoMax Pro 5G Smartphone
Electronics Mobile Phones ContosoLite X1 Dual-SIM Mobile
Electronics Mobile Phones ContosoNova Ultra HD Camera Phone
Electronics Laptops Contoso Retail UltraBook Pro X150
Electronics Laptops Contoso Retail Gamer's Choice G730
Electronics Laptops Contoso Retail Business Elite B500
Home Appliances Refrigerators Contoso Retail Frost-Free Double Door Refrigerator - 350L
Home Appliances Refrigerators Contoso Retail Smart Inverter Side-by-Side Refrigerator - 500L
Home Appliances Refrigerators Contoso Retail Energy Efficient Bottom Freezer Refrigerator - 400L
Home Appliances Washing Machines Contoso Retail UltraClean Front Load Washing Machine
Home Appliances Washing Machines Contoso Retail EcoWash Top Load Washing Machine
Home Appliances Washing Machines Contoso Retail SmartWash WiFi-Enabled Washing Machine
Books Fiction The Enchanted Forest: A Contoso Retail Adventure
Books Fiction Mystery at Midnight: A Contoso Retail Bestseller
Books 

In [8]:
# Show the first generated product as a quick sanity check
print ('Example Product', '=' * 15, sep='\n')
products[0]

Example Product


{'Category': 'Electronics',
 'Subcategory': 'Mobile Phones',
 'Title': 'ContosoMax Pro 5G Smartphone',
 'Description': "# ContosoMax Pro 5G Smartphone – Available at Contoso Retail\n\n## Overview\n\nExperience the future of mobile technology today with the ContosoMax Pro 5G Smartphone, exclusively available at Contoso Retail. This cutting-edge device redefines excellence in the world of smartphones, offering unbeatable performance, stunning design, and unparalleled connectivity. Whether you're a tech enthusiast, a professional on the go, or a casual user, the ContosoMax Pro 5G is designed to meet all your needs with finesse and style.\n\n## Key Features\n\n### Blazing-Fast 5G Connectivity\nStay ahead of the curve with next-generation 5G connectivity. Enjoy lightning-fast downloads, seamless streaming, and superior network reliability. The ContosoMax Pro 5G ensures you're always connected at top speeds, no matter where you are.\n\n### Brilliant Display\nImmerse yourself in vibrant color

In [14]:
# Export the products as JSON Lines: one serialized product per line
with open('products.jsonl', 'w') as jsonl_file:
    jsonl_file.writelines(json.dumps(product) + '\n' for product in products)

In [13]:
# Delete the intermediate pickle files if they are present
for pkl_name in ('products.pkl', 'category_subcategory_titles.pkl'):
    if os.path.exists(pkl_name):
        os.remove(pkl_name)
