In [5]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import tiktoken
import openai
import json

load_dotenv()
# API credentials are read from the process environment (load_dotenv above adds
# the values from a local .env file).
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
WOLFRAM_ALPHA_APPID = os.getenv('WOLFRAM_ALPHA_APPID')
PROMPTLAYER_API_KEY = os.getenv('PROMPTLAYER_API_KEY')
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

if OPENAI_API_KEY is not None:
    # load_dotenv() runs after `import openai`, so hand the key to the client
    # explicitly; chat_completion_request builds its header from openai.api_key.
    openai.api_key = OPENAI_API_KEY
    print ("OPENAI_API_KEY is ready")
else:
    print ("OPENAI_API_KEY environment variable not found")


# from sqlalchemy import create_engine, text
# engine = create_engine('postgresql://postgres:mysecretpassword@localhost:5432/postgres')

# Chat model passed to every chat_completion_request call in this notebook.
GPT_MODEL = "gpt-3.5-turbo-0613"

OPENAI_API_KEY is ready


In [6]:
import json
import requests
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored


In [7]:
@retry(wait=wait_random_exponential(min=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, functions=None, function_call=None, temperature=0, model=GPT_MODEL, timeout=120):
    """POST a chat-completion request to the OpenAI REST endpoint.

    Args:
        messages (list): Chat messages, each a dict with "role" and "content".
        functions (list, optional): Function-calling schemas to include.
        function_call (dict | str, optional): Function-call directive to include.
        temperature (float): Sampling temperature sent in the payload.
        model (str): Model name sent in the payload.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        requests.Response: The raw HTTP response. The status code is NOT
        checked here; callers should inspect it before reading the JSON body.

    Raises:
        Any exception from building or sending the request is re-raised so the
        @retry decorator can retry it; once the attempts are exhausted tenacity
        raises its own RetryError wrapping the last failure.
    """
    # Fall back to the environment in case openai.api_key was never populated.
    api_key = openai.api_key or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY is not set; cannot call the OpenAI API")

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    json_data = {"model": model, "messages": messages, "temperature": temperature}
    if functions is not None:
        json_data["functions"] = functions
    if function_call is not None:
        json_data["function_call"] = function_call
    try:
        return requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=timeout,  # without a timeout a stalled connection blocks forever
        )
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Re-raise instead of returning the exception object: returning it hid the
        # failure from @retry and made callers fail later on `.json()`.
        raise

In [8]:
def num_tokens_from_string(string: str, encoding_name = "cl100k_base") -> int:
    """Count the tokens `string` encodes to under the named tiktoken encoding."""
    return len(tiktoken.get_encoding(encoding_name).encode(string))

In [9]:
# SPLIT TO BATCHES OF 'x' tokens
def generate_batches(improvements, max_tokens=3000):
    """
    Group texts into consecutive batches whose token count stays within max_tokens.

    Each item is charged its token count plus one (the same per-item overhead is
    applied to every item, including the first item of a new batch). Items that
    are None or a float NaN are skipped. A single item that is larger than
    max_tokens on its own is still emitted, as a batch containing only that item.

    Args:
        improvements (iterable): Texts to batch. Iterating a dict yields its keys.
        max_tokens (int): The maximum number of tokens allowed per batch.

    Returns:
        batches (list): A list of lists, where each inner list is one batch of strings.
    """
    batches = []
    current_batch = []
    current_tokens = 0

    for imp in improvements:
        # Skip missing values (None or NaN) - they cannot be tokenised.
        if imp is None or (isinstance(imp, float) and np.isnan(imp)):
            continue

        print(f"Processing strings: {imp}")  # Add print statement
        imp_tokens = num_tokens_from_string(imp, encoding_name="cl100k_base")
        if current_tokens + imp_tokens + 1 <= max_tokens:
            current_batch.append(imp)
            current_tokens += imp_tokens + 1
        else:
            # Only flush a non-empty batch; otherwise an oversized first item
            # would leave an empty list at the front of the result.
            if current_batch:
                print(f"Batch size: {len(current_batch)}, Tokens: {current_tokens}")  # Print tokens in the current batch
                batches.append(current_batch)
            current_batch = [imp]
            current_tokens = imp_tokens + 1
    if current_batch:
        print(f"Batch size: {len(current_batch)}, Tokens: {current_tokens}")  # Print tokens in the last batch
        batches.append(current_batch)

    print(f"Number of batches: {len(batches)}")  # Print the number of batches
    return batches


In [10]:
# asin_list_path = './data/external/asin_list.csv'
asin_list_path = '/Users/vladbordei/Documents/Development/ProductExplorer/data/external/asin_list.csv'
# Read the CSV and keep its 'asin' column as a plain Python list.
asin_list = pd.read_csv(asin_list_path).loc[:, 'asin'].tolist()

In [11]:
# query = text("SELECT * FROM products WHERE asin IN :asin_list")
# product = pd.read_sql_query(query, engine, params={"asin_list": tuple(asin_list)})
# product['product_description_data'] = product['product_description_data'].apply(lambda x: eval(x))

In [12]:
import ast

# products_path = "./data/interim/products_with_data.csv"
product_path = "/Users/vladbordei/Documents/Development/ProductExplorer/data/interim/products_with_data.csv"
product = pd.read_csv(product_path)

# The column is expected to hold stringified dict literals (later cells call
# .get on the parsed values). literal_eval parses Python literals only, so file
# content can no longer execute arbitrary code the way eval() allowed.
product['product_description_data'] = product['product_description_data'].apply(ast.literal_eval)

In [13]:
# Median of the price_current_price column, rounded to zero decimals.
median_product_price = round(product['price_current_price'].median(), 0)
median_product_price

147.0

In [14]:
# One dict per description section, each mapping ASIN -> that section's value
# (None when the section key is absent from a product's description data).
product_summary_dict = {}
what_is_in_the_box_dict = {}
technical_facts_dict = {}
features_dict = {}
how_product_use_dict = {}
where_product_use_dict = {}
user_description_dict = {}
packaging_description_dict = {}
season_description_dict = {}
when_product_use_dict = {}

for asin, data in zip(product['asin'], product['product_description_data']):
    # NOTE(review): several keys end in '?' while others do not - confirm they
    # match the keys actually present in product_description_data.
    product_summary_dict[asin] = data.get('Product Summary')
    what_is_in_the_box_dict[asin] = data.get('What is in the box?')
    technical_facts_dict[asin] = data.get('Technical Facts?')
    features_dict[asin] = data.get('Features')
    how_product_use_dict[asin] = data.get('How the product is used?')
    where_product_use_dict[asin] = data.get('Where the product is used?')
    user_description_dict[asin] = data.get('User Description?')
    packaging_description_dict[asin] = data.get('Packaging?')
    season_description_dict[asin] = data.get('Season?')
    when_product_use_dict[asin] = data.get('When the product is used?')


# All ten section dicts; packaging_description_dict was previously missing here
# even though it is populated above.
list_of_product_data_dictionaries = [
    product_summary_dict,
    what_is_in_the_box_dict,
    technical_facts_dict,
    features_dict,
    how_product_use_dict,
    where_product_use_dict,
    user_description_dict,
    packaging_description_dict,
    season_description_dict,
    when_product_use_dict,
]


### Product Summary

In [15]:
# https://towardsdatascience.com/an-introduction-to-openai-function-calling-e47e7cd7680e
# Function-calling schema for the product summary request: one function whose
# arguments object has two required string fields (summary and outliers).
# NOTE: a backslash inside a string literal is a line continuation, so the
# leading spaces of the following line become part of the prompt text.
functions = [
    {
        "name": "product_summary_function",
        "description": "Provide a detailed description of a product based on observations on simmilar products",
        "parameters": {
            "type": "object",
            "properties": {
                # Single merged summary across all observed products.
                "product_summary": {
                    "type": "string",
                    "description": "Write a single product fact sheet summary of a product based on these observations from an ecommerce site, in 200 words. Exclude brand names."
                },
                # Products that deviate from the merged summary.
                "product_summary_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist and explain them. Example: B09VBZZ9C8 (<asin>) is an outlier as it includes 3 mini magnetic drawing boards \
                                    instead of a single board, and B085Q3TLF8 stands out for its glowing in the dark feature."\
                }
            },
            "required": ["product_summary", "product_summary_outliers"]
        },
    }
]

In [16]:
# Prompt payload: the ASIN -> product summary dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"```PRODUCT SUMMARIES:``` {product_summary_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "product_summary_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_summary_response = response.json()["choices"]

In [17]:
main_product_summary_response

[{'index': 0,
  'message': {'role': 'assistant',
   'content': None,
   'function_call': {'name': 'product_summary',
    'arguments': '{\n  "Product Summary": "The Raised Garden Bed is a 6x3x2ft galvanized planter box designed for growing vegetables, flowers, and herbs. It is made of stable thickened steel with an evaluated coating paint, ensuring long-term use without rusting. The open bottom design allows for good drainage, promoting healthy plant growth. Setting up the garden bed is quick and easy, as it does not require any tools. The butterfly screws provided can be fastened by hand in just 5-10 minutes. The garden bed also features a cured edge for safety, preventing any injuries to your hands.",\n  "Outliers": ""\n}'}},
  'finish_reason': 'stop'}]

### What is in the box

In [18]:
# https://towardsdatascience.com/an-introduction-to-openai-function-calling-e47e7cd7680e
# Function-calling schema for the "what is in the box" request: one function
# whose arguments object has two required string fields (contents and outliers).
functions = [
    {
        "name": "what_is_in_the_box",
        "description": "Provide a detailed description of what is in the box of a product based on knowledge of simmilar products",
        "parameters": {
            "type": "object",
            "properties": {
                # Consensus box contents across the observed products.
                "In_the_Box": {
                    "type": "string",
                    "description": "Write a single what is in the box of a product based on these OBSERVATIONS. Select the most common values from OBSERVATIONS."
                },
                # Products whose box contents differ, identified by ASIN.
                "In_the_Box_Outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on  what is in the box of a product and explain them. If any products have something extra in the box, say what the ASIN is and what is diffrent"\
                }
            },
            "required": ["In_the_Box", "In_the_Box_Outliers"]
        },
    }
]

In [19]:
# Prompt payload: the ASIN -> "what is in the box" dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{what_is_in_the_box_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "what_is_in_the_box"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_what_is_in_the_box_response = response.json()["choices"]



### Technical Facts

In [20]:
# https://towardsdatascience.com/an-introduction-to-openai-function-calling-e47e7cd7680e
# Function-calling schema for the technical facts request: one function whose
# arguments object has two required string fields (facts and outliers).
functions = [
    {
        "name": "technical_facts_function",
        "description": "write the technical facts / details of a single product from the feat sheets of simmilar products",
        "parameters": {
            "type": "object",
            "properties": {
                "technical_facts": {
                    "type": "string",
                    # This field must ask for technical facts; the earlier wording
                    # asked for "what is in the box", which belongs to another schema.
                    "description": "Write the technical facts / details of a single product based on these OBSERVATIONS. "
                                   "Select the most common values from OBSERVATIONS."
                },
                # Products whose technical facts differ, identified by ASIN.
                "technical_facts_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on  technical facts / details of a single product from the feat sheets of a product and explain them. Say what the ASIN is and what is diffrent"
                }
            },
            "required": ["technical_facts", "technical_facts_outliers"]
        },
    }
]

In [21]:
# Prompt payload: the ASIN -> technical facts dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{technical_facts_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "technical_facts_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_technical_facts_response = response.json()["choices"]



### Features

In [22]:
# https://towardsdatascience.com/an-introduction-to-openai-function-calling-e47e7cd7680e
# Function-calling schema for the features request: one function whose
# arguments object has two required string fields (features and outliers).
# NOTE: the backslashes inside the triple-quoted string are line continuations,
# so each following line's leading spaces become part of the prompt text.
functions = [
    {
        "name": "features_function",
        "description": "write the features of a single product from the feat sheets of simmilar products",
        "parameters": {
            "type": "object",
            "properties": {
                # Feature -> benefit pairs; the prompt embeds an example output.
                "features": {
                    "type": "string",
                    "description": """ Write the features of a single product from the fact sheets of a product \
                                    based on these OBSERVATIONS. Focus on the benefits that using the product brings. Example output: \
                                        "Learning disguised as play": "Makes learning fun and engaging",\
                                        "Portable and travel-friendly": "Easy to carry and use on the go",\
                                        "No eraser needed": "Effortless erasing and reusing",\
                                        "120 magnetic beads": "Provides ample space for creativity and learning",\
                                        "Easy to erase and reset": "Convenient and time-saving",\
                                        "Stylus stored at the bottom": "Ensures easy storage and transportation",\
                                        "Magnetized beads": "Allows for smooth drawing and tactile learning",\
                                        "Stylus pen": "Enables precise control and encourages proper grip" """
                },
                # Products whose features differ, identified by ASIN.
                "features_outliers": {
                    "type": "string",
                    "description": "Identify if any features outliers exist and explain them. Say what the ASIN is and what is diffrent"\
                }
            },
            "required": ["features", "features_outliers"]
        },
    }
]

In [23]:
# Prompt payload: the ASIN -> features dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{features_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "features_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_features_response = response.json()["choices"]



### How to use the product

In [24]:
# Function-calling schema for the "how the product is used" request: one
# function whose arguments object has two required string fields.
# NOTE: the backslashes inside the triple-quoted string are line continuations,
# so each following line's leading spaces become part of the prompt text.
functions = [
    {
        "name": "how_product_use_function",
        "description": "write how a single product is used based on the observations  on simmilar products",
        "parameters": {
            "type": "object",
            "properties": {
                "how_the_product_is_used": {
                    "type": "string",
                    "description": """ Write how a single product is used / can be used based on these \
                                    OBSERVATIONS  on simmilar products. Example output: \
                                    "The product is primarily used for drawing, \
                                    designing, creating, and playing with magnetic beads. \
                                    It can also be used for teaching children how to write and draw, \
                                    taking messages, completing classroom assignments, and practicing alphabets and numbers." """
                },
                "how_the_product_is_used_outliers": {
                    "type": "string",
                    # Asks about HOW the product is used (the prompt previously said "who").
                    "description": "Identify if any outliers exist on how the product is used and explain them. Say what the ASIN is and what is diffrent"
                }
            },
            "required": ["how_the_product_is_used", "how_the_product_is_used_outliers"]
        },
    }
]

In [25]:
# Prompt payload: the ASIN -> "how the product is used" dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{how_product_use_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "how_product_use_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_how_to_use_response = response.json()["choices"]



### Where the product is used

In [26]:
# Function-calling schema for the "where the product is used" request: one
# function whose arguments object has two required string fields.
# NOTE: the backslashes inside the triple-quoted string are line continuations,
# so each following line's leading spaces become part of the prompt text.
functions = [
    {
        "name": "where_product_use_function",
        "description": "write where a single product is used based on the observations  on simmilar products", 
        "parameters": {
            "type": "object",
            "properties": {
                # Consensus list of places of use; the prompt embeds an example output.
                "where_the_product_is_used": {
                    "type": "string",
                    "description": """ Write where a single product is used based on these \
                                    OBSERVATIONS. Example output: \
                                    "Home, schools, classrooms, long drives, \
                                    doctor's offices, waiting for a flight, restaurants, on-the-go, and travel" """
                },
                # NOTE(review): the wording "features outliers" looks carried over
                # from the features schema - confirm it is intended here.
                "where_the_product_is_used_outliers": {
                    "type": "string",
                    "description": "Identify if any features outliers exist on where the product is used and explain them. Say what the ASIN is and what is diffrent"\
                }
            },
            "required": ["where_the_product_is_used", "where_the_product_is_used_outliers"]
        },
    }
]

In [27]:
# Prompt payload: the ASIN -> "where the product is used" dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{where_product_use_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "where_product_use_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_where_to_use_response = response.json()["choices"]



### User Description

In [28]:
# Function-calling schema for the user description request: one function whose
# arguments object has two required string fields (description and outliers).
# NOTE: the backslashes inside the triple-quoted string are line continuations,
# so each following line's leading spaces become part of the prompt text.
functions = [
    {
        "name": "user_description_function",
        "description": "write who the user of a single product is based on the observations on simmilar products", 
        "parameters": {
            "type": "object",
            "properties": {
                # NOTE(review): the example output opens a double quote that is
                # never closed before the end of the string - confirm intended.
                "user_description": {
                    "type": "string",
                    "description": """ Write a user description of a single product based on these OBSERVATIONS. \
                                    Example output: \
                                    "This product is primarily designed for children, \
                                    including kids, toddlers, and preschoolers, with a broad age range from 3 years old \
                                    up to adults. """
                },
                # Products whose target user differs, identified by ASIN.
                "user_description_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on wheo the user of the product is and explain them. Say what the ASIN is and what is diffrent"\
                }
            },
            "required": ["user_description", "user_description_outliers"]
        },
    }
]

In [29]:
# Prompt payload: the ASIN -> user description dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{user_description_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "user_description_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_user_description_response = response.json()["choices"]

### Packaging Description

In [30]:
# Function-calling schema for the packaging request: one function whose
# arguments object has two required string fields (packaging and outliers).
functions = [
    {
        "name": "product_packaging_function",
        "description": "describe the packaging of a product based on the observations on simmilar products", 
        "parameters": {
            "type": "object",
            "properties": {
                # Consensus packaging description across the observed products.
                "product_packaging": {
                    "type": "string",
                    "description": "describe the packaging of a product based on these OBSERVATIONS." 
                },
                # Products whose packaging differs, identified by ASIN.
                "product_packaging_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on the product packaging and explain them. Say what the ASIN is and what is diffrent"\
                }
            },
            "required": ["product_packaging", "product_packaging_outliers"]
        },
    }
]

In [31]:
# Prompt payload: the ASIN -> packaging description dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{packaging_description_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "product_packaging_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_packaging_description_response = response.json()["choices"]



### Season Description

In [32]:
# Function-calling schema for the seasonal use request: one function whose
# arguments object has two required string fields (seasonal use and outliers).
functions = [
    {
        "name": "product_seasonal_use_function",
        # Describes seasonal use; the earlier text said "where a single product
        # is used", which belongs to the where-used schema.
        "description": "describe in which season a single product is used based on the observations on similar products",
        "parameters": {
            "type": "object",
            "properties": {
                "product_seasonal_use": {
                    "type": "string",
                    "description": "describe the seasonal use of a product based on these OBSERVATIONS."
                },
                # Products whose season of use differs, identified by ASIN.
                "product_seasonal_use_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on the season when the product is used and explain them. Say what the ASIN is and what is diffrent"
                }
            },
            "required": ["product_seasonal_use", "product_seasonal_use_outliers"]
        },
    }
]

In [33]:
# Prompt payload: the ASIN -> season description dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{season_description_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "product_seasonal_use_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_season_to_use_response = response.json()["choices"]



### When the product is used Description

In [34]:
# Function-calling schema for the "when the product is used" request: one
# function whose arguments object has two required string fields.
functions = [
    {
        "name": "when_product_use_function",
        # Describes WHEN the product is used; the earlier text said "where",
        # which belongs to the where-used schema.
        "description": "describe when a single product is used based on the observations on similar products",
        "parameters": {
            "type": "object",
            "properties": {
                "when_the_product_is_used": {
                    "type": "string",
                    "description": "describe when a product is used based on these OBSERVATIONS."
                },
                # Products whose time of use differs, identified by ASIN.
                "when_the_product_is_used_outliers": {
                    "type": "string",
                    "description": "Identify if any outliers exist on when the product is used and explain them. Say what the ASIN is and what is diffrent"
                }
            },
            "required": ["when_the_product_is_used", "when_the_product_is_used_outliers"]
        },
    }
]

In [35]:
# Prompt payload: the ASIN -> "when the product is used" dict rendered via its str() form.
messages = [
    {"role": "user", "content": f"{when_product_use_dict}"}
]

# Send the request to the LLM and get the response
response =  chat_completion_request(
    messages=messages,
    functions=functions,
    function_call={"name": "when_product_use_function"},
    temperature=0,
    model=GPT_MODEL
)

# Fail loudly on a transport failure or an HTTP error status instead of hitting
# an opaque AttributeError / KeyError when reading "choices" below.
if isinstance(response, Exception):
    raise response
response.raise_for_status()

# Process the response and store in the dictionary
main_product_when_to_use_response = response.json()["choices"]



In [36]:
# Collect the "choices" payload of every section request under a section name.
initial_responses = {
    'product_summary': main_product_summary_response,
    'what_is_in_the_box': main_product_what_is_in_the_box_response,
    'technical_facts': main_product_technical_facts_response,
    'features': main_product_features_response,
    'how_product_use': main_product_how_to_use_response,
    'where_product_use': main_product_where_to_use_response,
    'user_description': main_product_user_description_response,
    'packaging_description': main_product_packaging_description_response,
    'season_description': main_product_season_to_use_response,
    'when_product_use': main_product_when_to_use_response,
}

In [63]:
# Parse each section's function-call arguments. They arrive as a JSON string
# produced by the model, so decode them as JSON: eval() would execute arbitrary
# model output and fails on JSON literals such as true / false / null.
# strict=False tolerates raw control characters (e.g. newlines) inside strings.
product_data_interim = {
    key: json.loads(choices[0]['message']['function_call']['arguments'], strict=False)
    for key, choices in initial_responses.items()
}

# Flatten the per-section argument dicts into one dict; on a key collision the
# later section wins.
product_data = {}
for section_arguments in product_data_interim.values():
    product_data.update(section_arguments)

In [65]:
product_data

{'Product Summary': 'The Raised Garden Bed is a 6x3x2ft galvanized planter box designed for growing vegetables, flowers, and herbs. It is made of stable thickened steel with an evaluated coating paint, ensuring long-term use without rusting. The open bottom design allows for good drainage, promoting healthy plant growth. Setting up the garden bed is quick and easy, as it does not require any tools. The butterfly screws provided can be fastened by hand in just 5-10 minutes. The garden bed also features a cured edge for safety, preventing any injuries to your hands.',
 'Outliers': '',
 'In_the_Box': 'The package includes the FRIZIONE Galvanized Metal Raised Garden Bed and all the necessary hardware for assembly.',
 'In_the_Box_Outliers': 'The package includes the FRIZIONE Galvanized Metal Raised Garden Bed and all the necessary hardware for assembly.',
 'technical_facts': 'The garden bed is made of galvanized steel with an anti-rust coating. It has a 2ft depth and measures 6x3ft in size.

### Export the data

In [66]:
# Serialise product_data to the interim JSON file.
with open('/Users/vladbordei/Documents/Development/ProductExplorer/data/interim/summarised_product_information.json', 'w') as f:
    f.write(json.dumps(product_data))

In [67]:
# Check if the JSON file is valid
import json
with open('/Users/vladbordei/Documents/Development/ProductExplorer/data/interim/summarised_product_information.json') as file:
    try:
        data = json.load(file)
    except json.JSONDecodeError as e:
        print("JSONDecodeError:", e)
        # Re-read the file to show the offending line with a caret under the
        # reported column.
        file.seek(0)
        lines = file.readlines()
        if 0 < e.lineno <= len(lines):
            prefix = f"{e.lineno}: "
            # Strip the newline so the caret lands directly under the line, and
            # offset it by the prefix width so it points at the right column.
            print(prefix + lines[e.lineno - 1].rstrip("\n"))
            print(" " * (len(prefix) + e.colno - 1) + "^")

#################################
########### TESTING #############

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
import json
import os

import tiktoken
from openai.embeddings_utils import get_embedding
from sklearn.cluster import AgglomerativeClustering

import openai
from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if OPENAI_API_KEY is not None:
    print ("OPENAI_API_KEY is ready")
else:
    print ("OPENAI_API_KEY environment variable not found")

# Create an SQLAlchemy engine to connect to the database.
# The connection URL is taken from the DATABASE_URL environment variable so
# credentials can live outside the notebook; the previous hardcoded URL is kept
# only as a fallback for existing local setups and should be removed.
engine = create_engine(
    os.getenv('DATABASE_URL', 'postgresql://postgres:mysecretpassword@localhost/postgres')
)

# Read the ASIN values from the CSV file
asin_list = pd.read_csv('asin_list.csv')['asin'].tolist()

OPENAI_API_KEY is ready


In [4]:
# Read data about the product

# Load the summarised product information: read the raw text inside the
# context manager, then decode it once the file is closed.
with open('/Users/vladbordei/Documents/Development/oaie2/summarised_product_information.json') as file:
    json_string = file.read()
general_product_data = json.loads(json_string)

In [5]:
# Reduce the token cost of general_product_data dictionary
# Remove "Outliers" from general_product_data dictionary.
# .get(..., {}) and .pop(..., None) make this a no-op when a section or key is
# absent, instead of raising KeyError on a missing section.
for section in ('product_summary_dict', 'what_is_in_the_box_dict',
                'technical_facts_dict', 'features_dict'):
    general_product_data.get(section, {}).pop('Outliers', None)

# Optional: If you want to remove the 'OutliersExplanation' as well
# (only these two sections were handled originally).
for section in ('technical_facts_dict', 'features_dict'):
    general_product_data.get(section, {}).pop('OutliersExplanation', None)


In [6]:
import ast

# Fetch the product rows whose ASIN is in asin_list (bound as a tuple parameter).
query = text("SELECT * FROM products WHERE asin IN :asin_list")

product = pd.read_sql_query(query, engine, params={"asin_list": tuple(asin_list)})

# The column is expected to hold stringified Python literals; literal_eval
# parses literals only, so database content can no longer execute arbitrary
# code the way eval() allowed.
product['product_description_data'] = product['product_description_data'].apply(ast.literal_eval)
median_product_price = round(product.price_current_price.median(),0)

In [7]:
def remove_brand(strings, brand_column):
    """Strip each row's brand name out of the matching string.

    Args:
        strings: iterable of titles (or any text) to clean.
        brand_column: iterable of brand names, paired positionally with
            ``strings`` (pairing stops at the shorter of the two).

    Returns:
        list: one entry per pair. Every occurrence of the brand is removed and
        surrounding whitespace is stripped. A missing or non-string brand
        (``None``, ``NaN``, empty string) leaves the text untouched apart from
        the strip; a non-string title is passed through unchanged.
    """
    cleaned_strings = []
    for string, brand in zip(strings, brand_column):
        if not isinstance(string, str):
            # Missing title (None/NaN from pandas): nothing to clean, and
            # calling .replace on it would raise.
            cleaned_strings.append(string)
            continue
        if isinstance(brand, str) and brand:
            # Only substitute when there is a real brand; NaN/None brands used
            # to raise TypeError inside str.replace.
            string = string.replace(brand, '')
        cleaned_strings.append(string.strip())
    return cleaned_strings

# Store the brand-free titles in a new column, then keep the first row's
# cleaned title for later use.
brandless_titles = remove_brand(product['title'], product['product_information_brand'])
product['product_information_title'] = brandless_titles
product_tile = product['product_information_title'].iat[0]

In [20]:
# First user turn of the few-shot example passed to the chat model: the
# condensing instructions followed by a sample GENERAL PRODUCT DATA dictionary.
# This is a plain (non-f) string, so the braces below are literal text.
# Each trailing backslash suppresses the newline, so the instruction sentences
# are joined with no whitespace between them.
# NOTE(review): "refferences" and "curent" are typos in the prompt text; they
# are left as-is here because the string is sent to the model verbatim.
User_Prompt_1 = """\
Check for repetitive or redundant information and reduce the number of those observations.\
Remove ASIN refferences.\
Understanding the product is very important so don't reduce the information provided.\
Resulting dictionary should have a token count of 70% of curent dictionary.\
\
```GENERAL PRODUCT DATA: ```{'product_summary_dict': {'Product Summary': 'These magnetic drawing boards offer a mess-free and creative play experience for children, promoting problem-solving, hand-eye coordination, and imagination. They feature magnetized beads and a stylus pen for easy drawing and erasing, and are made of child-safe, non-toxic, BPA-free, and lead-free materials. Some boards are portable and travel-friendly, making them great companions for long drives and trips. Some boards also offer a tactile drawing experience and help children learn and write the alphabet correctly. These boards are suitable for both home and classroom use, and make great gifts for toddlers and children.'},
 'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},
 'technical_facts_dict': {'Technical Facts': ['Made of quality plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'Tested in CPC accredited lab',
   'Made of high-quality ABS plastic',
   'Fully contained magnetic beads',
   'Suitable for ages 3 to adult']},
 'features_dict': {'Features': {'No-mess': 'Keeps the area clean while in use',
   'Learning disguised as play': 'Makes learning fun and engaging',
   'Portable and travel-friendly': 'Easy to carry and use on the go',
   'No eraser needed': 'Effortless erasing and reusing',
   '120 magnetic beads': 'Provides ample space for creativity and learning',
   'Easy to erase and reset': 'Convenient and time-saving',
   'Stylus stored at the bottom': 'Ensures easy storage and transportation',
   'Tactile drawing experience': 'Provides a hands-on learning experience',
   'Helps children learn and write the alphabet correctly': 'Promotes early writing education',
   'Great tool for early writing education': 'Encourages learning and fun',
   'Brings more learning fun for kids': 'Engages children in the learning process',
   'Magnetized beads': 'Allows for smooth drawing and tactile learning',
   'Stylus pen': 'Enables precise control and encourages proper grip',
   'Reusable and erasable': 'Environmentally friendly and cost-effective',
   'Portable and lightweight': 'Comfortable to use and carry',
   'Versatile use': 'Applicable to various learning activities',
   'STEM learning': 'Promotes science, technology, engineering, and math skills',
   'Sensory-based': 'Stimulates multiple senses for enhanced learning',
   'Unlimited potential for drawing': 'Encourages creativity and exploration',
   'No cleanup': 'Minimizes maintenance and hassle',
   'Easy storage for magnetic stylus': 'Prevents loss and keeps workspace organized',
   'Versatile use (drawing, practicing letters/numbers/shapes)': 'Adaptable to different learning activities',
   'Suitable for home or classroom use': 'Applicable to different environments',
   'Glows in the dark': 'Adds a fun and unique element to learning',
   'Stimulates curiosity and creativity': 'Inspires imagination and problem-solving',
   'Develops pencil grasp': 'Helps improve handwriting and fine motor skills'}},
 'how_product_use_dict': {'How the product is used': 'The product is primarily used for drawing, designing, creating, and playing with magnetic beads. It can also be used for teaching children how to write and draw, taking messages, completing classroom assignments, and practicing alphabets and numbers. Users can point the magnetic nib of the stylus on each hole to move the magnetic beads to the surface. There are no significant outliers in the given observations.'},
 'where_product_use_dict': {'Where the product is used': "Home, schools, classrooms, long drives, flights, train rides, doctor's offices, waiting for a flight, restaurants, on-the-go, and travel"},
 'user_description_dict': {'User Description': "This dataset contains a variety of products that are designed for children of different ages, from toddlers to school-age preschool and kindergarten children. Some products are also suitable for families to use together. The age range of the products varies, with one product ('B01M4OV4Q4') suitable for ages 3 to adult. There are no clear outliers in this dataset."}}
"""

In [21]:
# Assistant turn of the few-shot example: a hand-condensed version of the
# dictionary shown in User_Prompt_1, demonstrating the shape and level of
# detail the reply should have. It keeps the same top-level keys and uses
# single-quoted, Python-dict-style text rather than strict JSON.
AI_Prompt_1 = """\
{
'product_summary_dict': {'Product Summary': 'These magnetic drawing boards provide a creative and mess-free play experience, enhancing problem-solving, hand-eye coordination, and imagination. The boards, made of child-safe, non-toxic, BPA-free, and lead-free materials, are great companions for travel and also promote learning of alphabet writing. They are suitable for both home and classroom use.'},
 
'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},

'technical_facts_dict': {'Technical Facts': ['Made of quality ABS plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'CPC accredited lab tested',
   'Fully contained magnetic beads',
   'Suitable for ages 3 to adult']},

'features_dict': {'Features': {'No-mess': 'Keeps the area clean while in use',
   'Learning disguised as play': 'Makes learning engaging',
   'Portable': 'Easy to carry and use on the go',
   'Easy to erase and reset': 'Convenient and environmentally friendly',
   'Stylus stored at the bottom': 'Prevents loss and keeps workspace organized',
   'Tactile drawing experience': 'Promotes a hands-on learning experience',
   'Helps children learn and write the alphabet correctly': 'Encourages early writing education',
   'Versatile use': 'Applicable to various learning activities',
   'STEM learning': 'Enhances science, technology, engineering, and math skills',
   'Sensory-based': 'Stimulates multiple senses for enhanced learning',
   'Glows in the dark': 'Adds a unique element to learning',
   'Develops pencil grasp': 'Helps improve handwriting and fine motor skills'}},

'how_product_use_dict': {'How the product is used': 'The product is used for drawing, teaching, and playing with magnetic beads. Users point the magnetic nib of the stylus on each hole to move the magnetic beads to the surface.'},

'where_product_use_dict': {'Where the product is used': 'Home, schools, travel, and waiting areas'},

'user_description_dict': {'User Description': 'The product is designed for different ages, from toddlers to adults. It is ideal for use by families and in classrooms.'}
}
"""

In [22]:
# The real request: the same instructions as the few-shot example, followed by
# the string form of the general_product_data dictionary (interpolated by the
# f-string at the time this cell runs).
# The trailing backslashes join the instruction sentences without whitespace,
# and the text ends with a comma plus the indentation of the closing quotes.
User_Prompt_2 = f"""\
Check for repetitive or redundant information and reduce the number of those observations.\
Remove ASIN refferences.\
Understanding the product is very important so don't reduce the information provided.\
Resulting dictionary should have a token count of 70% of curent dictionary.\
\
    ```GENERAL PRODUCT DATA: ```{general_product_data},\
    """

In [23]:
# Send the few-shot conversation (example request, example answer, real
# request) to the chat model and print its reply.
# Any exception is reported with a printed message only; in that case
# chatbot_response is not assigned by this cell.
try:
    few_shot_messages = [
        dict(role="user", content=User_Prompt_1),
        dict(role="assistant", content=AI_Prompt_1),
        dict(role="user", content=User_Prompt_2),
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=few_shot_messages,
        temperature=0.2,
        api_key=OPENAI_API_KEY,
    )
    first_choice = response["choices"][0]
    chatbot_response = first_choice["message"]["content"]
    print(chatbot_response)
except Exception as err:
    print(f"An error occurred during the OpenAI ChatCompletion API call: {err}")



{
'product_summary_dict': {'Product Summary': 'These magnetic drawing boards provide a mess-free and creative play experience for children, promoting problem-solving, hand-eye coordination, and imagination. They feature magnetized beads and a stylus pen for easy drawing and erasing, and are made of child-safe, non-toxic, BPA-free, and lead-free materials. Some boards are portable and travel-friendly, making them great companions for long drives and trips. Some boards also offer a tactile drawing experience and help children learn and write the alphabet correctly. These boards are suitable for both home and classroom use, and make great gifts for toddlers and children.'},
 
'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},

'technical_facts_dict': {'Technical Facts': ['Made of quality ABS plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'CPC accredited lab tested',
   'Fully contained magnetic beads',
   'Suitab

In [31]:
import json

def process_json_string(json_string):
    """Parse a dictionary-like string (JSON or Python-literal style) into data.

    The chat model is shown single-quoted, Python-dict-style examples, so its
    reply is often not strict JSON. Blindly swapping every ``'`` for ``"``
    corrupts apostrophes inside values (e.g. ``"doctor's offices"``), so the
    text is parsed in order of decreasing strictness:

    1. as JSON, unchanged;
    2. as a Python literal via ``ast.literal_eval`` (no code is executed);
    3. as JSON after replacing single quotes with double quotes (the original
       behaviour, kept for inputs such as ``{'flag': true}``).

    Args:
        json_string: text to parse. Non-string input is handed straight to
            ``json.loads``.

    Returns:
        The decoded Python object (typically a dict).

    Raises:
        json.JSONDecodeError: if none of the strategies can parse the text.
        TypeError: if ``json_string`` is not a type ``json.loads`` accepts.
    """
    import ast  # local import: only needed for the Python-literal fallback

    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        if not isinstance(json_string, str):
            # bytes/bytearray have no further fallbacks; surface the error.
            raise

    try:
        return ast.literal_eval(json_string)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a pure Python literal either; fall through to the legacy path.
        pass

    # Legacy behaviour: replace single quotes with double quotes, then decode.
    return json.loads(json_string.replace("'", '"'))

# Decode the model's reply text into a Python object; the cells below add
# extra keys to it and write it to disk.
simplified_product_data = process_json_string(chatbot_response)

In [32]:
# Attach the median price and the brand-free title of the first product to the
# simplified dictionary.
simplified_product_data.update(
    median_price_usd=median_product_price,
    product_title=product_tile,
)

In [33]:
# Persist the simplified product data as JSON in the current working directory.
with open('summarised_simplified_product_information.json', 'w') as out_file:
    json.dump(obj=simplified_product_data, fp=out_file)

In [34]:
# Check if the JSON file is valid
import json

# Validate the file the previous cell wrote. It is opened by the same relative
# name it was written with, so the check always targets the fresh output
# instead of a machine-specific absolute path.
json_path = 'summarised_simplified_product_information.json'
with open(json_path) as file:
    raw_text = file.read()

try:
    data = json.loads(raw_text)
except json.JSONDecodeError as e:
    print("JSONDecodeError:", e)
    # JSONDecodeError counts lines by '\n', so split the same way to locate
    # the offending line.
    lines = raw_text.split('\n')
    if 0 < e.lineno <= len(lines):
        prefix = f"{e.lineno}: "
        print(prefix + lines[e.lineno - 1])
        # Offset the caret by the "N: " prefix so it points at the reported
        # column rather than a few characters to its left.
        print(" " * (len(prefix) + e.colno - 1) + "^")