In [102]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import tiktoken
import openai
import json

# Load variables from a local .env file into the process environment, then
# pick up the API keys this notebook may need (each is None when unset).
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
SERPAPI_API_KEY = os.getenv('SERPAPI_API_KEY')
WOLFRAM_ALPHA_APPID = os.getenv('WOLFRAM_ALPHA_APPID')
PROMPTLAYER_API_KEY = os.getenv('PROMPTLAYER_API_KEY')
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

# Report on the value already read above instead of querying the environment twice.
if OPENAI_API_KEY is not None:
    print ("OPENAI_API_KEY is ready")
else:
    print ("OPENAI_API_KEY environment variable not found")


from sqlalchemy import create_engine, text
# Prefer a DATABASE_URL from the environment/.env so real credentials do not
# have to live in the notebook; the fallback is the original local default.
engine = create_engine(
    os.getenv('DATABASE_URL', 'postgresql://postgres:mysecretpassword@localhost:5432/postgres')
)

In [103]:
def num_tokens_from_string(string: str, encoding_name = "cl100k_base") -> int:
    """Count the tokens `string` encodes to under the named tiktoken encoding."""
    return len(tiktoken.get_encoding(encoding_name).encode(string))

In [104]:
# SPLIT TO BATCHES OF 'x' tokens
def generate_batches(improvements, max_tokens=3000):
    """
    Group texts into consecutive batches that stay within a token budget.

    Texts are taken in order. A text joins the current batch when the batch's
    running total plus the text's token count plus one stays within
    `max_tokens`; otherwise the current batch is closed and the text starts a
    new one. A single text larger than `max_tokens` still gets a batch of its
    own. Progress and batch statistics are printed while running.

    Args:
        improvements (iterable): The texts to batch. `None` and float NaN
            entries are skipped.
        max_tokens (int): The maximum token budget per batch.

    Returns:
        batches (list): A list of lists, where each inner list is one batch of strings.
    """
    batches = []
    current_batch = []
    current_tokens = 0

    for imp in improvements:
        # Skip missing values: None, or float NaN such as pandas uses for empty cells.
        if imp is None or (isinstance(imp, float) and np.isnan(imp)):
            continue

        print(f"Processing strings: {imp}")  # Add print statement
        imp_tokens = num_tokens_from_string(imp, encoding_name="cl100k_base")
        if current_tokens + imp_tokens + 1 <= max_tokens:
            current_batch.append(imp)
            current_tokens += imp_tokens + 1
            continue

        # Budget exceeded: close the running batch. It can still be empty when the
        # very first text is itself over budget; never emit an empty batch then.
        if current_batch:
            print(f"Batch size: {len(current_batch)}, Tokens: {current_tokens}")  # Print tokens in the current batch
            batches.append(current_batch)
        current_batch = [imp]
        current_tokens = imp_tokens

    if current_batch:
        print(f"Batch size: {len(current_batch)}, Tokens: {current_tokens}")  # Print tokens in the last batch
        batches.append(current_batch)

    print(f"Number of batches: {len(batches)}")  # Print the number of batches
    return batches


In [105]:
import ast  # cell-local import so this cell stays runnable on its own

# Read the ASINs of interest and fetch their rows from the products table.
asin_list_path = './data/external/asin_list.csv'
asin_list = pd.read_csv(asin_list_path)['asin'].tolist()
query = text("SELECT * FROM products WHERE asin IN :asin_list")
product = pd.read_sql_query(query, engine, params={"asin_list": tuple(asin_list)})
# The column holds Python-literal text (a dict repr). Parse it with
# ast.literal_eval rather than eval: eval would execute arbitrary code stored
# in the database, while literal_eval only accepts literals.
product['product_description_data'] = product['product_description_data'].apply(ast.literal_eval)

In [None]:
# Median of the current prices of the loaded products, rounded to zero decimals.
median_product_price = round(product['price_current_price'].median(), 0)
median_product_price

In [None]:
def remove_brand(strings, brand_column):
    """
    Strip each brand name out of its paired title.

    Args:
        strings (iterable): Titles, paired positionally with `brand_column`.
        brand_column (iterable): Brand for each title. A missing brand
            (None, NaN, or any non-string) leaves the title text untouched.

    Returns:
        list: One cleaned entry per pair. String titles have every occurrence
        of the brand removed and surrounding whitespace stripped; non-string
        titles (e.g. NaN) are passed through unchanged so positions stay aligned.
    """
    cleaned_strings = []
    for string, brand in zip(strings, brand_column):
        if not isinstance(string, str):
            # Missing title: nothing to clean, but keep the slot.
            cleaned_strings.append(string)
            continue
        if isinstance(brand, str):
            string = string.replace(brand, '')
        cleaned_strings.append(string.strip())
    return cleaned_strings

# Add a brand-free title column, then keep the first product's cleaned title for display.
product['product_information_title'] = remove_brand(
    product['title'],
    product['product_information_brand'],
)
product_tile = product['product_information_title'].iloc[0]
product_tile

In [107]:
# One {asin: section text} mapping per description section.
product_summary_dict = {}
what_is_in_the_box_dict = {}
technical_facts_dict = {}
features_dict = {}
how_product_use_dict = {}
where_product_use_dict = {}
user_description_dict = {}
# The 'Packaging' and 'Season' sections are currently not extracted:
#packaging_description_dict = {}
#season_description_dict = {}

# Section key inside product_description_data -> dictionary that collects it.
section_targets = (
    ('Product Summary', product_summary_dict),
    ('What is in the box', what_is_in_the_box_dict),
    ('Technical Facts', technical_facts_dict),
    ('Features', features_dict),
    ('How the product is used', how_product_use_dict),
    ('Where the product is used', where_product_use_dict),
    ('User Description', user_description_dict),
)

for i in product.index:
    asin = product.loc[i, 'asin']
    data = product.loc[i, 'product_description_data']
    for section_name, target_dict in section_targets:
        # .get stores None when a product has no such section.
        target_dict[asin] = data.get(section_name)

list_of_product_data_dictionaries = [
    product_summary_dict,
    what_is_in_the_box_dict,
    technical_facts_dict,
    features_dict,
    how_product_use_dict,
    where_product_use_dict,
    user_description_dict,
]

# Raw chatbot replies, keyed by the name of the section dictionary they summarise.
chatbot_responses = {}


### Product Summary

In [108]:
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb


# Few-shot example for the product-summary request: User_Prompt_1 is the example
# request and AI_Prompt_1 the example answer. A backslash at the end of a line
# inside these triple-quoted strings joins it with the next line.
# The format instruction names the "Product Summary" key so that it matches the
# key used by the example answer (it previously asked for "Technical Facts").
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"Product Summary"\
}
Write a single product fact sheet summary of a product based on these observations,\
PRODUCT SUMMARIES from an ecommerce site delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values. Max 100 words.\

PRODUCT SUMMARIES ```\
{\
'B07XCRT49W': 'A portable and mess-free magnetic drawing pad that stimulates learning through play. Perfect for long drives and travel.',\
'B09VBZZ9C8': 'This package includes 3 mini magnetic drawing boards with 120 magnetic beads and a stylus for easy drawing and erasing. Suitable for children over 6 years old, it can be used as an educational gift to stimulate and develop sensory receptors while relieving mental stress and improving focus. The board is easy to erase and reset, and the stylus is stored at the bottom for easy storage and transportation. Made of quality plastic, it is safe for children to use and encourages imagination and creativity.',\
'B07XCRVK2Y': 'A portable and mess-free magnetic drawing pad that keeps kids entertained and stimulates learning through magnetism. No eraser needed, just use the side of the pen to push the magnetic beads back down. Makes a fun and unique gift for toddlers and the whole family.',\
'B07Q899BPB': 'A magnetic drawing board that promotes problem-solving, hand-eye coordination, and creativity in children. The board features magnetized beads and a stylus pen for easy drawing and erasing. It is portable and lightweight, making it a great travel companion. The board can be used for diverse applications, including message taking, classroom assignments, and alphabet and number practicing. It is made of child-safe, non-toxic, BPA-free, and lead-free materials, ensuring quality and safety.',\
'B09R9MMW6J': "The Original Magnatab is a sensory-based creative play experience designed with the Montessori learning style in mind. Children can use the magnetic stylus to draw pictures, letters, numbers, and shapes while feeling the pull of the magnets, hearing their clicking sound, and seeing what they are drawing. The beads are magically 'erased' when pushed down by the tip of a finger or the swipe of the side of the stylus. Great for at-home use or for classroom activities!",\
'B085Q3TLF8': 'A magnetic drawing board that offers unlimited potential for kids to draw and create. Sensory-based with a satisfying clicking sound and glowing in the dark. Easy to clean up and suitable for at-home or classroom use.'\
}```"""

AI_Prompt_1 = """\
{\
"Product Summary": "These magnetic drawing boards offer a portable, \
mess-free, and creative play experience for children, with a focus on sensory stimulation and learning. \
They promote problem-solving, hand-eye coordination, and imagination. \
Made of safe, non-toxic materials, these boards can be used for drawing, \
writing, and practicing letters and numbers. They are suitable for both home and classroom use.",\
"Outliers": "B09VBZZ9C8 is an outlier as it includes 3 mini magnetic drawing boards \
instead of a single board, and B085Q3TLF8 stands out for its glowing in the dark feature."\
}"""


# The real request: the live product summaries, labelled like the few-shot example.
User_Prompt_2 = f"```PRODUCT SUMMARIES:``` {product_summary_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['product_summary_dict'] = reply_text
print(product_summary_dict)
print(reply_text)

{'B07XCRT49W': 'A portable and mess-free magnetic drawing pad that stimulates learning through play. Perfect for long drives and travel.', 'B09VBZZ9C8': 'This package includes 3 mini magnetic drawing boards with 120 magnetic beads and a stylus for easy drawing and erasing. Suitable for children over 6 years old, it can be used as an educational gift to stimulate and develop sensory receptors while relieving mental stress and improving focus. The board is easy to erase and reset, and the stylus is stored at the bottom for easy storage and transportation. Made of quality plastic, it is safe for children to use and encourages imagination and creativity.', 'B07XCRVK2Y': 'A portable and mess-free magnetic drawing pad that keeps kids entertained and stimulates learning through magnetism. No eraser needed, just use the side of the pen to push the magnetic beads back down. Makes a fun and unique gift for toddlers and the whole family.', 'B07ZJ8FDPK': 'The Magblock Magnetic Writing Board is a u

### What is in the box

In [109]:

# Few-shot example for the "What is in the box" request: User_Prompt_1 is the
# example request and AI_Prompt_1 the example answer. A backslash at the end of
# a line inside these triple-quoted strings joins it with the next line.
# The example observations include B085Q4W3WX ('Stylus') because the example
# answer flags that ASIN as an outlier; without it the example would show the
# model citing a product that was never in its input.
# NOTE(review): the closing `"` after "Ignore 'unknown' values." has no matching
# opening quote — looks like a typo; confirm before changing the prompt text.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
'What is in the box'\
}\

Write a single what is in the box of a product based on these OBSERVATIONS. Select the most common values from OBSERVATIONS. \
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values."\

"OBSERVATIONS": ```\
{\
'B07XCRT49W': 'Magnetic board, magnetic stylus pen',\
'B09VBZZ9C8': '3 mini magnetic drawing boards, stylus',\
'B07XCRVK2Y': 'Magnetic drawing pad, magnetic stylus pen',\
'B07ZJ8FDPK': 'Magblock Magnetic Writing Board',\
'B09R9MMW6J': 'The Original Magnatab, magnetic stylus',\
'B085Q4W3WX': 'Stylus',\
'B01M4OV4Q4': 'Magnetic drawing board, stylus pen'\
}```"""

AI_Prompt_1 = """\
{\
"What is in the box": "Magnetic drawing board, magnetic stylus pen",\
"Outliers": [\
{\
"ProductID": "B07ZJ8FDPK",\
"Reason": "Incomplete information, missing stylus"\
},\
{\
"ProductID": "B085Q4W3WX",\
"Reason": "Incomplete information, missing drawing board"\
}\
]\
}"""


# The real request: the live "what is in the box" observations.
User_Prompt_2 = f"```OBSERVATIONS``` {what_is_in_the_box_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.4,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['what_is_in_the_box_dict'] = reply_text
print(what_is_in_the_box_dict)
print(reply_text)

{'B07XCRT49W': 'Magnetic board, magnetic stylus pen', 'B09VBZZ9C8': '3 mini magnetic drawing boards, stylus', 'B07XCRVK2Y': 'Magnetic drawing pad, magnetic stylus pen', 'B07ZJ8FDPK': 'Magblock Magnetic Writing Board', 'B07Q899BPB': 'Magnetic drawing board, stylus pen', 'B09R9MMW6J': 'The Original Magnatab, magnetic stylus', 'B085Q4W3WX': 'Stylus', 'B07X7YFZWG': 'Magnetic drawing pad, magnetic stylus pen', 'B085Q3TLF8': 'Magnetic drawing board, stylus', 'B07ZJ8HRN8': 'Magblock Magnetic Writing Board', 'B07QXMLSS5': 'Magnetic drawing board, stylus pen', 'B01M4OV4Q4': 'Magnetic drawing board, stylus pen'}
{"What is in the box": "Magnetic drawing board, magnetic stylus pen","Outliers": [{"ProductID": "B07ZJ8FDPK","Reason": "Incomplete information, missing stylus"},{"ProductID": "B085Q4W3WX","Reason": "Incomplete information, missing drawing board"},{"ProductID": "B07ZJ8HRN8","Reason": "Incomplete information, missing stylus"}]}


### Technical Facts

In [110]:
# Few-shot example for the "Technical Facts" request: User_Prompt_1 is the
# example request and AI_Prompt_1 the example answer. A backslash at the end of
# a line inside these triple-quoted strings joins it with the next line.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"Technical Facts"
}
Write the technical facts / details of a single product from the feat sheets of a product\
based on these OBSERVATIONS. Don't write the benefits, focus only on characteristics. \
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values.\

"OBSERVATIONS": ```\
{\
'B07XCRT49W': 'No-mess, learning disguised as play, portable and travel-friendly, no eraser needed',\
'B09VBZZ9C8': '120 magnetic beads, easy to erase and reset, stylus stored at the bottom for easy storage and transportation',\
'B07XCRVK2Y': 'No-mess, learning disguised as play, portable and travel-friendly, no eraser needed',\
'B07ZJ8FDPK': 'Tactile drawing experience, helps children learn and write the alphabet correctly, great tool for early writing education, brings more learning fun for kids',\
'B07Q899BPB': 'Magnetized beads, stylus pen, reusable and erasable, portable and lightweight, versatile use, STEM learning',\
'B09R9MMW6J': 'Sensory-based, unlimited potential for drawing, no cleanup, learning and fun, easy storage for magnetic stylus',\
'B085Q4W3WX': 'Sensory-based, versatile use (drawing, practicing letters/numbers/shapes), suitable for home or classroom use',\
'B07X7YFZWG': 'No-mess, learning disguised as play, portable, no eraser needed',\
'B085Q3TLF8': 'Unlimited potential for creativity, sensory-based, glows in the dark, easy cleanup, suitable for at-home or classroom use',\
'B07ZJ8HRN8': 'Tactile drawing experience, helps children learn and write the alphabet correctly, great tool for early writing education, brings more learning fun for kids',\
'B01M4OV4Q4': 'Mess-free, travel-ready, erasable, stimulates curiosity and creativity, develops pencil grasp'\
}```"""

# The example answer must itself be valid JSON, because the replies it shapes
# are later parsed with json.loads. The last list item therefore carries no
# trailing comma.
AI_Prompt_1 = """\
{\
"Technical Facts": [\
"120 magnetic beads",\
"Stylus pen",\
"Reusable and erasable",\
"Portable and lightweight",\
"Stylus stored at the bottom",\
"Magnetized bead"\
],\
"Outliers": [\
{\
"ASIN": "B085Q3TLF8",\
"Feature": "Glows in the dark"\
}\
],\
"OutliersExplanation": {\
"GlowsInTheDark": "The 'glows in the dark' feature is an outlier because \
it's unique to one product (ASIN 'B085Q3TLF8') and not mentioned in \
other products' features. This characteristic allows the product \
to emit light in dark environments, making it more visually \
appealing and engaging for users in low-light situations."\
}\
}"""

# The real request: the live technical-facts observations.
User_Prompt_2 = f"```OBSERVATIONS``` {technical_facts_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['technical_facts_dict'] = reply_text
print(technical_facts_dict)
print(reply_text)

{'B07XCRT49W': 'unknown', 'B09VBZZ9C8': 'Made of quality plastic, safe for children to use', 'B07XCRVK2Y': 'unknown', 'B07ZJ8FDPK': 'unknown', 'B07Q899BPB': 'Child safe, non-toxic, BPA-free, lead-free, tested in CPC accredited lab', 'B09R9MMW6J': 'unknown', 'B085Q4W3WX': 'unknown', 'B07X7YFZWG': 'unknown', 'B085Q3TLF8': 'unknown', 'B07ZJ8HRN8': 'unknown', 'B07QXMLSS5': 'Child safe, non-toxic, BPA-free, lead-free, tested in CPC accredited lab', 'B01M4OV4Q4': 'Made of high-quality ABS plastic, fully contained magnetic beads, suitable for ages 3 to adult'}
{"Technical Facts": ["Made of quality plastic","Child safe","Non-toxic","BPA-free","Lead-free","Tested in CPC accredited lab","Made of high-quality ABS plastic","Fully contained magnetic beads","Suitable for ages 3 to adult"],"Outliers": [],"OutliersExplanation": {}}


### Features

In [111]:
# Few-shot example for the "Features" request: User_Prompt_1 is the example
# request and AI_Prompt_1 the example answer. A backslash at the end of a line
# inside these triple-quoted strings joins it with the next line; the
# observation rows below have no trailing backslash, so they keep their newlines.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"Features"\
}
Write the features of a single product from the feat sheets of a product\
based on these OBSERVATIONS. Focus on the benefits that using the product brings. \
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values.\

"OBSERVATIONS": ```\
{\
'B07XCRT49W': 'No-mess, learning disguised as play, portable and travel-friendly, no eraser needed',
'B09VBZZ9C8': '120 magnetic beads, easy to erase and reset, stylus stored at the bottom for easy storage and transportation',
'B07Q899BPB': 'Magnetized beads, stylus pen, reusable and erasable, portable and lightweight, versatile use, STEM learning',
'B09R9MMW6J': 'Sensory-based, unlimited potential for drawing, no cleanup, learning and fun, easy storage for magnetic stylus',
'B085Q4W3WX': 'Sensory-based, versatile use (drawing, practicing letters/numbers/shapes), suitable for home or classroom use',
'B07X7YFZWG': 'No-mess, learning disguised as play, portable, no eraser needed',
'B085Q3TLF8': 'Unlimited potential for creativity, sensory-based, glows in the dark, easy cleanup, suitable for at-home or classroom use',
'B07QXMLSS5': 'Magnetized beads, stylus pen, reusable and erasable, portable and lightweight, versatile use, STEM learning',
'B01M4OV4Q4': 'Mess-free, travel-ready, erasable, stimulates curiosity and creativity, develops pencil grasp'
}```"""

# Example answer: a JSON object mapping each feature to its benefit, plus the
# outlier list and its explanation.
AI_Prompt_1 = """\
{\
"Features": {\
"No-mess": "Keeps the area clean while in use",\
"Learning disguised as play": "Makes learning fun and engaging",\
"Portable and travel-friendly": "Easy to carry and use on the go",\
"No eraser needed": "Effortless erasing and reusing",\
"120 magnetic beads": "Provides ample space for creativity and learning",\
"Easy to erase and reset": "Convenient and time-saving",\
"Stylus stored at the bottom": "Ensures easy storage and transportation",\
"Magnetized beads": "Allows for smooth drawing and tactile learning",\
"Stylus pen": "Enables precise control and encourages proper grip",\
"Reusable and erasable": "Environmentally friendly and cost-effective",\
"Portable and lightweight": "Comfortable to use and carry",\
"Versatile use": "Applicable to various learning activities",\
"STEM learning": "Promotes science, technology, engineering, and math skills",\
"Sensory-based": "Stimulates multiple senses for enhanced learning",\
"Unlimited potential for drawing": "Encourages creativity and exploration",\
"No cleanup": "Minimizes maintenance and hassle",\
"Easy storage for magnetic stylus": "Prevents loss and keeps workspace organized",\
"Suitable for home or classroom use": "Adaptable to different environments",\
"Glows in the dark": "Adds a fun and unique element to learning",\
"Easy cleanup": "Reduces time spent on maintenance",\
"Stimulates curiosity and creativity": "Inspires imagination and problem-solving",\
"Develops pencil grasp": "Helps improve handwriting and fine motor skills"\
},\
"Outliers": [\
{\
"ASIN": "B085Q3TLF8",\
"Feature": "Glows in the dark"\
}\
],\
"OutliersExplanation": {\
"GlowsInTheDark": "The 'glows in the dark' feature is an outlier because it's \
unique to one product (ASIN 'B085Q3TLF8') and not mentioned in other products' features. \
This characteristic allows the product to emit light in dark environments, \
making it more visually appealing and engaging for users in low-light situations."\
}\
}"""

# The real request: the live feature observations.
User_Prompt_2 = f"```OBSERVATIONS``` {features_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['features_dict'] = reply_text
print(features_dict)
print(reply_text)

{'B07XCRT49W': 'No-mess, learning disguised as play, portable and travel-friendly, no eraser needed', 'B09VBZZ9C8': '120 magnetic beads, easy to erase and reset, stylus stored at the bottom for easy storage and transportation', 'B07XCRVK2Y': 'No-mess, learning disguised as play, portable and travel-friendly, no eraser needed', 'B07ZJ8FDPK': 'Tactile drawing experience, helps children learn and write the alphabet correctly, great tool for early writing education, brings more learning fun for kids', 'B07Q899BPB': 'Magnetized beads, stylus pen, reusable and erasable, portable and lightweight, versatile use, STEM learning', 'B09R9MMW6J': 'Sensory-based, unlimited potential for drawing, no cleanup, learning and fun, easy storage for magnetic stylus', 'B085Q4W3WX': 'Sensory-based, versatile use (drawing, practicing letters/numbers/shapes), suitable for home or classroom use', 'B07X7YFZWG': 'No-mess, learning disguised as play, portable, no eraser needed', 'B085Q3TLF8': 'Unlimited potential f

### How to use the product

In [112]:
# Few-shot example for the "How the product is used" request: User_Prompt_1 is
# the example request and AI_Prompt_1 the example answer. A backslash at the
# end of a line inside these triple-quoted strings joins it with the next line.
# NOTE(review): the line starting with `"Write how` opens a double quote that is
# never closed — looks like a typo; confirm before changing the prompt text.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"How the product is used"\
}
"Write how a single product is used / can be used based on these OBSERVATIONS.\
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values.\

"OBSERVATIONS": ```\
{\
'B07XCRT49W': 'Pointing the magnetic nib of the stylus on each hole to move the magnetic beads to the surface',
'B09VBZZ9C8': 'Drawing, designing, creating',
'B07XCRVK2Y': 'Drawing and playing with magnetic beads',
'B07ZJ8FDPK': 'Teaching children how to write and draw',
'B07Q899BPB': 'Drawing, message taking, classroom assignments, alphabet and number practicing',
'B09R9MMW6J': 'Drawing pictures, letters, numbers, and shapes',
'B085Q3TLF8': 'Drawing and creating',
'B01M4OV4Q4': 'Drawing, sketching, passing time'
}```"""

# Example answer: a single-key JSON object whose text also states that there
# are no significant outliers.
AI_Prompt_1 = """\
{\
"How the product is used": "The product is primarily used for drawing, \
designing, creating, and playing with magnetic beads. \
It can also be used for teaching children how to write and draw, \
taking messages, completing classroom assignments, and practicing alphabets and numbers. \
There are no significant outliers in the given observations."\
}"""

# The real request: the live "how the product is used" observations.
User_Prompt_2 = f"```OBSERVATIONS``` {how_product_use_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['how_product_use_dict'] = reply_text
print(how_product_use_dict)
print(reply_text)

{'B07XCRT49W': 'Pointing the magnetic nib of the stylus on each hole to move the magnetic beads to the surface', 'B09VBZZ9C8': 'Drawing, designing, creating', 'B07XCRVK2Y': 'Drawing and playing with magnetic beads', 'B07ZJ8FDPK': 'Teaching children how to write and draw', 'B07Q899BPB': 'Drawing, message taking, classroom assignments, alphabet and number practicing', 'B09R9MMW6J': 'Drawing pictures, letters, numbers, and shapes', 'B085Q4W3WX': 'Drawing, practicing letters/numbers/shapes', 'B07X7YFZWG': 'Pointing the magnetic nib of the stylus on each hole to move the magnetic beads to the surface', 'B085Q3TLF8': 'Drawing and creating', 'B07ZJ8HRN8': 'Teaching children how to write and draw', 'B07QXMLSS5': 'Drawing, message taking, classroom assignments, alphabet and number practicing', 'B01M4OV4Q4': 'Drawing, sketching, passing time'}
{"How the product is used": "The product is primarily used for drawing, designing, creating, and playing with magnetic beads. It can also be used for teac

### Where the product is used

In [113]:
# Few-shot example for the "Where the product is used" request: User_Prompt_1
# is the example request and AI_Prompt_1 the example answer. A backslash at the
# end of a line inside these triple-quoted strings joins it with the next line.
# NOTE(review): the line starting with `"Write where` opens a double quote that
# is never closed — looks like a typo; confirm before changing the prompt text.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"Where the product is used"\
}
"Write where a single product is used based on these OBSERVATIONS.\
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values.\

"OBSERVATIONS": ```\
{\
'B09VBZZ9C8': "Long drives, doctor's office, waiting for a flight, classroom activities",
'B07ZJ8FDPK': 'Pre-schools, homes, classrooms',
'B07Q899BPB': 'Home, school, travel',
'B09R9MMW6J': 'At home or in the classroom',
'B01M4OV4Q4': "Car, restaurants, doctor's offices, on-the-go"
}```"""

# Example answer: a single-key JSON object listing the merged places of use.
AI_Prompt_1 = """\
{\
"Where the product is used": "Home, schools, classrooms, long drives, \
doctor's offices, waiting for a flight, restaurants, on-the-go, and travel"\
}"""

# The real request: the live "where the product is used" observations.
User_Prompt_2 = f"```OBSERVATIONS``` {where_product_use_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['where_product_use_dict'] = reply_text
print(where_product_use_dict)
print(reply_text)

{'B07XCRT49W': 'Long drives, flights, train rides', 'B09VBZZ9C8': "Long drives, doctor's office, waiting for a flight, classroom activities", 'B07XCRVK2Y': 'Long drives, flights, train rides', 'B07ZJ8FDPK': 'Pre-schools, homes, classrooms', 'B07Q899BPB': 'Home, school, travel', 'B09R9MMW6J': 'At home or in the classroom', 'B085Q4W3WX': 'Home or classroom', 'B07X7YFZWG': 'Long drives, flights, train rides', 'B085Q3TLF8': 'At-home or classroom', 'B07ZJ8HRN8': 'Pre-schools, homes, classrooms', 'B07QXMLSS5': 'Home, school, travel', 'B01M4OV4Q4': "Car, restaurants, doctor's offices, on-the-go"}
{"Where the product is used": "Home, schools, classrooms, long drives, flights, train rides, doctor's offices, waiting for a flight, restaurants, on-the-go, and travel"}


### User Description

In [114]:
# Few-shot example for the "User Description" request: User_Prompt_1 is the
# example request and AI_Prompt_1 the example answer. A backslash at the end of
# a line inside these triple-quoted strings joins it with the next line.
# NOTE(review): the line starting with `"Write a user description` opens a
# double quote that is never closed — looks like a typo; confirm before
# changing the prompt text.
User_Prompt_1 = """
Format your response as a JSON object with: \
{\
"User Description"\
}
"Write a user description of a single product based on these OBSERVATIONS.\
OBSERVATIONS delimited with triple backticks.```\
Identify if any outliers exist and explain them. Ignore 'unknown' values.\

"OBSERVATIONS": ```\
{\
'B07XCRT49W': 'Kids and toddlers',
'B09VBZZ9C8': 'Children over 6 years old',
'B07ZJ8FDPK': 'Toddlers and children in pre-school',
'B07QXMLSS5': 'School-age preschool kindergarten children',
'B01M4OV4Q4': 'Ages 3 to adult'
}```"""

# Example answer: a single-key JSON object; the outlier is described inside the
# text rather than in a separate "Outliers" key.
AI_Prompt_1 = """\
{\
"User Description": "This product is primarily designed for children, \
including kids, toddlers, and preschoolers, with a broad age range from 3 years old \
up to adults. The outlier in this dataset is 'B09VBZZ9C8' which is only \
suitable for children over 6 years old, while the others are targeted at younger \
age groups or have a wider age range."
}"""

# The real request: the live user-description observations.
User_Prompt_2 = f"```OBSERVATIONS``` {user_description_dict}"

# Conversation order: example request, example answer, then the real request.
few_shot_messages = [
    {"role": "user", "content": User_Prompt_1},
    {"role": "assistant", "content": AI_Prompt_1},
    {"role": "user", "content": User_Prompt_2},
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=few_shot_messages,
    temperature=0.2,
)

# Keep the raw reply text under this section's key and show input and reply.
reply_text = response["choices"][0]["message"]["content"]
chatbot_responses['user_description_dict'] = reply_text
print(user_description_dict)
print(reply_text)

{'B07XCRT49W': 'Kids and toddlers', 'B09VBZZ9C8': 'Children over 6 years old', 'B07XCRVK2Y': 'Toddlers and families', 'B07ZJ8FDPK': 'Toddlers and children in pre-school', 'B07Q899BPB': 'School-age preschool kindergarten children', 'B09R9MMW6J': 'Children', 'B085Q4W3WX': 'Children', 'B07X7YFZWG': 'Kids, toddlers, families', 'B085Q3TLF8': 'Children', 'B07ZJ8HRN8': 'Toddlers and children in pre-school', 'B07QXMLSS5': 'School-age preschool kindergarten children', 'B01M4OV4Q4': 'Ages 3 to adult'}
{"User Description": "This dataset contains a variety of products that are designed for children of different ages, from toddlers to school-age preschool and kindergarten children. Some products are also suitable for families to use together. The age range of the products varies, with one product ('B01M4OV4Q4') suitable for ages 3 to adult. There are no clear outliers in this dataset."}


### Export the data

In [115]:
# Turn each raw JSON reply string into a parsed object, working on a shallow
# copy so chatbot_responses keeps the original reply text.
temp_cr = dict(chatbot_responses)
for key, raw_reply in list(temp_cr.items()):
    temp_cr[key] = json.loads(raw_reply)

In [116]:
# Persist the parsed section summaries as one JSON document in the working directory.
with open('summarised_product_information.json', 'w') as f:
    f.write(json.dumps(temp_cr))

In [117]:
# Check if the JSON file is valid
import json
# Validate the file at the same relative path the export step writes to, rather
# than a machine-specific absolute path that may point at a stale or missing copy.
with open('summarised_product_information.json') as file:
    try:
        data = json.load(file)
    except json.JSONDecodeError as e:
        print("JSONDecodeError:", e)
        # Re-read the file to show the offending line with a caret under the
        # reported column (lineno and colno are 1-based).
        file.seek(0)
        lines = file.readlines()
        if 0 < e.lineno <= len(lines):
            prefix = f"{e.lineno}: "
            print(prefix + lines[e.lineno - 1].rstrip("\n"))
            # Offset the caret by the prefix width so it lines up with the text.
            print(" " * (len(prefix) + e.colno - 1) + "^")

In [None]:
###########
########### TESTING #############

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
import json
import os

import tiktoken
from openai.embeddings_utils import get_embedding
from sklearn.cluster import AgglomerativeClustering

import openai
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Report on the value already read above instead of querying the environment twice.
if OPENAI_API_KEY is not None:
    print ("OPENAI_API_KEY is ready")
else:
    print ("OPENAI_API_KEY environment variable not found")

# Create an SQLAlchemy engine to connect to the database.
# Prefer a DATABASE_URL from the environment/.env so real credentials do not
# have to live in the notebook; the fallback is the original local default.
engine = create_engine(
    os.getenv('DATABASE_URL', 'postgresql://postgres:mysecretpassword@localhost/postgres')
)

# Read the ASIN values from the CSV file
asin_list = pd.read_csv('asin_list.csv')['asin'].tolist()

OPENAI_API_KEY is ready


In [4]:
# Load the summarised product information JSON into a dictionary.
summary_path = '/Users/vladbordei/Documents/Development/oaie2/summarised_product_information.json'
with open(summary_path) as file:
    json_string = file.read()
general_product_data = json.loads(json_string)

In [5]:
# Shrink general_product_data (fewer tokens to send later) by dropping the
# outlier bookkeeping from the sections that may carry it.
for section_key in (
    'product_summary_dict',
    'what_is_in_the_box_dict',
    'technical_facts_dict',
    'features_dict',
):
    if 'Outliers' in general_product_data[section_key]:
        del general_product_data[section_key]['Outliers']

# Optional: also drop 'OutliersExplanation' from the two sections that may have it.
for section_key in ('technical_facts_dict', 'features_dict'):
    if 'OutliersExplanation' in general_product_data[section_key]:
        del general_product_data[section_key]['OutliersExplanation']


In [6]:
import ast  # cell-local import so this cell stays runnable on its own

# Fetch the rows of the products table whose ASIN is in asin_list.
query = text("SELECT * FROM products WHERE asin IN :asin_list")

product = pd.read_sql_query(query, engine, params={"asin_list": tuple(asin_list)})

# The column holds Python-literal text (a dict repr). Parse it with
# ast.literal_eval rather than eval: eval would execute arbitrary code stored
# in the database, while literal_eval only accepts literals.
product['product_description_data'] = product['product_description_data'].apply(ast.literal_eval)
# Median of the current prices of the loaded products, rounded to zero decimals.
median_product_price = round(product.price_current_price.median(),0)

In [7]:
def remove_brand(strings, brand_column):
    """Strip each item's brand name out of its paired string.

    Args:
        strings: Iterable of strings (e.g. product titles).
        brand_column: Iterable of brand names, paired positionally with
            ``strings``.

    Returns:
        list: One entry per (string, brand) pair, with every occurrence of the
        brand removed and surrounding whitespace stripped. When the brand is
        missing (None, NaN, or any other non-string) or empty, the string is
        only stripped.
    """
    cleaned_strings = []
    for string, brand in zip(strings, brand_column):
        # A missing brand arrives from pandas as None/NaN; str.replace would
        # raise TypeError on it, so only substitute for real, non-empty text.
        if isinstance(brand, str) and brand:
            string = string.replace(brand, '')
        cleaned_strings.append(string.strip())
    return cleaned_strings

# Store the brand-free titles in a new column, then keep the first row's
# cleaned title for later use.
product['product_information_title'] = remove_brand(
    product['title'],
    product['product_information_brand'],
)
product_tile = product['product_information_title'].iloc[0]

In [20]:
# First user turn of the worked example sent to the chat model: the compression
# instructions followed by a sample product dictionary.
# This is a plain (non-f) string, so the braces below are literal text.
# A trailing backslash inside the triple-quoted literal is a line continuation:
# the newline after it is dropped, so the instruction sentences are joined
# without any separator.
User_Prompt_1 = """\
Check for repetitive or redundant information and reduce the number of those observations.\
Remove ASIN refferences.\
Understanding the product is very important so don't reduce the information provided.\
Resulting dictionary should have a token count of 70% of curent dictionary.\
\
```GENERAL PRODUCT DATA: ```{'product_summary_dict': {'Product Summary': 'These magnetic drawing boards offer a mess-free and creative play experience for children, promoting problem-solving, hand-eye coordination, and imagination. They feature magnetized beads and a stylus pen for easy drawing and erasing, and are made of child-safe, non-toxic, BPA-free, and lead-free materials. Some boards are portable and travel-friendly, making them great companions for long drives and trips. Some boards also offer a tactile drawing experience and help children learn and write the alphabet correctly. These boards are suitable for both home and classroom use, and make great gifts for toddlers and children.'},
 'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},
 'technical_facts_dict': {'Technical Facts': ['Made of quality plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'Tested in CPC accredited lab',
   'Made of high-quality ABS plastic',
   'Fully contained magnetic beads',
   'Suitable for ages 3 to adult']},
 'features_dict': {'Features': {'No-mess': 'Keeps the area clean while in use',
   'Learning disguised as play': 'Makes learning fun and engaging',
   'Portable and travel-friendly': 'Easy to carry and use on the go',
   'No eraser needed': 'Effortless erasing and reusing',
   '120 magnetic beads': 'Provides ample space for creativity and learning',
   'Easy to erase and reset': 'Convenient and time-saving',
   'Stylus stored at the bottom': 'Ensures easy storage and transportation',
   'Tactile drawing experience': 'Provides a hands-on learning experience',
   'Helps children learn and write the alphabet correctly': 'Promotes early writing education',
   'Great tool for early writing education': 'Encourages learning and fun',
   'Brings more learning fun for kids': 'Engages children in the learning process',
   'Magnetized beads': 'Allows for smooth drawing and tactile learning',
   'Stylus pen': 'Enables precise control and encourages proper grip',
   'Reusable and erasable': 'Environmentally friendly and cost-effective',
   'Portable and lightweight': 'Comfortable to use and carry',
   'Versatile use': 'Applicable to various learning activities',
   'STEM learning': 'Promotes science, technology, engineering, and math skills',
   'Sensory-based': 'Stimulates multiple senses for enhanced learning',
   'Unlimited potential for drawing': 'Encourages creativity and exploration',
   'No cleanup': 'Minimizes maintenance and hassle',
   'Easy storage for magnetic stylus': 'Prevents loss and keeps workspace organized',
   'Versatile use (drawing, practicing letters/numbers/shapes)': 'Adaptable to different learning activities',
   'Suitable for home or classroom use': 'Applicable to different environments',
   'Glows in the dark': 'Adds a fun and unique element to learning',
   'Stimulates curiosity and creativity': 'Inspires imagination and problem-solving',
   'Develops pencil grasp': 'Helps improve handwriting and fine motor skills'}},
 'how_product_use_dict': {'How the product is used': 'The product is primarily used for drawing, designing, creating, and playing with magnetic beads. It can also be used for teaching children how to write and draw, taking messages, completing classroom assignments, and practicing alphabets and numbers. Users can point the magnetic nib of the stylus on each hole to move the magnetic beads to the surface. There are no significant outliers in the given observations.'},
 'where_product_use_dict': {'Where the product is used': "Home, schools, classrooms, long drives, flights, train rides, doctor's offices, waiting for a flight, restaurants, on-the-go, and travel"},
 'user_description_dict': {'User Description': "This dataset contains a variety of products that are designed for children of different ages, from toddlers to school-age preschool and kindergarten children. Some products are also suitable for families to use together. The age range of the products varies, with one product ('B01M4OV4Q4') suitable for ages 3 to adult. There are no clear outliers in this dataset."}}
"""

In [21]:
# Assistant turn of the worked example (sent with role "assistant" in the chat
# request): a condensed version of the dictionary given in User_Prompt_1,
# written with Python-style single-quoted keys and values.
# The backslash after the opening quotes drops the leading newline.
AI_Prompt_1 = """\
{
'product_summary_dict': {'Product Summary': 'These magnetic drawing boards provide a creative and mess-free play experience, enhancing problem-solving, hand-eye coordination, and imagination. The boards, made of child-safe, non-toxic, BPA-free, and lead-free materials, are great companions for travel and also promote learning of alphabet writing. They are suitable for both home and classroom use.'},
 
'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},

'technical_facts_dict': {'Technical Facts': ['Made of quality ABS plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'CPC accredited lab tested',
   'Fully contained magnetic beads',
   'Suitable for ages 3 to adult']},

'features_dict': {'Features': {'No-mess': 'Keeps the area clean while in use',
   'Learning disguised as play': 'Makes learning engaging',
   'Portable': 'Easy to carry and use on the go',
   'Easy to erase and reset': 'Convenient and environmentally friendly',
   'Stylus stored at the bottom': 'Prevents loss and keeps workspace organized',
   'Tactile drawing experience': 'Promotes a hands-on learning experience',
   'Helps children learn and write the alphabet correctly': 'Encourages early writing education',
   'Versatile use': 'Applicable to various learning activities',
   'STEM learning': 'Enhances science, technology, engineering, and math skills',
   'Sensory-based': 'Stimulates multiple senses for enhanced learning',
   'Glows in the dark': 'Adds a unique element to learning',
   'Develops pencil grasp': 'Helps improve handwriting and fine motor skills'}},

'how_product_use_dict': {'How the product is used': 'The product is used for drawing, teaching, and playing with magnetic beads. Users point the magnetic nib of the stylus on each hole to move the magnetic beads to the surface.'},

'where_product_use_dict': {'Where the product is used': 'Home, schools, travel, and waiting areas'},

'user_description_dict': {'User Description': 'The product is designed for different ages, from toddlers to adults. It is ideal for use by families and in classrooms.'}
}
"""

In [22]:
# Real request sent as the final user turn: the same instructions as the worked
# example, followed by the str() rendering of general_product_data, which the
# f-string interpolates in place.
# Trailing backslashes are line continuations inside the literal, so the pieces
# are joined with no newline between them; the indentation on the last two
# lines is part of the resulting string.
User_Prompt_2 = f"""\
Check for repetitive or redundant information and reduce the number of those observations.\
Remove ASIN refferences.\
Understanding the product is very important so don't reduce the information provided.\
Resulting dictionary should have a token count of 70% of curent dictionary.\
\
    ```GENERAL PRODUCT DATA: ```{general_product_data},\
    """

In [23]:
# Ask the chat model to condense the product dictionary, using one worked
# example (User_Prompt_1 -> AI_Prompt_1) ahead of the real request.
#
# Reset the result first: if the call fails, the error is only printed, and a
# value left over from an earlier run of this cell must not be silently reused
# by the cells that follow. After a failure chatbot_response is None.
chatbot_response = None
try:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": User_Prompt_1},
            {"role": "assistant", "content": AI_Prompt_1},
            {"role": "user", "content": User_Prompt_2},
        ],
        temperature=0.2,
        api_key=OPENAI_API_KEY,
    )
    # The reply text lives in the first choice's message content.
    chatbot_response = response["choices"][0]["message"]["content"]
    print(chatbot_response)
except Exception as e:
    print(f"An error occurred during the OpenAI ChatCompletion API call: {e}")



{
'product_summary_dict': {'Product Summary': 'These magnetic drawing boards provide a mess-free and creative play experience for children, promoting problem-solving, hand-eye coordination, and imagination. They feature magnetized beads and a stylus pen for easy drawing and erasing, and are made of child-safe, non-toxic, BPA-free, and lead-free materials. Some boards are portable and travel-friendly, making them great companions for long drives and trips. Some boards also offer a tactile drawing experience and help children learn and write the alphabet correctly. These boards are suitable for both home and classroom use, and make great gifts for toddlers and children.'},
 
'what_is_in_the_box_dict': {'What is in the box': 'Magnetic drawing board, magnetic stylus pen'},

'technical_facts_dict': {'Technical Facts': ['Made of quality ABS plastic',
   'Child safe',
   'Non-toxic',
   'BPA-free',
   'Lead-free',
   'CPC accredited lab tested',
   'Fully contained magnetic beads',
   'Suitab

In [31]:
import json

def process_json_string(json_string):
    """Parse a JSON document or a Python-literal dictionary string.

    The text is tried, in order, as:
      1. strict JSON;
      2. a Python literal (single-quoted keys/values, as printed by ``str(dict)``),
         which keeps apostrophes inside values intact;
      3. JSON after replacing every single quote with a double quote (the
         original behaviour, kept as a last resort).

    Args:
        json_string: Text to parse. Non-string input is handed to
            ``json.loads`` unchanged.

    Returns:
        The parsed Python object (normally a dict).

    Raises:
        json.JSONDecodeError: If none of the strategies can parse the text.
        TypeError: If ``json_string`` is of a type ``json.loads`` rejects.
    """
    import ast  # local import: only this function needs it

    if not isinstance(json_string, str):
        return json.loads(json_string)

    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        pass

    try:
        # literal_eval only evaluates literals; it cannot run arbitrary code.
        return ast.literal_eval(json_string.strip())
    except (ValueError, SyntaxError, MemoryError, RecursionError):
        pass

    # Legacy fallback: blanket quote replacement, then strict JSON.
    return json.loads(json_string.replace("'", '"'))

# Turn the model's reply text (chatbot_response) into a Python object.
simplified_product_data = process_json_string(chatbot_response)

In [32]:
# Attach the median price and the first product's brand-free title to the
# condensed product data.
simplified_product_data.update(
    median_price_usd=median_product_price,
    product_title=product_tile,
)

In [33]:
# Persist the condensed product data as JSON in the working directory.
with open('summarised_simplified_product_information.json', 'w') as out_file:
    json.dump(simplified_product_data, out_file)

In [34]:
# Check if the JSON file is valid
import json

# Re-read the file written by the previous cell, using the same relative path,
# so the check always targets the output that was just produced rather than a
# machine-specific absolute location.
with open('summarised_simplified_product_information.json') as file:
    try:
        data = json.load(file)
    except json.JSONDecodeError as e:
        print("JSONDecodeError:", e)
        # e.doc is the text that failed to parse; e.lineno counts '\n'
        # characters, so split on '\n' to index the offending line directly.
        bad_line = e.doc.split('\n')[e.lineno - 1]
        prefix = f"{e.lineno}: "
        print(prefix + bad_line)
        # Offset the caret by the "N: " prefix so it sits under the bad column.
        print(" " * (len(prefix) + e.colno - 1) + "^")