In [1]:
import pandas as pd
import numpy as np
import os
import json
import pickle
from collections import defaultdict
from datetime import datetime
import openai
import requests
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored

In [2]:
# Load the per-item metadata (title, brand, price, reviews) keyed by item id.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
item_information_path = './data/item_information.json'
with open(item_information_path, 'r', encoding='utf-8') as f:
    item_information = json.load(f)
print(len(item_information))

6089


In [7]:
# Inspect one sample item (id '87'): title, brand, price and the reviews that
# feed description generation. Each line is printed as "<label>: <value>".
sample_item = item_information['87']
for label, field in (
    ('title', 'title'),
    ('brand', 'brand'),
    ('price', 'price'),
    ('reviews', 'reviews_for_description'),
):
    print(f"{label}: {sample_item[field]}")

title: PiercingJ 8-12pcs Unisex Cubic Zirconia Gem Stainlss Steel Barbell Earring/Cartilage Helix Earring/Stud Earring
brand: PiercingJ
price: $9.29
reviews: ['Very cheap looking. Prongs cover most of the stone. Metal looks tarnished and not safe to wear. Huge waste of money.', 'This is a great set or earrings at a great price. These are both cute and sturdy. I ordered these with the hopes of wearing them in both my lobe and cartilage piercings. However, I believe they are a larger gauge than 18 as they will not fit into my cartilage piercings and I had to work them into my lobe piercings. I have had them in my ears for several days now and have had no allergic reaction to the metal. I would definitely recommend these to a friend.', "It's very cute. Great $. I give it 4 stars because the gauge was a little to thick. Had a hard time putting them through the piercing.", "My one year old daughter still have them in after a month. That's good for her.", 'I absolutely love them! I will be o

In [10]:
# For every item, print its id followed by the word count of each review.
# Words are counted by splitting on a single space character.
for item_id, details in item_information.items():
    word_counts = [len(text.split(' ')) for text in details['reviews_for_description']]
    print(item_id, word_counts)

4426 [20, 125]
2164 [73, 57, 92, 34, 53, 100, 51, 45, 24, 35]
1829 [75]
3506 [19, 15, 30, 30, 4, 19, 25, 20, 23, 15]
3362 [36, 46, 25, 20, 116, 86, 40, 91, 154, 9, 29]
5902 [189, 81, 41, 77, 189, 189, 27, 123, 109, 123]
5105 [46, 130, 49, 18, 28, 102, 283, 70, 8, 69]
838 [46, 107, 52, 29, 138, 93, 112, 36, 31, 87]
3573 [58, 76, 16, 20, 81, 133, 140, 94, 46, 35]
3574 [58, 76, 3, 11, 16, 133, 140, 94, 46, 35]
1075 []
1077 []
1076 []
5901 [59]
5894 [98]
4391 [21]
5898 [101]
4491 []
1079 [92]
1081 [291]
4492 []
1078 []
1080 []
1082 []
5897 [66]
5896 [128]
5900 [25]
5899 [103]
5895 [267]
5437 [39, 63]
4253 [30, 32, 35, 16, 55, 5, 13, 49, 58, 26]
4392 [21]
2323 [206, 38, 14, 7, 15, 59, 31, 30, 20, 102]
310 [54, 23, 102, 35, 137, 36, 14, 35, 58, 42]
870 [97, 203, 57, 59, 40, 54, 36, 35, 17, 152]
4389 [23, 44]
4390 [8, 77]
1083 [96]
3101 [40, 66, 9, 52, 35, 437, 49, 113, 140, 48, 36]
394 [8, 53, 78, 22, 28, 22, 80, 46, 92, 19]
2238 [59, 15, 19, 208, 48, 33, 23, 13, 40, 22]
3087 [101, 49, 28, 2

In [21]:
# Build a trial prompt for sample item '87' that includes both its title and its
# reviews, then print it for inspection. The five parts are joined by newlines.
sample_item = item_information['87']
prompt = "\n".join([
    "As an expert fashion product recommender and advertiser, extract the strong (positive) and weak (negative) features or characteristics of the product from the given title and reviews. You are given the title of a fashion product and list of reviews about the product -",
    str(sample_item['title']),
    "Reviews -",
    str(sample_item['reviews_for_description']),
    "Give a 25 word concise product description mentioning strong and weak features of the product.",
])
print(prompt)

As an expert fashion product recommender and advertiser, extract the strong (positive) and weak (negative) features or characteristics of the product from the given title and reviews. You are given the title of a fashion product and list of reviews about the product -
PiercingJ 8-12pcs Unisex Cubic Zirconia Gem Stainlss Steel Barbell Earring/Cartilage Helix Earring/Stud Earring
Reviews -
['Very cheap looking. Prongs cover most of the stone. Metal looks tarnished and not safe to wear. Huge waste of money.', 'This is a great set or earrings at a great price. These are both cute and sturdy. I ordered these with the hopes of wearing them in both my lobe and cartilage piercings. However, I believe they are a larger gauge than 18 as they will not fit into my cartilage piercings and I had to work them into my lobe piercings. I have had them in my ears for several days now and have had no allergic reaction to the metal. I would definitely recommend these to a friend.', "It's very cute. Great $

In [15]:
# Take the API key from the environment instead of hard-coding it in the notebook.
# os.environ.get returns None when OPENAI_API_KEY is unset, so a missing key is
# not reported at this point.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [22]:
# Send the trial prompt to the completions endpoint and print the generated text.
# Not passed here: top_p, frequency_penalty, presence_penalty.
# Alternative model noted but not used: gpt-3.5-turbo-0125.
completion_kwargs = dict(
    model="gpt-3.5-turbo-instruct",
    prompt=prompt,
    temperature=0.3,
    max_tokens=50,
)
response = openai.completions.create(**completion_kwargs)
print(response.choices[0].text)



"PiercingJ's 8-12pcs Unisex Cubic Zirconia Gem Stainlss Steel Barbell Earrings are cute and sturdy, but have a thick gauge and some quality issues."


### Getting Product Descriptions

In [66]:
def generate_prompt(title, reviews):
    """Build the description-generation prompt for one product.

    Args:
        title: Product title. Accepted by the signature but not interpolated
            into the prompt text.
        reviews: Reviews for the product. Rendered with their default string
            form, so a list appears as its Python literal.

    Returns:
        A three-line prompt string: the instruction, the reviews, and the
        request for a 25 word description. The second and third lines start
        with 12 spaces.
    """
    indent = " " * 12
    instruction = "As an expert fashion product recommender and advertiser, extract the strong (positive) and weak (negative) features or characteristics of the product from the given reviews. You are given the list of reviews about the product -"
    request = "Give a 25 word concise product description mentioning strong and weak features of the product."
    return f"{instruction}\n{indent}{reviews}\n{indent}{request}"

In [67]:
# Build one prompt per item that has both a title and at least one review.
# Items without a 'title' key go to items_no_title; titled items whose review
# list is empty go to items_no_reviews.
items_no_title = []
items_no_reviews = []
content_to_summarize = dict()
for item_id, item_values in item_information.items():
    if 'title' not in item_values:
        items_no_title.append(item_id)
        continue
    reviews = item_values['reviews_for_description']
    if len(reviews) == 0:
        items_no_reviews.append(item_id)
        continue
    content_to_summarize[item_id] = generate_prompt(item_values['title'], reviews)

In [68]:
# Show how many prompts were built and spot-check the prompt stored for item '123'.
print(len(content_to_summarize), content_to_summarize['123'])

5858 As an expert fashion product recommender and advertiser, extract the strong (positive) and weak (negative) features or characteristics of the product from the given reviews. You are given the list of reviews about the product -
            ['I love these inox earrings I am allergic to fake earrings and these earrings I can wear I love them I recommend you buy them they go with any outfit I love these earrings they are worth paying for']
            Give a 25 word concise product description mentioning strong and weak features of the product.


In [69]:
# Ids of items whose record has no 'title' key.
print(items_no_title)

['2565']


In [70]:
# Number of titled items whose 'reviews_for_description' list is empty.
print(len(items_no_reviews))

230


In [71]:
# Collect the prompts into a plain list, in the same order as content_to_summarize.
content_list = [*content_to_summarize.values()]
print(len(content_list))

5858


In [72]:
# Print the first prompt twice: once from the list and once via the first key of
# the dict, so the two can be compared by eye.
print(content_list[0])
first_item_id = next(iter(content_to_summarize))
print(content_to_summarize[first_item_id])

As an expert fashion product recommender and advertiser, extract the strong (positive) and weak (negative) features or characteristics of the product from the given reviews. You are given the list of reviews about the product -
            ['Hello good evening, excellent product quality, comfortable, nice, good wreck 100% recommend them very beautiful, I gusta.soy of Venezuela v', "This is a beautiful watch, bigger than I thought it would be, which made me happy. The back light works very well, the shine/glitter it has is beautiful, it doesn't look like too much. It is easy to put on and take off, perfect for workouts. The only bad thing I have to say about this product is that the lifespan of the rubber pieces at the end of the band is Very short and you have no way of getting a replacement, because they are not sold separately. I took mine off and I wear the watch with no rubbers at the ends, its not that ugly you cant even tell actually. All the same Im happy with my purchase, I wou

### Filling in the item_descriptions

In [3]:
# Load the previously generated descriptions, keyed by item id.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
item_description_path = './data/item_description.json'
with open(item_description_path, 'r', encoding='utf-8') as f:
    item_description = json.load(f)
print(len(item_description))

5856


In [4]:
# Ids present in item_information but absent from item_description.
# The list comes from a set difference, so its order is not meaningful.
items_with_no_description = list(set(item_information) - set(item_description))
print(len(items_with_no_description))

233


In [13]:
# Merge metadata and generated descriptions into one record per item.
# title / brand / price are copied only when present in the source record;
# 'description' is always set, falling back to '' when none was generated.
processed_item_information = defaultdict(dict)
for item_id, details in item_information.items():
    record = processed_item_information[item_id]
    for field in ('title', 'brand', 'price'):
        if field in details:
            record[field] = details[field]
    record['description'] = item_description.get(item_id, '')

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [16]:
# Spot-check one merged record. processed_item_information is a defaultdict(dict),
# so indexing an id that is absent would insert an empty record rather than
# raise KeyError.
processed_item_information['362']

{'title': "Choies Women's Strapless Mixed Folk Print Bohemian Beach A-line Dress L",
 'brand': 'Choies',
 'description': '\n\nThe product is a cute dress with nice colors, but it has poor construction, is tight-fitting, and may be short for taller individuals. Check sizing guide for accurate fit.'}

In [15]:
# Number of merged records; the merge loop creates one per key of item_information.
print(len(processed_item_information))

6089


In [18]:
# Persist the merged item records as JSON.
# The output directory is created when missing so the write does not fail with
# FileNotFoundError on a fresh checkout. Mode 'w' is enough because the handle is
# only written to, and UTF-8 is set explicitly for a platform-independent file.
output_path = './processed_data/processed_item_information.json'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(processed_item_information, f)