In [1]:
import torch
import torch.nn as nn
import clip
from PIL import Image
import pandas as pd
import requests
import os.path as osp
import pickle
import random
import numpy as np
from pathlib import Path
import sys
from operator import itemgetter
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time
import shutil

  warn(f"Failed to load image Python extension: {e}")


In [2]:
def read_pickle(dir):
    with open(dir, 'rb') as handle:
        b = pickle.load(handle)
    return b


def write_pickle(dir, data):
    with open(dir, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        

class Timer:
    def __init__(self):

        self.t1 = None

    @staticmethod
    def delta_to_string(td):

        res_list = []

        def format():
            return ", ".join(reversed(res_list)) + " elapsed."

        seconds = td % 60
        td //= 60
        res_list.append(f"{round(seconds,3)} seconds")

        if td <= 0:
            return format()

        minutes = td % 60
        td //= 60
        res_list.append(f"{minutes} minutes")

        if td <= 0:
            return format()

        hours = td % 24
        td //= 24
        res_list.append(f"{hours} hours")

        if td <= 0:
            return format()

        res_list.append(f"{td} days")

        return format()

    def __enter__(self):

        self.t1 = time.time()

    def __exit__(self, *args, **kwargs):

        t2 = time.time()
        td = t2 - self.t1

        print(self.delta_to_string(td))


def top_n(input_dict, n):
    return dict(sorted(input_dict.items(), key=itemgetter(1), reverse=True)[:n])


def find_products(text_input, category_df, image_pickle_path):

    text_input = [text_input]

    # stage one, compare categories
    category_df = category_df[~category_df["encoded_category"].isna()]
    categories = list(category_df["category"].values)

    categories_features = torch.stack(list(category_df["encoded_category"].values))
    encoded_texts = clip.tokenize(text_input).to(device)

    with torch.no_grad():

        text_features = model.encode_text(encoded_texts)

        categories_features /= categories_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        similarity =  100 * categories_features @ text_features.T

    res = dict(zip(categories, similarity.reshape(-1).tolist()))

    res = sorted(res.items(), key=itemgetter(1), reverse=True)

    n = 10
    res = res[:n]
    res_set = set([r[0] for r in res])
    
    # do image matching
    res = []
    for cat in res_set:
        store_path = osp.join(image_pickle_path, f"{cat}.pkl")
        cat_res = read_pickle(store_path)
        res.append(cat_res)
    res = pd.concat(res, axis=0)
    
    uniq_ids = list(res["uid"].values)
    image_features = torch.stack(list(res["encoded_image"].values))
    similarity =  100 * image_features @ text_features.T
    res = dict(zip(uniq_ids, similarity.reshape(-1).tolist()))
    res = sorted(res.items(), key=itemgetter(1), reverse=True)
    
    n = 5
    res = res[:n]
    res_set = set([r[0] for r in res])
    
    return res_set


def show_images(res):
    n = len(res)
    fig, ax = plt.subplots(1, n)

    fig.set_figheight(5)
    fig.set_figwidth(5 * n)
    
    iterable = True
    try:
       it = ax[0]
    except:
        iterable = False

    if not iterable:
        img_path = image_path(res[0])
        img = mpimg.imread(img_path)
        ax.imshow(img)
        ax.axis("off")
    else:
        for i, image in enumerate(res):
            img_path = image_path(image)
            img = mpimg.imread(img_path)

            ax[i].imshow(img)
            ax[i].axis('off')
            # ax[i].set_title(get_label(image), fontsize=8)

    plt.subplots_adjust(wspace=0, hspace=0.1)
    plt.show()
    
    
def image_path(uid):
    return osp.join(image_storage, f"{uid}.jpg")


def load_data(pickle_path):
    category_df = read_pickle(osp.join(pickle_path, "categories.pkl"))
    meta_df = read_pickle(osp.join(pickle_path, "meta_data.pkl"))
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)
    
    return device, model, preprocess, category_df, meta_df

In [3]:
image_storage = "demo_data/target_images"
pickle_path = "demo_data/data3_pickle"
image_pickle_path = "demo_data/data3_image_pickle"

In [5]:
meta_df = read_pickle(osp.join(pickle_path, "meta_data.pkl"))

In [34]:
sub_meta_df = meta_df[:100]

In [35]:
sub_meta_df["title"][0]

'Salinas Mission Desk & Hutch Antique White - Bush Furniture, white, MDF Composite (Medium-Density Fiberboard) (Frame) '

In [13]:
import openai
openai.api_key = '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'

In [20]:
def get_chat_gpt_reply(title):

    user_query = f"I am looking for {title}"

    prefix = (
        "considering what the user asked before, what is the user looking for with the following request."
        " Only respond with the product description no more than 30 words:"
    )

    messages = [{"role": "user", "content": f"{prefix} {user_query}"}]
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages
    )
    reply = chat.choices[0].message.content
    
    return reply

In [30]:
converted_titles = set()
res = []

In [36]:
count = 0
for title in sub_meta_df["title"]:
    if title not in converted_titles:
        while True:
            try:
                res.append(get_chat_gpt_reply(title))
                converted_titles.add(title)
                break
            except:
                time.sleep(10)
    count += 1
    if count % 10 == 0:
        print(count)

10
20
30
40
50
60
70
80
90
100


In [37]:
sub_meta_df["chatgpt_title"] = res

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_meta_df["chatgpt_title"] = res


In [38]:
write_pickle(osp.join(pickle_path, "meta_data_chatgpt.pkl"), sub_meta_df)

In [41]:
sub_meta_df

Unnamed: 0,uniq_id,combined_category,title,chatgpt_title
0,c456e14d-0693-5552-8f90-eead4a0f1f50,"Home Office Furniture, Desks, white",Salinas Mission Desk & Hutch Antique White - B...,The user is looking for a Salinas Mission Desk...
1,b020d91e-77ab-5c3e-b1e6-2fe01abe41b6,"Bedroom Furniture, Dressers, white","Finish Dresser with 6 Drawers White - Benzara,...",The user is looking for a white Finish Dresser...
2,d0972ad8-c10a-5d84-86a1-4d25cd361229,"Bedroom Furniture, Beds, pewter",Full Arcadia Nailbutton Metallic Upholstered B...,The user is looking for a Full Arcadia Nailbut...
3,dade134d-f251-5120-97be-23eb1e01c0f8,"Living Room Furniture, Storage Furniture, black","Broadway Side Cabinet Black - Lifestorey, blac...",The user is looking for a black Broadway Side ...
6,72b5d68b-c33e-57cd-be90-cfc145266fb7,"Living Room Furniture, TV Stands & Entertainme...","Lexington 70"" Solid Wood TV Stand Dark Mocha B...","The user is looking for a dark mocha brown, pi..."
...,...,...,...,...
140,885817ac-cd39-56fa-852f-b4ad3ca31aa0,"Kitchen & Dining Furniture, Dining Chairs & Be...",Flash Furniture Advantage Black Vinyl Church C...,The user is looking for a black vinyl church c...
142,a66ae727-4519-539d-acb8-8c88b83126d0,"Living Room Furniture, Console Tables, black, ...",Chesley Handcrafted Modern Industrial Acacia W...,The user is looking for a black and gray Chesl...
143,00d81e6f-a48a-5b47-8f9a-dacd59d73174,"Living Room Furniture, End & Side Tables, chrome",Coaster Home Furnishings Rectangular Tempered ...,The user is looking for a Coaster Home Furnish...
144,fadc94ea-b24d-5044-9782-f8ccd6b2266e,"Bedroom Furniture, Bedroom Sets & Collections,...",6pc Queen Ingram Bedroom Set with 2 Nightstand...,The user is looking for the product descriptio...
