<a href="https://colab.research.google.com/github/swang225/W210_capstone/blob/main/notebook/KK_sw0006_clip_predict_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install clip



In [None]:
import torch
import torch.nn as nn
import clip
from PIL import Image
import pandas as pd
import requests
import os.path as osp
import pickle
import random
import numpy as np
from pathlib import Path
import sys
from operator import itemgetter
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time

In [None]:
class Timer:
    """Context manager that prints the wall-clock time spent inside its ``with`` block.

    Example::

        with Timer():
            do_work()
    """

    def __init__(self):
        # Start timestamp (seconds, from time.time()); set when the block is entered.
        self.t1 = None

    @staticmethod
    def delta_to_string(td):
        """Format a duration as "[D days, ][H hours, ][M minutes, ]S seconds elapsed.".

        Args:
            td: Duration in seconds (int or float).

        Returns:
            The formatted string. Seconds are rounded to 3 decimals; a larger unit
            is included only while time remains after removing the smaller units,
            so 3600 renders as "1 hours, 0 minutes, 0 seconds elapsed.".
        """
        parts = []

        remaining, seconds = divmod(td, 60)
        parts.append(f"{round(seconds, 3)} seconds")

        # Everything above seconds is a whole number; cast so that a float input
        # prints "2 minutes" instead of "2.0 minutes".
        remaining = int(remaining)

        for unit, size in (("minutes", 60), ("hours", 24)):
            if remaining <= 0:
                break
            remaining, value = divmod(remaining, size)
            parts.append(f"{value} {unit}")
        else:
            # Only reached when both minutes and hours were emitted.
            if remaining > 0:
                parts.append(f"{remaining} days")

        # Units were collected smallest first; display largest first.
        return ", ".join(reversed(parts)) + " elapsed."

    def __enter__(self):
        self.t1 = time.time()
        # Return the instance so `with Timer() as t:` binds the timer.
        return self

    def __exit__(self, *args, **kwargs):
        # Returns None, so exceptions raised inside the block still propagate.
        td = time.time() - self.t1

        print(self.delta_to_string(td))


In [None]:
def image_path(uid):
    """Return the path of the "<uid>.jpg" file inside the global `image_storage` directory."""
    filename = f"{uid}.jpg"
    return osp.join(image_storage, filename)

In [None]:
def read_pickle(dir):
    """Unpickle and return the object stored in the file at path `dir`."""
    with open(dir, 'rb') as stream:
        return pickle.load(stream)


def write_pickle(dir, data):
    """Pickle `data` into the file at path `dir` using the highest available protocol."""
    with open(dir, 'wb') as stream:
        pickler = pickle.Pickler(stream, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(data)

In [None]:
def find_products(text_input, data, top_k=5):
    """Rank the catalogue images against a text query with CLIP.

    Uses the notebook-level globals `model` and `device` and the `clip` module.

    Args:
        text_input: The query, as a single string.
        data: DataFrame with a "uid" column and an "encoded_image" column whose
            values are tensors that can be concatenated along dim 0. Rows whose
            "encoded_image" is NaN are ignored.
        top_k: Maximum number of matches to return (default 5).

    Returns:
        dict mapping uid -> score, best match first. The score is the softmax of
        the text-to-image logits over all candidate images, times 100. Empty
        when there are no usable rows or `top_k` is not positive.
    """
    print(f"finding products for query: {text_input}...")

    data = data[~data["encoded_image"].isna()]
    # torch.cat raises on an empty list, so bail out before touching the model.
    if data.empty or top_k <= 0:
        return {}
    image_uids = list(data["uid"].values)

    encoded_images = torch.cat(list(data["encoded_image"].values)).to(device)
    # truncate=True: clip.tokenize otherwise raises for text longer than the
    # model's context length, which LLM-generated descriptions can exceed.
    encoded_texts = clip.tokenize([text_input], truncate=True).to(device)

    with torch.no_grad():
        _, logits_per_text = model(encoded_images, encoded_texts)
        probs = logits_per_text.softmax(dim=-1).cpu().numpy()

    res = dict(zip(image_uids, probs[0] * 100))
    ranked = sorted(res.items(), key=itemgetter(1), reverse=True)

    return dict(ranked[:top_k])


def show_images(res):
    """Display the product images for the uids in `res` side by side.

    Args:
        res: Mapping whose keys are product uids (for example the dict returned
            by `find_products`). Each image is read from `image_path(uid)`.
    """
    n = len(res)
    if n == 0:
        # plt.subplots(1, 0) is invalid; there is nothing to draw anyway.
        print("no images to show")
        return

    # squeeze=False always yields a 2-D array of Axes. Without it a single
    # result returns a bare Axes object and indexing it fails.
    fig, axes = plt.subplots(1, n, squeeze=False)

    fig.set_figheight(5)
    fig.set_figwidth(5 * n)

    for ax, uid in zip(axes[0], res.keys()):
        ax.imshow(mpimg.imread(image_path(uid)))
        ax.axis('off')

    plt.subplots_adjust(wspace=0, hspace=0.1)
    plt.show()

In [None]:
def save_processed_data(name, uid_list, eimage_list):
    """Pickle a DataFrame with columns "uid" and "encoded_image" to the path `name`."""
    columns = {
        "uid": uid_list,
        "encoded_image": eimage_list,
    }
    frame = pd.DataFrame(data=columns)
    write_pickle(name, frame)

In [None]:
# Working directories (relative to the current directory) for the demo images
# and for the cached pickle built from them.
image_storage = "demo_data/image"
pickle_path = "demo_data/pickle"

# Create both directories, including parents, if they do not exist yet.
for _directory in (image_storage, pickle_path):
    Path(_directory).mkdir(parents=True, exist_ok=True)

In [None]:
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen when several libraries bundle their own OpenMP — confirm it is
# still needed. It must be set before the affected libraries are loaded.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:
# Install (or upgrade to) the latest OpenAI CLIP directly from its GitHub repository.
!pip install --upgrade git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-rchrlw5b
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-rchrlw5b
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
# NOTE(review): CLIP is already installed from GitHub in the previous cell; this
# second distribution looks redundant — confirm whether it is needed, then keep one.
!pip install openai-clip



In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP model together with its image preprocessing function.
model, preprocess = clip.load("ViT-B/32", device=device)

AttributeError: ignored

In [None]:
# Cache file holding the preprocessed demo images (columns "uid" and "encoded_image").
demo_pickle_path = osp.join(pickle_path, "demo.pkl")

if not osp.exists(demo_pickle_path):
    print("demo pickle does not exist, converting demo images to demo pickle...")

    uid_list = []
    eimage_list = []
    # sorted() keeps the row order reproducible across runs and filesystems.
    for filename in sorted(os.listdir(image_storage)):
        uid, ext = osp.splitext(filename)
        # image_path() only resolves "<uid>.jpg", so skip sub-directories and
        # any other file type. splitext (instead of split(".")[0]) keeps uids
        # that themselves contain dots intact.
        if ext != ".jpg" or not osp.isfile(osp.join(image_storage, filename)):
            continue

        # The context manager closes the file handle once the tensor is built.
        with Image.open(image_path(uid)) as img:
            # unsqueeze(0) adds a leading dimension so the tensors can later
            # be concatenated with torch.cat.
            eimage_list.append(preprocess(img).unsqueeze(0))
        uid_list.append(uid)

    save_processed_data(demo_pickle_path, uid_list, eimage_list)
    print("done")

print("reading demo pickle")
# NOTE: unpickling can execute arbitrary code; only load pickles created by this notebook.
data = read_pickle(demo_pickle_path)

In [None]:
# Preview the first rows of the demo DataFrame (expected columns: "uid", "encoded_image").
data.head()

In [None]:
# Example query for a one-off CLIP search over the demo catalogue.
text_input = "I am looking for a large beige office chair"

# Timer prints how long the search took when the block exits.
with Timer():
    res1 = find_products(text_input, data)

# res1 maps product uid -> score; print the raw scores, then show the matching images.
print(res1)
show_images(res1)

### DEMO: ChatGPT + CLIP

In [None]:
!pip install openai

In [None]:
import os
from getpass import getpass

import openai

# Never hard-code or commit API keys — not even in a comment.
# NOTE(review): a real-looking secret key was previously committed in this cell;
# treat it as leaked and revoke it.
# Use the OPENAI_API_KEY environment variable if set, otherwise prompt without echoing.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")


In [None]:
# Conversation history sent to ChatGPT on every turn (user prompts and assistant replies).
messages = []

# One find_products() result dict per turn, newest last.
res_list = []

prefix = (
    "considering what the user asked before, what is the user looking for with the following request."
    " Only respond with the product description no more than 30 words:"
)

# Typing one of these (case-insensitive) ends the conversation. Without an exit
# the loop could only be stopped by interrupting the kernel, which prevents
# "Run All" from ever reaching the cells below.
exit_commands = {"quit", "exit"}

while True:
    message = input("User : ")
    if message.strip().lower() in exit_commands:
        print("Conversation ended.")
        break
    if not message:
        # Empty input: ask again.
        continue

    print(f"User entered: {message}")
    messages.append(
        {"role": "user", "content": f"{prefix} {message}"},
    )
    # Ask ChatGPT to condense the whole conversation into one product description.
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages
    )

    reply = chat.choices[0].message.content
    print(f"ChatGPT: {reply}")

    # Use the description as the CLIP text query and show the best matches.
    with Timer():
        print("looking for products...")
        res_list.append(find_products(reply, data))
        show_images(res_list[-1])
        print("found products")

    # Keep the assistant's answer in the history for the next turn.
    messages.append({"role": "assistant", "content": reply})

In [None]:
# this demo uses 1000 products in the dataset

# TODO
# append product description shown to chatGPT for history reference
# have chatGPT determine whether to search for image or ask for additional information
# also ask chatGPT whether the user has ended the conversation.
# add  additional logic for situations such as: user complaining that none of the products match what was asked, etc.

In [None]:
# User input history:

# I am looking for a large beige office chair
# Actually I am looking for something darker
# I don't need the wheels.

## Kisha Update below

In [None]:
!pip install langchain

In [None]:
pip install faiss-gpu

In [None]:
pip install faiss-cpu

In [None]:
pip install tiktoken

In [None]:
#import streamlit as st
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
#from dotenv import load_dotenv

#load_dotenv()
import io


import openai

import os


In [None]:
# Mount Google Drive at /content/drive so the dataset CSVs can be read from it.
# `google.colab` is imported here, so this cell only runs inside Google Colab.
from google.colab import drive, files
import io
drive.mount('/content/drive')



In [None]:
# Never hard-code or commit API keys.
# NOTE(review): a secret key was previously committed in this cell; treat it as
# leaked and revoke it.
from getpass import getpass

# Keep an already-exported key; otherwise prompt for one without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")


# 1 - Loading best-practice examples of furniture descriptions to guide GPT toward better output

In [None]:
# 1. Load best practice examples
# (later) a text splitter can be used for larger documents
# This is a sample of best-practice descriptions and furniture styles fed to the LLM.
# Action - look for sample data (only the description and furniture style columns are needed here)
loadfile = CSVLoader(file_path='/content/drive/MyDrive/W210/Datasets/DescriptionExample.csv')
# Named `description_docs` rather than `data` so that the CLIP demo DataFrame
# `data` (used by find_products earlier in the notebook) is not overwritten.
description_docs = loadfile.load()

embeddings = OpenAIEmbeddings()
# Embed the documents and index the vectors with FAISS (open source, from Meta).
db = FAISS.from_documents(description_docs, embeddings)


In [None]:
# 2. Function for similarity search

def retrieve_info(query):
    """Return the page contents of the 3 entries in the global `db` most similar to `query`."""
    top_matches = db.similarity_search(query, k=3)
    return [match.page_content for match in top_matches]

In [None]:
# 3. Set up the LLMChain and prompt so that GPT can generate more descriptive output than baseline GPT
# temperature=0 — presumably chosen to keep the output as repeatable as possible.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613")

# Prompt text with two placeholders: {description} is the user's request and
# {metadata} is the list of similar best-practice entries returned by retrieve_info.
template="""
Interpret the user needs by understanding the input along with metadata.


Below is the example of user needs:
{description}

Here is the list of metadata we normally need:
{metadata}


Please explain in one or two sentence what the user wants:"""

# Declares which placeholders must be supplied when the chain is run.
prompt = PromptTemplate(
    input_variables=["description", "metadata"],
    template=template
)


# Chain that fills the prompt and sends it to `llm`; used by generate_description_for_clip.
chain = LLMChain(llm=llm, prompt=prompt)



# 4. Retrieval augmented generation
def generate_description_for_clip(description):
    """Run the global `chain` on `description` plus its most similar best-practice entries.

    The similar entries come from `retrieve_info(description)` and are passed to
    the prompt as "metadata". Returns whatever `chain.run` returns.
    """
    chain_inputs = {
        'description': description,
        'metadata': retrieve_info(description),
    }
    return chain.run(chain_inputs)

# 2 - Updated so it asks follow-up questions until all metadata is collected

In [None]:
# History of the user prompts built during the conversation (kept for reference).
messages = []

res_list = []

prefix = (
    "considering what the user asked before, what is the user looking for with the following request."
    " Only respond with the product description no more than 30 words:"
)

# Attributes the request should cover before a description is generated for CLIP.
required_attributes = ['color', 'price', 'material', 'room']

# Typing one of these (case-insensitive) ends the conversation. Without an exit
# the loop could only be stopped by interrupting the kernel, which prevents
# "Run All" from ever reaching the cells below.
exit_commands = {"quit", "exit"}


def _has_info_on(attribute, needs):
    """Ask gpt-3.5-turbo whether `needs` mentions `attribute`.

    Returns:
        (has_info, raw_reply): `has_info` is False when the model's reply starts
        with "no"; `raw_reply` is the model's unmodified answer.
    """
    question = "Does this have information on " + str(attribute) + " ? " + "Respond in yes or no"
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"{question} {needs}"}],
    )
    raw_reply = chat.choices[0].message.content
    # Only a reply that *starts* with "no" counts as negative. A plain substring
    # test also fires on words such as "know" or "another".
    has_info = not raw_reply.strip().lower().startswith("no")
    return has_info, raw_reply


while True:
    message = input("User : ")
    if message.strip().lower() in exit_commands:
        print("System msg - Conversation ended.")
        break
    if not message:
        # Empty input: ask again.
        continue

    print(f"System msg - User entered: {message}")
    messages.append(
        {"role": "user", "content": f"{prefix} {message}"},
    )

    # `needs` starts as the raw request and is extended with every attribute
    # the user supplies below.
    needs = message
    for attribute in required_attributes:
        has_info, reply2 = _has_info_on(attribute, needs)
        print()
        print("System msg - Did it have information on " + str(attribute) + " ?", reply2)

        if has_info:
            print("System msg - Info on " + str(attribute) + " was provided")
        else:
            print("System msg - No info on " + str(attribute) + " was provided")
            data_asset = input("Is there a specific " + str(attribute) + " you are looking for? :")
            # An empty answer means "no preference"; do not append a dangling " in <attribute> ".
            if data_asset.strip():
                needs = str(needs) + ' in ' + str(attribute) + ' ' + str(data_asset)

        print("System msg - ", needs)
        messages.append(
            {"role": "user", "content": f"{prefix} {needs}"},
        )

    # Using all info collected, generate the description for CLIP with the
    # retrieval-augmented chain instead of the baseline ChatCompletion call.
    print()
    print("System msg - FINAL ", needs)
    reply = generate_description_for_clip(needs)

    print("Description to put to Clip:", reply)





# 3 - Using one of the replies to retrieve matching descriptions from our target data set

In [None]:
# Display the most recent description generated for CLIP by the conversation loop above.
reply

Compute similarity score to match with description

In [None]:

# Load the target data set to search against (judging by the file name, a sample
# of a store's furniture catalogue — confirm), one document per CSV row.
sampleloadfile = CSVLoader(file_path='/content/drive/MyDrive/W210/Datasets/sub_target_store_furniture_datasets_sample.csv')
sampledata = sampleloadfile.load()
#print(data)

embeddings = OpenAIEmbeddings()
# Embed the documents and index the vectors with FAISS (open source, from Meta).
sampledb = FAISS.from_documents(sampledata, embeddings)


In [None]:
def similar_search(query):
    """Return the page contents of the 3 entries in the global `sampledb` most similar to `query`."""
    top_matches = sampledb.similarity_search(query, k=3)
    return [match.page_content for match in top_matches]

In [None]:
# Retrieve the 3 catalogue entries most similar to the generated description and display them.
search_output=similar_search(reply)
search_output

# 4 - ROUGE score to evaluate the top output

This can later be integrated with clip generated output

Steps:

1. User inputs


2. Llamaindex/knowledge graph powered GPT generates user needs

    a. Feeds to Clip - Clip embedding similarity score
    
    b. Similarity score using description

3. With a & b, we compare each description against the user need and compute a ROUGE score

4. Output image/url/description of top 5 items

Conclusion: This way we are not only using CLIP for image search but also using the provided description to find items


In [None]:
# Install the package that provides the `rouge_score` module imported in the next cell.
!pip install rouge-score

In [None]:
from rouge_score import rouge_scorer


def rouge(reply, search_output):
    """Print ROUGE-L scores of each search result against `reply` and name the best match.

    Args:
        reply: Reference text (the generated description of the user's needs).
        search_output: Iterable of candidate texts, e.g. the list returned by
            `similar_search`.

    Prints precision, recall and F1 of ROUGE-L per candidate, then the candidate
    with the highest ROUGE-L precision. Candidates are shown truncated to 100
    characters. Returns None.
    """
    if not search_output:
        # max() over an empty ranking would raise; report and stop instead.
        print("No search results to score.")
        return

    # The scorer does not depend on the candidate, so build it once.
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    rank = {}
    for candidate in search_output:
        print("Scores for ", candidate[0:100])

        scores = scorer.score(reply, candidate)
        rouge_l = scores["rougeL"]
        # ROUGE-L precision is used for ranking for now - can be updated later.
        rank[candidate] = rouge_l.precision

        print("ROUGE-L Precision:", rouge_l.precision)
        print("ROUGE-L Recall:", rouge_l.recall)
        print("ROUGE-L F1 Score:", rouge_l.fmeasure)
        print()

    max_key = max(rank, key=rank.get)
    print("Best match is item: ", max_key[0:100])


In [None]:
# Score the retrieved descriptions against the generated description and print the best match.
rouge(reply, search_output)