# Install the required python packages

In [None]:
!pip install langchain jq sentence_transformers
!pip install faiss-cpu
!pip install openai
!pip install tiktoken

# Import the necessary library

In [2]:
from bs4 import BeautifulSoup
import json
import requests
import os
import time
import pandas as pd
pd.set_option("display.max_colwidth", 1000)
from langchain.document_loaders.json_loader import JSONLoader

**Add you OPENAI KEY**

In [3]:
os.environ["OPENAI_API_KEY"] = "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"

**Add your Test Shopify store URL**

In [4]:
shop_url = "https://quickstart-bde7c873.myshopify.com"
api_version = "2023-07"

## **Connect the shopify store and get the products list as JSON**

In [5]:
def get_all_products(shop_url, api_version):
    all_products = []
    url = f"{shop_url}/admin/api/{api_version}/products.json"
    headers = {"X-Shopify-Access-Token": "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"}
    params = {"limit": 250}
    response = requests.get(url, headers=headers, params=params)
    all_products.extend(response.json()["products"])
    save_json(response.json())
    try:
        while response.links["next"]:
            response = requests.get(response.links["next"]["url"], headers=headers)
            all_products.extend(response.json()["products"])
            time.sleep(2)
    except KeyError:
        return all_products

In [6]:
def save_json(data):
    with open('data.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

In [7]:
all_products = get_all_products(shop_url, api_version)

In [8]:
product_df = pd.DataFrame(all_products)

# **Read the JSON file and show as DataFrame**

In [9]:
def read_json_file(file_path):
    try:
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except json.JSONDecodeError:
        print(f"Error decoding JSON in file: {file_path}")
        return None

In [10]:
file_path = 'data.json'
json_data = read_json_file(file_path)
all_products = json_data['products']

In [11]:
product_df = pd.DataFrame(all_products)

In [None]:
product_df

# **PreProcessing Steps:**

In [13]:
def clean_html_tags(row):
    soup = BeautifulSoup(row["body_html"], "html.parser")
    text = soup.get_text()
    row["body_html"] = text
    return row

def get_img_src(row):
    all_images = []
    for image in row["images"]:
        all_images.append(image["src"])
    row["images_list"] = all_images
    return row

def create_expandend_description(row):
    if row["body_html"] == "" and row["tags"] == "":
        row["expanded_description"] = row["title"]
    elif row["body_html"] == "" and row["tags"] != "":
        row["expanded_description"] = "Title: " + row['title'] + " Tags: " + row['tags']
    elif row["body_html"] != "" and row["tags"] == "":
        row["expanded_description"] = "Title: " + row['title'] + " Description: " +row["body_html"]
    else:
        row["expanded_description"] = "Title: " + row['title'] + " Description: " +row["body_html"] + " Tags: " + row['tags']
    return row

def df_preprocessing(df):
    df = df[df["status"] == "active"]
    df.fillna("", inplace=True)
    df = df.apply(lambda row: get_img_src(row), axis=1)
    df = df.apply(lambda row: create_expandend_description(row), axis=1)
    df = df.apply(lambda row: clean_html_tags(row), axis=1)
    df = df.rename(columns={"body_html": "description"})
    df = df[["id", "title", "handle","description", "expanded_description", "images_list"]]
    return df

In [14]:
cleaned_df = df_preprocessing(product_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.fillna("", inplace=True)
  soup = BeautifulSoup(row["body_html"], "html.parser")


In [15]:
cleaned_df.to_csv("products.csv", index=False)

In [16]:
cleaned_products_json = cleaned_df.to_json(orient="records")
with open("products.json", "w") as f:
    f.write(cleaned_products_json)

# **Define the metadata extraction function.**

In [17]:
def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["id"] = record.get("id")
    metadata["title"] = record.get("title")
    metadata["tags"] = record.get("tags")
    metadata["images_list"] = record.get("images_list")
    metadata["handle"] = record.get("handle")
    return metadata

# **Using Langchain JSONLoader and Create FAISS vectorstore**

In [20]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

def create_vectorstore(documents, embeddings):
    vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)
    return vectorstore

def save_vectorstore(vectorstore, save_path, index_name):
    vectorstore.save_local(save_path, index_name)
    print("Vectorstore saved to: ", save_path)

loader = JSONLoader(
    file_path='./products.json',
    jq_schema='.[]',
    content_key="expanded_description",
    metadata_func=metadata_func
)

In [None]:
documents = loader.load()
embeddings = OpenAIEmbeddings()
vectorstore = create_vectorstore(documents, embeddings)
save_vectorstore(vectorstore, save_path="./shopify_langchaintesting_vectorstore", index_name="products")

# **Load vectorstore from the local**

In [22]:
vectorstore = FAISS.load_local(folder_path="./shopify_langchaintesting_vectorstore", index_name="products", embeddings=OpenAIEmbeddings())

Similarity Search

In [23]:
result = vectorstore.similarity_search("I want a adidas shoe for my child?")

In [24]:
for product in result:
    print(product.page_content)
    print(product.metadata["images_list"])

Title: ADIDAS | KID'S STAN SMITH Description: The Stan Smith owned the tennis court in the '70s. Today it runs the streets with the same clean, classic style. These kids' shoes preserve the iconic look of the original, made in leather with punched 3-Stripes, heel and tongue logos and lightweight step-in cushioning. Tags: adidas, egnition-sample-data, kid
['https://cdn.shopify.com/s/files/1/0640/5003/6932/products/7883dc186e15bf29dad696e1e989e914.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/8cd561824439482e3cea5ba8e3a6e2f6.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/2e1f72987692d2dcc3c02be2f194d6c5.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/6216e82660d881e6f2b0e46dc3f8844a.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/e5247cc373e3b61f18013282a6d9c3c0.jpg?v=1707455380']
Title: NIKE | TODDLER ROSHE ONE Description: The Nike Roshe One Infant Shoe offers brea

In [25]:
result

[Document(page_content="Title: ADIDAS | KID'S STAN SMITH Description: The Stan Smith owned the tennis court in the '70s. Today it runs the streets with the same clean, classic style. These kids' shoes preserve the iconic look of the original, made in leather with punched 3-Stripes, heel and tongue logos and lightweight step-in cushioning. Tags: adidas, egnition-sample-data, kid", metadata={'source': '/content/products.json', 'seq_num': 3, 'id': 7571883000004, 'title': "ADIDAS | KID'S STAN SMITH", 'tags': None, 'images_list': ['https://cdn.shopify.com/s/files/1/0640/5003/6932/products/7883dc186e15bf29dad696e1e989e914.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/8cd561824439482e3cea5ba8e3a6e2f6.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/2e1f72987692d2dcc3c02be2f194d6c5.jpg?v=1707455380', 'https://cdn.shopify.com/s/files/1/0640/5003/6932/products/6216e82660d881e6f2b0e46dc3f8844a.jpg?v=1707455380', 'https://cdn.shopify.com

In [None]:
!zip -r ./shopify_langchaintesting_vectorstore.zip ./shopify_langchaintesting_vectorstore/