# Install the required Python packages

In [None]:
!pip install langchain jq sentence_transformers
!pip install faiss-cpu
!pip install openai
!pip install tiktoken

# Import the necessary libraries

In [None]:
from bs4 import BeautifulSoup
import json
import requests
import os
import time
import pandas as pd
pd.set_option("display.max_colwidth", 1000)
from langchain.document_loaders.json_loader import JSONLoader

**Add your OpenAI API key**

In [None]:
# Placeholder: paste the OpenAI API key between the quotes before running.
# NOTE(review): this assignment overwrites any OPENAI_API_KEY already present
# in the environment; avoid committing a real key with the notebook.
os.environ["OPENAI_API_KEY"] = ""

**Add your Test Shopify store URL**

In [None]:
# Store base URL and Admin API version; get_all_products interpolates both
# into the products endpoint URL.
shop_url = "https://quickstart-bde7c873.myshopify.com"
api_version = "2023-07"

## **Connect to the Shopify store and get the product list as JSON**

In [None]:
def get_all_products(shop_url, api_version):
    """Fetch every product from the store and cache the result in data.json.

    Pages through the ``products.json`` endpoint 250 items at a time,
    following the ``next`` relation of the HTTP ``Link`` header until no
    further page is advertised.

    Args:
        shop_url: Base URL of the Shopify store (no trailing slash).
        api_version: Admin API version segment, e.g. ``"2023-07"``.

    Returns:
        A list with the product dicts of all pages, in the order received.

    Raises:
        requests.HTTPError: If any page request returns an error status.
            Previously an error payload on a later page was swallowed by the
            ``except KeyError`` and a partial list was returned silently.
    """
    url = f"{shop_url}/admin/api/{api_version}/products.json"
    # The access token is read from the SHOPIFY_API_KEY environment variable.
    headers = {"X-Shopify-Access-Token": os.getenv("SHOPIFY_API_KEY")}
    params = {"limit": 250}
    all_products = []

    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        all_products.extend(response.json()["products"])

        # The "next" link already carries its own query string, so the
        # initial params must not be sent again on follow-up requests.
        url = response.links.get("next", {}).get("url")
        params = None
        if url:
            # Pause between pages to stay clear of the API rate limit.
            time.sleep(2)

    # Persist the complete catalogue (not just the first page) using the same
    # {"products": [...]} shape that the API returns, so later cells that
    # read data.json see every product.
    save_json({"products": all_products})
    return all_products

In [None]:
def save_json(data):
    """Write *data* to ``data.json`` as indented, UTF-8 encoded JSON.

    Non-ASCII characters are written as-is rather than escaped.
    """
    with open('data.json', mode='w', encoding='utf-8') as handle:
        handle.write(json.dumps(data, ensure_ascii=False, indent=4))

In [None]:
# Fetch the store's products through get_all_products.
all_products = get_all_products(shop_url, api_version)

In [None]:
# Tabulate the fetched products (presumably one row per product dict).
product_df = pd.DataFrame(all_products)

# **Read the JSON file and show it as a DataFrame**

In [None]:
def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is decoded as UTF-8 explicitly, matching how ``save_json``
    writes it; relying on the platform default encoding breaks on systems
    whose locale is not UTF-8 when the data contains non-ASCII characters.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` (after printing a message) when
        the file does not exist or does not contain valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except json.JSONDecodeError:
        print(f"Error decoding JSON in file: {file_path}")
        return None

In [None]:
# Reload the product payload from the JSON file on disk.
file_path = 'data.json'
json_data = read_json_file(file_path)
# read_json_file returns None for a missing or malformed file; stop here with
# a clear message instead of failing on the subscript below.
if json_data is None:
    raise RuntimeError(f"Could not load product data from {file_path}")
all_products = json_data['products']

In [None]:
# Rebuild the DataFrame from the products loaded from data.json.
product_df = pd.DataFrame(all_products)

In [None]:
# Bare expression: the notebook renders the DataFrame as the cell output.
product_df

# **PreProcessing Steps:**

In [None]:
def clean_html_tags(row):
    """Replace the row's ``body_html`` markup with its plain-text content.

    The row is modified in place and also returned so the function can be
    used with ``DataFrame.apply(..., axis=1)``.
    """
    parsed = BeautifulSoup(row["body_html"], "html.parser")
    row["body_html"] = parsed.get_text()
    return row

def get_img_src(row):
    """Store the ``src`` of every entry in ``row["images"]`` as ``images_list``.

    The row is modified in place and returned.
    """
    row["images_list"] = [picture["src"] for picture in row["images"]]
    return row

def create_expandend_description(row):
    """Combine title, description and tags into ``expanded_description``.

    When both ``body_html`` and ``tags`` are empty strings the bare title is
    used. Otherwise the text starts with ``"Title: <title>"`` and appends a
    ``" Description: ..."`` and/or ``" Tags: ..."`` section for each
    non-empty field. The row is modified in place and returned.
    """
    title, body, tags = row["title"], row["body_html"], row["tags"]

    # Nothing but a title available: keep it unlabelled.
    if body == "" and tags == "":
        row["expanded_description"] = title
        return row

    sections = ["Title: " + title]
    if body != "":
        sections.append(" Description: " + body)
    if tags != "":
        sections.append(" Tags: " + tags)
    row["expanded_description"] = "".join(sections)
    return row

def df_preprocessing(df):
    """Reduce the raw product table to the cleaned columns used for indexing.

    Steps: keep only products whose ``status`` is ``"active"``, replace
    missing values with empty strings, strip HTML from ``body_html``, collect
    the image URLs, build ``expanded_description`` and rename ``body_html``
    to ``description``.

    HTML is stripped *before* the expanded description is built so that the
    text handed to the embedding model contains no markup and so that a body
    consisting only of empty tags is treated as empty.

    Args:
        df: DataFrame of raw product records; the input frame is not modified.

    Returns:
        A new DataFrame with the columns ``id``, ``title``, ``handle``,
        ``description``, ``expanded_description`` and ``images_list``.
    """
    columns = ["id", "title", "handle", "description", "expanded_description", "images_list"]

    # fillna returns a new frame, so the filtered slice is never mutated in
    # place (avoids pandas' SettingWithCopyWarning).
    df = df[df["status"] == "active"].fillna("")

    # Row-wise apply on an empty frame would not create the derived columns,
    # making the column selection below fail; return an empty result instead.
    if df.empty:
        return pd.DataFrame(columns=columns)

    df = df.apply(clean_html_tags, axis=1)
    df = df.apply(get_img_src, axis=1)
    df = df.apply(create_expandend_description, axis=1)
    df = df.rename(columns={"body_html": "description"})
    return df[columns]

In [None]:
# Run the preprocessing pipeline on the raw product table.
cleaned_df = df_preprocessing(product_df)

In [None]:
# Export the cleaned table to products.csv without the index column.
cleaned_df.to_csv("products.csv", index=False)

In [None]:
# Serialize the cleaned table as a JSON array with one object per row and
# write it to products.json, the file the JSONLoader reads later on.
cleaned_products_json = cleaned_df.to_json(orient="records")
with open("products.json", "w") as f:
    f.write(cleaned_products_json)

# **Define the metadata extraction function.**

In [None]:
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected product fields from *record* into *metadata*.

    Fields missing from the record are stored as ``None``. The metadata dict
    is updated in place and returned.

    NOTE(review): ``tags`` is not among the columns kept by
    ``df_preprocessing``, so it will presumably be ``None`` for records read
    from products.json -- confirm whether that column should be retained.
    """
    for field in ("id", "title", "tags", "images_list", "handle"):
        metadata[field] = record.get(field)
    return metadata

# **Load the documents with the LangChain JSONLoader and create a FAISS vectorstore**

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

def create_vectorstore(documents, embeddings):
    """Build and return a FAISS vectorstore from *documents* using *embeddings*."""
    return FAISS.from_documents(documents=documents, embedding=embeddings)

def save_vectorstore(vectorstore, save_path, index_name):
    """Persist *vectorstore* under *save_path* and print a confirmation.

    Args:
        vectorstore: Object exposing ``save_local(folder, index_name)``.
        save_path: Target folder for the index files.
        index_name: Base name passed through to ``save_local``.
    """
    vectorstore.save_local(save_path, index_name)
    confirmation = "Vectorstore saved to: "
    print(confirmation, save_path)

# Configure a loader that turns the records of products.json into documents.
loader = JSONLoader(
    file_path='./products.json',
    jq_schema='.[]',  # select every element of the top-level JSON array
    content_key="expanded_description",  # record field used as the document text
    metadata_func=metadata_func  # copies selected record fields into the metadata
)

In [None]:
# Load the product documents, build a FAISS index over their embeddings and
# save it to a local folder.
documents = loader.load()
# Presumably picks up OPENAI_API_KEY from the environment -- confirm.
embeddings = OpenAIEmbeddings()
vectorstore = create_vectorstore(documents, embeddings)
save_vectorstore(vectorstore, save_path="./shopify_langchaintesting_vectorstore", index_name="products")

# **Load the vectorstore from local disk**

In [None]:
# Reload the index saved above; the same folder and index name must be used.
# NOTE(review): load_local presumably unpickles the stored docstore, so only
# load folders from a trusted source -- confirm against the LangChain docs.
vectorstore = FAISS.load_local(folder_path="./shopify_langchaintesting_vectorstore", index_name="products", embeddings=OpenAIEmbeddings())

Similarity Search

In [None]:
# Query the index with a natural-language request; the matching documents
# are kept in `result`.
result = vectorstore.similarity_search("I want a adidas shoe for my child?")

In [None]:
# Show the text and the image URLs stored in the metadata of each match.
for product in result:
    print(product.page_content)
    print(product.metadata["images_list"])

In [None]:
# Bare expression: the notebook renders the raw result list as the cell output.
result

In [None]:
!zip -r ./shopify_langchaintesting_vectorstore.zip ./shopify_langchaintesting_vectorstore/