# Read in cleaned dataset
(JSON_wrangler.ipynb was run just prior to this)

In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
import edgedb
import json
import numpy as np
import os
import time
import textwrap
import ast
import warnings
warnings.filterwarnings('ignore')

from typing import List
import openai
from openai.embeddings_utils import cosine_similarity, distances_from_embeddings, indices_of_nearest_neighbors_from_distances
# Read the API key from the environment. Indexing os.environ raises a KeyError
# that names the variable when it is missing, which is easier to diagnose than
# assigning a possibly-None value back into os.environ.
openai.api_key = os.environ["OPENAI_API_KEY"]
import tiktoken


# Load the cleaned web-scrape outputs, discarding any row with a missing value.
full_product_df = pd.read_excel("full_product_data.xlsx").dropna()
reviews_df = pd.read_excel("reviews_df.xlsx").dropna()

# Keep only products present in BOTH frames: any ASIN that appears on just one
# side (no reviews, or no product info) is removed from both.
product_asins = set(full_product_df.asin.unique())
reviewed_asins = set(reviews_df.productAsin.unique())
non_matchers = list(product_asins ^ reviewed_asins)
reviews_df = reviews_df[~reviews_df.productAsin.isin(non_matchers)]
full_product_df = full_product_df[~full_product_df.asin.isin(non_matchers)]

# Sanity check: the two counts should be identical after the filter.
print(f'unique products in reviews_df: {reviews_df.productAsin.nunique()}')
print(f'unique products in product_df: {full_product_df.asin.nunique()}')

unique products in reviews_df: 1007
unique products in product_df: 1007


# Generate Embeddings

## Product text field embeddings

In [None]:
display(full_product_df.head())
display(reviews_df.head())

Unnamed: 0,asin,title_text,category,Series,Brand,Item model number,Operating System,price,RAM,Hard Drive,Processor Brand,Processor,Chipset Brand,Graphics Coprocessor,bestseller,seller_text,url,stars,reviewsCount,thumbnailImage,variantAsins
0,B099P4T81H,"HP Chromebase 21.5"" All-in-One Desktop, Intel ...",Desktops,22-aa0022,HP,22-aa0022,chrome os,545.0,4 GB DDR4,128 GB SSD,Intel,2.4 GHz pentium_gold_g5600,Intel,UHD Graphics 600,0,FLEXIBLE FAMILY FUN Designed to live at the h...,https://www.amazon.com/dp/B099P4T81H,4.3,264,https://m.media-amazon.com/images/I/81w3miL-DH...,[]
1,B09YVWMLBP,Dell 2022 Newest Optiplex 3090 Micro Form Fact...,Desktops,Optiplex,Dell,3090,Windows,624.13,16 GB DDR4,512 GB SSD,Intel,2.3 GHz core_i5,Intel,UHD Graphics,0,High Speed RAM And Enormous Space16GB high-ban...,https://www.amazon.com/dp/B09YVWMLBP,5.0,10,https://m.media-amazon.com/images/I/61TIHYXkb4...,"['B0B2VB5ZT1', 'B0B2V1BJYX', 'B09YVWMLBP', 'B0..."
2,B0BS2LCB1X,2018 Apple Mac Mini with 3.2GHz Intel Core i7 ...,Desktops,Apple Mac Mini,Apple,MRTT2LL/A,macOS,515.0,DDR4,128 GB SSD,Intel,3.2 GHz apple_ci7,Intel,UHD Graphics 630,0,"This pre-owned product is not Apple certified,...",https://www.amazon.com/dp/B0BS2LCB1X,4.6,29,https://m.media-amazon.com/images/I/61mujJvG+C...,[]
3,B0BWPKK7RN,Dell OptiPlex 7080 Micro Form Factor Mini Busi...,Desktops,OptiPlex,Dell,7080,Windows,599.0,32 GB DDR4,1 TB SSD,Intel,2.3 GHz core_i5,Intel,UHD Graphics,0,High Speed RAM And Enormous Space32GB high-ban...,https://www.amazon.com/dp/B0BWPKK7RN,4.1,13,https://m.media-amazon.com/images/I/51qO-k6MY1...,"['B0BWQ3F343', 'B0BX21XTPP', 'B0BWQ1PXL3', 'B0..."
4,B0BM8YLTH8,[Gaming PC] KAMRUI Mini PC AMD Ryzen 5 5600U U...,Desktops,AMR5-Ryzen 5 5600U,KAMRUI,AMR5,Windows,479.0,16 GB DDR4,"512 GB 512GB M.2 SSD Included, Support NVME/NG...",AMD,4.2 GHz ryzen_5,AMD,Radeon Vega 7,1,THE KEY TO VICTORYThe KAMRUI AMR5 mini gaming ...,https://www.amazon.com/dp/B0BM8YLTH8,4.5,166,https://m.media-amazon.com/images/I/61oUaIuI0A...,"['B0BM8YLTH8', 'B0BX82ZBMG']"


Unnamed: 0,productAsin,reviewUrl,ratingScore,reviewTitle,reviewDescription,date,all_review_text,wavgHelpfulness
2,B09BS2LFBN,https://www.amazon.com/gp/customer-reviews/R28...,4,"Great laptop, battery could be better","Great laptop, battery could be better",2023-04-24,"Great laptop, battery could be better. Great l...",0.0625
3,B09BS2LFBN,https://www.amazon.com/gp/customer-reviews/R2X...,5,A pleasant laptop with a lot of Horse Power!,"The laptop is fast, intuitive and does what i ...",2023-03-25,A pleasant laptop with a lot of Horse Power!. ...,0.0625
4,B09BS2LFBN,https://www.amazon.com/gp/customer-reviews/RP6...,5,Awesome Laptop,I purchased this for college online. I will be...,2020-10-27,Awesome Laptop. I purchased this for college o...,0.375
5,B09BS2LFBN,https://www.amazon.com/gp/customer-reviews/R38...,4,New computer.,We have only had this a week. It loaded progra...,2021-07-02,New computer.. We have only had this a week. I...,0.0625
6,B09BS2LFBN,https://www.amazon.com/gp/customer-reviews/RR0...,5,So far so good.,So far this has done what I have needed it to ...,2021-09-02,So far so good.. So far this has done what I h...,0.125


Get product `seller_text` embeddings  
(blocking API)

In [None]:
# Index both frames by product ASIN. Each frame is checked on its own so the
# cell is safe to re-run (the column is gone once it has become the index)
# without a bare `except` hiding unrelated errors or skipping the second frame.
if 'productAsin' in reviews_df.columns:
    reviews_df = reviews_df.set_index('productAsin')
if 'asin' in full_product_df.columns:
    full_product_df = full_product_df.set_index('asin')


def get_embedding(text, model="text-embedding-ada-002", max_tokens=8000):
    """Return the OpenAI embedding vector for `text`.

    Newlines are replaced with spaces, and the text is truncated to its first
    `max_tokens` tokens (per the model's tiktoken encoding) before the request.

    Args:
        text: String to embed.
        model: Name of the OpenAI embedding model.
        max_tokens: Token count at or above which the text is truncated.

    Returns:
        The embedding taken from the first item of the API response.
    """
    text = text.replace("\n", " ")
    encoding = tiktoken.encoding_for_model(model)

    # Tokenize once and reuse the tokens for both the length check and the cut.
    tokens = encoding.encode(text)
    if len(tokens) >= max_tokens:
        text = encoding.decode(tokens[:max_tokens])

    return openai.Embedding.create(input=[text], model=model)['data'][0]['embedding']


# Embed every seller description (one blocking API call per product row).
full_product_df['embedding'] = full_product_df['seller_text'].apply(
    get_embedding, model='text-embedding-ada-002'
)

# Persist the frame, embeddings included, as a pickle.
full_product_df.to_pickle("full_product_df.pkl")

In [None]:
full_product_df.head()

Unnamed: 0_level_0,title_text,category,Series,Brand,Item model number,Operating System,price,RAM,Hard Drive,Processor Brand,...,Chipset Brand,Graphics Coprocessor,bestseller,seller_text,url,stars,reviewsCount,thumbnailImage,variantAsins,embedding
asin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B099P4T81H,"HP Chromebase 21.5"" All-in-One Desktop, Intel ...",Desktops,22-aa0022,HP,22-aa0022,chrome os,545.0,4 GB DDR4,128 GB SSD,Intel,...,Intel,UHD Graphics 600,0,FLEXIBLE FAMILY FUN Designed to live at the h...,https://www.amazon.com/dp/B099P4T81H,4.3,264,https://m.media-amazon.com/images/I/81w3miL-DH...,[],"[-0.0008702780469320714, 0.008141257800161839,..."
B09YVWMLBP,Dell 2022 Newest Optiplex 3090 Micro Form Fact...,Desktops,Optiplex,Dell,3090,Windows,624.13,16 GB DDR4,512 GB SSD,Intel,...,Intel,UHD Graphics,0,High Speed RAM And Enormous Space16GB high-ban...,https://www.amazon.com/dp/B09YVWMLBP,5.0,10,https://m.media-amazon.com/images/I/61TIHYXkb4...,"['B0B2VB5ZT1', 'B0B2V1BJYX', 'B09YVWMLBP', 'B0...","[-0.0066131725907325745, 0.009173321537673473,..."
B0BS2LCB1X,2018 Apple Mac Mini with 3.2GHz Intel Core i7 ...,Desktops,Apple Mac Mini,Apple,MRTT2LL/A,macOS,515.0,DDR4,128 GB SSD,Intel,...,Intel,UHD Graphics 630,0,"This pre-owned product is not Apple certified,...",https://www.amazon.com/dp/B0BS2LCB1X,4.6,29,https://m.media-amazon.com/images/I/61mujJvG+C...,[],"[0.015111690387129784, 0.0035265081096440554, ..."
B0BWPKK7RN,Dell OptiPlex 7080 Micro Form Factor Mini Busi...,Desktops,OptiPlex,Dell,7080,Windows,599.0,32 GB DDR4,1 TB SSD,Intel,...,Intel,UHD Graphics,0,High Speed RAM And Enormous Space32GB high-ban...,https://www.amazon.com/dp/B0BWPKK7RN,4.1,13,https://m.media-amazon.com/images/I/51qO-k6MY1...,"['B0BWQ3F343', 'B0BX21XTPP', 'B0BWQ1PXL3', 'B0...","[0.0010904214577749372, 0.006660059094429016, ..."
B0BM8YLTH8,[Gaming PC] KAMRUI Mini PC AMD Ryzen 5 5600U U...,Desktops,AMR5-Ryzen 5 5600U,KAMRUI,AMR5,Windows,479.0,16 GB DDR4,"512 GB 512GB M.2 SSD Included, Support NVME/NG...",AMD,...,AMD,Radeon Vega 7,1,THE KEY TO VICTORYThe KAMRUI AMR5 mini gaming ...,https://www.amazon.com/dp/B0BM8YLTH8,4.5,166,https://m.media-amazon.com/images/I/61oUaIuI0A...,"['B0BM8YLTH8', 'B0BX82ZBMG']","[-0.010817659087479115, 0.00640911515802145, 0..."


In [None]:
# Inspect the first embedding by position; the frame is indexed by ASIN strings,
# so an integer key would rely on pandas' deprecated positional fallback.
type(full_product_df['embedding'].iloc[0])

list

## Reviews text field embeddings
Get `all_review_text` embedding

In [None]:
# Print the first five combined review texts (title + description) in full.
for review_text in reviews_df['all_review_text'].iloc[:5]:
    print(review_text)

Great laptop, battery could be better. Great laptop, battery could be better
A pleasant laptop with a lot of Horse Power!. The laptop is fast, intuitive and does what i want it to do!!Thank you for the fast shopping and will update the review if i run into any issues
Awesome Laptop. I purchased this for college online. I will be traveling and not have access to my desktop. I was surprised by how fast it started, and everything is smooth when transitioning between applications or documents. It comes with Windows already installed.  It only took about 10 minutes for all the updates.  After logging in to my OneDrive account, everything was linked, so there was no need to pull any files off my older devices to transfer to my new laptop.  This makes my assignments easy to edit or turn in no matter where I am.  Highly recommended.  It is light weight, relatively thin yet sturdy, big screen and full keyboard (with the numbers off to the right) I am very delighted with my purchace.
New compute

In [None]:
# Keep only the first occurrence of each distinct review text, then report
# how many reviews remain.
reviews_df = reviews_df.drop_duplicates(subset=['all_review_text'], keep='first')
len(reviews_df)

11750

### Re-clean and save product embeddings
(based on dropped duplicates in reviews_df)

In [None]:
# Drop products whose ASIN no longer appears in the reviews index, printing the
# product count before and after the filter.
print(len(full_product_df))
has_reviews = full_product_df.index.isin(reviews_df.index)
full_product_df = full_product_df[has_reviews]
print(len(full_product_df))

1007
908


In [None]:
# re-export full_product_df to pickle
full_product_df.to_pickle("full_product_df.pkl")

In [None]:
# Keep only the embedding column, moving the frame's index into a regular column.
product_embedding_only_df = full_product_df.loc[:, ['embedding']].reset_index()

# Write the result twice: as a pickle and as JSON Lines (one record per line).
product_embedding_only_df.to_pickle("product_embedding_only_df.pkl")
product_embedding_only_df.to_json(
    "product_embedding_only_df.jsonl",
    orient='records',
    lines=True,
)

### Use parallel processing to generate embeddings for reviews
First, prepare review_text.jsonl as the input to api_request_parallel_processor.py  
__BE VERY CAREFUL -- Need to provide a row_id to openai API, as results will not be returned in provided order__

In [None]:
# Write one embeddings request per review to review_text.jsonl.
# Each request carries its row position as `row_id` so the response can be
# matched back to its review later.
encoding = tiktoken.encoding_for_model("text-embedding-ada-002")
with open('review_text.jsonl', 'w') as jsonlfile:
    for ridx, text in enumerate(reviews_df['all_review_text']):
        text = text.replace("\n", " ")

        # Tokenize once; truncate to the first 8000 tokens when at or over that length.
        tokens = encoding.encode(text)
        if len(tokens) >= 8000:
            text = encoding.decode(tokens[:8000])

        data = {
            "model": "text-embedding-ada-002",
            "input": text,
            "metadata": {"row_id": ridx}
        }

        jsonlfile.write(json.dumps(data) + '\n')

In [None]:
# Download the api_request_parallel_processor.py script from the openai-cookbook repository.
# NOTE(review): the URL points at the `main` branch, so the script version is not pinned.
!curl -O https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/api_request_parallel_processor.py

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 20400  100 20400    0     0  75964      0 --:--:-- --:--:-- --:--:-- 75836--:--:-- --:--:-- --:--:--     0


In [None]:
# Snapshot the review texts (with the frame's index moved into a column) and
# save them as a pickle.
pre_embeddings_reviews_text_df = reviews_df.loc[:, ['all_review_text']].reset_index()
pre_embeddings_reviews_text_df.to_pickle("pre_embeddings_reviews_text_df.pkl")

In [None]:
# Run the parallel request script over review_text.jsonl against the embeddings
# endpoint, saving responses to review_embeddings.jsonl, and print the elapsed
# wall-clock time in seconds.
start = time.time()
!python api_request_parallel_processor.py \
    --requests_filepath review_text.jsonl \
    --save_filepath review_embeddings.jsonl \
    --request_url https://api.openai.com/v1/embeddings \
    --max_requests_per_minute 1500 \
    --max_tokens_per_minute 6250000 \
    --token_encoding_name cl100k_base \
    --max_attempts 5 \
    --logging_level 20
print(f'Time: {time.time() - start}')

In [None]:
# Each saved line has three positional parts: the request, the API response and
# the metadata. Pull the fields needed for the merge out of each part.
# NOTE(review): assumes every response is a dict with a 'data' list; a request
# that ultimately failed may be stored differently -- confirm before relying on this.
reviews_jsonl_df = pd.read_json('review_embeddings.jsonl', lines=True)
requests = reviews_jsonl_df.iloc[:, 0]
responses = reviews_jsonl_df.iloc[:, 1]
metadata = reviews_jsonl_df.iloc[:, 2]
reviews_jsonl_df = reviews_jsonl_df.assign(
    input=requests.map(lambda req: req['input']),
    embedding=responses.map(lambda resp: resp['data'][0]['embedding']),
    row_id=metadata.map(lambda meta: meta['row_id']),
)
reviews_jsonl_df.head()

Unnamed: 0,0,1,2,input,embedding,row_id
0,"{'model': 'text-embedding-ada-002', 'input': '...","{'object': 'list', 'data': [{'object': 'embedd...",{'row_id': 7},Great value. Great value for the money. .moni...,"[0.016028248000000002, 0.00076338614, -0.00106...",7
1,"{'model': 'text-embedding-ada-002', 'input': '...","{'object': 'list', 'data': [{'object': 'embedd...",{'row_id': 5},Laptop. At this point is doing fine. The key b...,"[-0.0128182, 0.0048254845, 0.010741327, -0.006...",5
2,"{'model': 'text-embedding-ada-002', 'input': '...","{'object': 'list', 'data': [{'object': 'embedd...",{'row_id': 39},I will buy it again. The Screen quality is awe...,"[0.0019536542, 0.0045495187000000005, -0.00649...",39
3,"{'model': 'text-embedding-ada-002', 'input': '...","{'object': 'list', 'data': [{'object': 'embedd...",{'row_id': 3},New computer.. We have only had this a week. I...,"[-0.0074131703, -0.0014511476, 0.0050639263, -...",3
4,"{'model': 'text-embedding-ada-002', 'input': '...","{'object': 'list', 'data': [{'object': 'embedd...",{'row_id': 25},Devolución. Buenos días. Quiero volver a inten...,"[-0.028042687, 0.0043717865000000005, -0.00236...",25


In [None]:
reviews_df.to_csv('reviews_df_pre_merge.csv')

In [None]:
# Move the ASIN index into a column so the index becomes the 0..N-1 row
# position, which is what the responses' `row_id` values are matched against.
reviews_df = reviews_df.reset_index()

# Look-ups from row_id to the embedding returned and the text that was sent.
extracted_reviews_df = reviews_jsonl_df[['embedding', 'row_id', 'input']]
by_row_id = extracted_reviews_df.set_index('row_id')
review_index_embedding_dict = by_row_id['embedding'].to_dict()
review_index_text_dict = by_row_id['input'].to_dict()

# Attach both by row position, then keep that position as an explicit column.
reviews_df = (
    reviews_df
    .assign(
        api_input=reviews_df.index.map(review_index_text_dict),
        embedding=reviews_df.index.map(review_index_embedding_dict),
    )
    .rename_axis('row_id_non_api')
    .reset_index(drop=False)
)

# Preview every column except the embedding itself.
reviews_df[reviews_df.columns.difference(['embedding'])].head(20)

Unnamed: 0,all_review_text,api_input,date,productAsin,ratingScore,reviewDescription,reviewTitle,reviewUrl,row_id_non_api,wavgHelpfulness
0,"Great laptop, battery could be better. Great l...","Great laptop, battery could be better. Great l...",2023-04-24,B09BS2LFBN,4,"Great laptop, battery could be better","Great laptop, battery could be better",https://www.amazon.com/gp/customer-reviews/R28...,0,0.0625
1,A pleasant laptop with a lot of Horse Power!. ...,A pleasant laptop with a lot of Horse Power!. ...,2023-03-25,B09BS2LFBN,5,"The laptop is fast, intuitive and does what i ...",A pleasant laptop with a lot of Horse Power!,https://www.amazon.com/gp/customer-reviews/R2X...,1,0.0625
2,Awesome Laptop. I purchased this for college o...,Awesome Laptop. I purchased this for college o...,2020-10-27,B09BS2LFBN,5,I purchased this for college online. I will be...,Awesome Laptop,https://www.amazon.com/gp/customer-reviews/RP6...,2,0.375
3,New computer.. We have only had this a week. I...,New computer.. We have only had this a week. I...,2021-07-02,B09BS2LFBN,4,We have only had this a week. It loaded progra...,New computer.,https://www.amazon.com/gp/customer-reviews/R38...,3,0.0625
4,So far so good.. So far this has done what I h...,So far so good.. So far this has done what I h...,2021-09-02,B09BS2LFBN,5,So far this has done what I have needed it to ...,So far so good.,https://www.amazon.com/gp/customer-reviews/RR0...,4,0.125
5,Laptop. At this point is doing fine. The key b...,Laptop. At this point is doing fine. The key b...,2021-05-28,B09BS2LFBN,5,At this point is doing fine. The key board is ...,Laptop,https://www.amazon.com/gp/customer-reviews/R1L...,5,0.0625
6,out of the box worked perfectly. unit worked a...,out of the box worked perfectly. unit worked a...,2021-04-23,B09BS2LFBN,5,"unit worked as expected, fast, clear, and easy...",out of the box worked perfectly,https://www.amazon.com/gp/customer-reviews/RFU...,6,0.0625
7,Great value. Great value for the money. .moni...,Great value. Great value for the money. .moni...,2020-12-08,B09BS2LFBN,5,Great value for the money. .monitor size & cl...,Great value,https://www.amazon.com/gp/customer-reviews/R1A...,7,0.125
8,Had the same computer but the 14 year older ve...,Had the same computer but the 14 year older ve...,2021-09-03,B09BS2LFBN,5,Trusted for many years,Had the same computer but the 14 year older ve...,https://www.amazon.com/gp/customer-reviews/R1W...,8,0.0625
9,excelente producto. excelente producto,excelente producto. excelente producto,2021-12-07,B0BKTSDFBP,5,excelente producto,excelente producto,https://www.amazon.com/gp/customer-reviews/R30...,9,0.5


In [None]:
# Re-index the reviews by product ASIN and keep only these columns, in this order.
kept_review_columns = [
    'date',
    'all_review_text',
    'reviewTitle',
    'reviewDescription',
    'ratingScore',
    'reviewUrl',
    'wavgHelpfulness',
    'embedding',
]
reviews_df = reviews_df.set_index('productAsin')[kept_review_columns]

# Save the merged reviews (as post-merge_reviews_df) in pickle and Excel form.
reviews_df.to_pickle("post-merge_reviews_df.pkl")
reviews_df.to_excel("post-merge_reviews_df.xlsx")

['row_id_non_api',
 'date',
 'all_review_text',
 'ratingScore',
 'reviewDescription',
 'reviewTitle',
 'reviewUrl',
 'wavgHelpfulness',
 'api_input',
 'embedding']

# Create combined embeddings

In [None]:
full_product_df.head()

Unnamed: 0_level_0,title_text,category,Series,Brand,Item model number,Operating System,price,RAM,Hard Drive,Processor Brand,...,Chipset Brand,Graphics Coprocessor,bestseller,seller_text,url,stars,reviewsCount,thumbnailImage,variantAsins,embedding
asin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B099P4T81H,"HP Chromebase 21.5"" All-in-One Desktop, Intel ...",Desktops,22-aa0022,HP,22-aa0022,chrome os,545.0,4 GB DDR4,128 GB SSD,Intel,...,Intel,UHD Graphics 600,0,FLEXIBLE FAMILY FUN Designed to live at the h...,https://www.amazon.com/dp/B099P4T81H,4.3,264,https://m.media-amazon.com/images/I/81w3miL-DH...,[],"[-0.0008702780469320714, 0.008141257800161839,..."
B09YVWMLBP,Dell 2022 Newest Optiplex 3090 Micro Form Fact...,Desktops,Optiplex,Dell,3090,Windows,624.13,16 GB DDR4,512 GB SSD,Intel,...,Intel,UHD Graphics,0,High Speed RAM And Enormous Space16GB high-ban...,https://www.amazon.com/dp/B09YVWMLBP,5.0,10,https://m.media-amazon.com/images/I/61TIHYXkb4...,"['B0B2VB5ZT1', 'B0B2V1BJYX', 'B09YVWMLBP', 'B0...","[-0.0066131725907325745, 0.009173321537673473,..."
B0BS2LCB1X,2018 Apple Mac Mini with 3.2GHz Intel Core i7 ...,Desktops,Apple Mac Mini,Apple,MRTT2LL/A,macOS,515.0,DDR4,128 GB SSD,Intel,...,Intel,UHD Graphics 630,0,"This pre-owned product is not Apple certified,...",https://www.amazon.com/dp/B0BS2LCB1X,4.6,29,https://m.media-amazon.com/images/I/61mujJvG+C...,[],"[0.015111690387129784, 0.0035265081096440554, ..."
B0BWPKK7RN,Dell OptiPlex 7080 Micro Form Factor Mini Busi...,Desktops,OptiPlex,Dell,7080,Windows,599.0,32 GB DDR4,1 TB SSD,Intel,...,Intel,UHD Graphics,0,High Speed RAM And Enormous Space32GB high-ban...,https://www.amazon.com/dp/B0BWPKK7RN,4.1,13,https://m.media-amazon.com/images/I/51qO-k6MY1...,"['B0BWQ3F343', 'B0BX21XTPP', 'B0BWQ1PXL3', 'B0...","[0.0010904214577749372, 0.006660059094429016, ..."
B0BM8YLTH8,[Gaming PC] KAMRUI Mini PC AMD Ryzen 5 5600U U...,Desktops,AMR5-Ryzen 5 5600U,KAMRUI,AMR5,Windows,479.0,16 GB DDR4,"512 GB 512GB M.2 SSD Included, Support NVME/NG...",AMD,...,AMD,Radeon Vega 7,1,THE KEY TO VICTORYThe KAMRUI AMR5 mini gaming ...,https://www.amazon.com/dp/B0BM8YLTH8,4.5,166,https://m.media-amazon.com/images/I/61oUaIuI0A...,"['B0BM8YLTH8', 'B0BX82ZBMG']","[-0.010817659087479115, 0.00640911515802145, 0..."


Looks good. Now let's take the weighted average embedding so we can do semantic search by product as well.

In [None]:
# Store each review's embedding as a numpy array in a separate column.
reviews_df['np_embedding'] = reviews_df['embedding'].map(np.array)

def weighted_average_embeddings(group):
    """Average a group's review embeddings, weighted by review helpfulness.

    Args:
        group: DataFrame with an 'np_embedding' column (one vector per row) and
            a 'wavgHelpfulness' column (one weight per row).

    Returns:
        The averaged embedding as a plain list. When the weights sum to zero
        the unweighted mean is returned instead, because a weighted average is
        undefined there and `np.average` would raise ZeroDivisionError.
    """
    embeddings = np.array(group['np_embedding'].tolist())  # rows stacked into one array
    weights = np.array(group['wavgHelpfulness'].tolist())
    if weights.sum() == 0:
        return list(embeddings.mean(axis=0))
    return list(np.average(embeddings, weights=weights, axis=0))

# One weighted-average embedding per product: group the reviews on their index
# (product ASINs) and reduce each group to a single vector.
reviews_by_product = reviews_df.groupby(reviews_df.index)
review_embeddings = reviews_by_product.apply(weighted_average_embeddings)

# These are product-level aggregates, so they are saved on their own rather
# than written back into the review-level frame.
review_embeddings.to_pickle("aggregated_reviews_embeddings.pkl")
# NOTE(review): for a Series, orient='records' appears to write only the values,
# so the ASIN labels may be missing from this JSONL file -- verify before use.
review_embeddings.to_json("aggregated_reviews_embeddings.jsonl", orient='records', lines=True)

In [None]:
review_embeddings

productAsin
B00TTJYTRS    [-0.0073379597845945966, 0.00295375172972973, ...
B013HD3INW    [-0.007526235990263159, -0.004943612261842104,...
B01550MWHI    [-0.009526385103333333, -0.00617689943988889, ...
B01CZ2PDFC    [-0.009539026233333334, -0.004302835918666666,...
B01NA98UZX    [-0.003452308511111112, -0.0018490695962962963...
                                    ...                        
B0C2PM4F22    [-0.012277215000000001, -0.0048841042, -0.0180...
B0C339KVH9    [-0.004927401282352942, -0.003952159902941176,...
B0C33KJV5N    [-0.006954563745454546, -0.0024978443977272725...
B0C345J7B4    [-0.002057897227073171, 0.007013551408780487, ...
B0C3S9HKSZ    [-0.00324477514871795, -0.001037581305769231, ...
Length: 908, dtype: object

# Semantic Search for User Query on _Combined_ Embeddings 
(product review embeddings & product description embeddings averaged)

In [None]:
full_product_df['np_embedding'] = full_product_df['embedding'].apply(np.array)

def combine_embeddings(product_asin):
    """Return the combined embedding for one product.

    The result is the element-wise mean of the product's description embedding
    and its aggregated review embedding. A product with no entry in
    `review_embeddings` keeps its description embedding unchanged.
    """
    product_vector = full_product_df.loc[product_asin, 'np_embedding']
    if product_asin not in review_embeddings.index:
        return product_vector
    return (product_vector + review_embeddings[product_asin]) / 2

# Build the combined vector for every product, keyed by the frame's ASIN index.
full_product_df['combined_embedding'] = full_product_df.index.map(combine_embeddings)

Define some functions to find and print most similar products (based on combined embeddings)  
& most similar reviews for each of those products  
(using cosine similarity)

In [None]:
def recommend_products(query_embedding, df, n=5):
    """Return the `n` products most similar to the query, without near-duplicates.

    Args:
        query_embedding: Embedding vector of the user query.
        df: Product frame with 'combined_embedding' and 'title_text' columns.
            A 'similarities' column is added to (or overwritten on) this frame.
        n: Number of products to return.

    Returns:
        Up to `n` rows of `df`, ordered by descending cosine similarity.
    """
    df['similarities'] = df['combined_embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    # Over-fetch (n*2) so that n results are still likely to remain after de-duplication.
    recommended = df.sort_values('similarities', ascending=False).head(n * 2)
    # Products whose titles share the same first two words (case-insensitive)
    # count as duplicates; only the best-ranked one is kept. The two words are
    # joined into a single string because list values are unhashable and
    # cannot be passed to `duplicated`.
    title_key = recommended['title_text'].str.lower().str.split().str[:2].str.join(' ')
    recommended = recommended[~title_key.duplicated(keep='first')]
    return recommended.head(n)

def get_most_similar_review(query_embedding, df, n=1):
    """Return the `n` rows of `df` whose 'embedding' is closest to the query.

    Closeness is cosine similarity; the score is written to a 'similarities'
    column on `df`, and the returned rows are sorted best-first.
    """
    def _score(review_embedding):
        return cosine_similarity(review_embedding, query_embedding)

    df['similarities'] = df['embedding'].apply(_score)
    ranked = df.sort_values('similarities', ascending=False)
    return ranked.head(n)

# Wraps review text to 80 columns with a nine-space indent on every line.
wrapper = textwrap.TextWrapper(width=80, initial_indent='         ', subsequent_indent='         ')

def print_recommendations(query, df_to_search, n=5, review=False):
    """Embed `query`, then print the top `n` recommended products.

    Args:
        query: Free-text user query.
        df_to_search: Product frame passed to `recommend_products`.
        n: Number of products to print.
        review: When True, also print each product's most similar review,
            looked up in the notebook-level `reviews_df` by product index.
    """
    query_embedding = get_embedding(query, model='text-embedding-ada-002')
    recommendations = recommend_products(query_embedding, df_to_search, n)
    for i, (index, row) in enumerate(recommendations.iterrows(), start=1):
        print(f"RANK #{i}:\n")
        print(f"PRODUCT ID: {index}")
        print(f"PRODUCT SIMILARITY SCORE: {row['similarities']}")
        print(f"PRODUCT TITLE: {row['title_text']}")
        # Always show the description, wrapped at 100 columns for readability
        # (short descriptions pass through the wrap unchanged).
        print(f"PRODUCT DESCRIPTION:\n{textwrap.fill(row['seller_text'], 100)}")
        print(f"NUMBER OF REVIEWS:{row['reviewsCount']} reviews")
        print(f"PRODUCT URL:{row['url']}")
        if review:
            print('-----')
            print('         MOST SIMILAR REVIEW:\n')
            # The list selector keeps a DataFrame even when the product has a
            # single review; a scalar label would return a Series in that case.
            product_reviews = reviews_df.loc[[index]]
            top_review = get_most_similar_review(query_embedding, product_reviews, n=1).iloc[0]
            print('         REVIEW TITLE:', top_review['reviewTitle'])
            print('         REVIEW TEXT:', wrapper.fill(top_review['reviewDescription']))
            print('         REVIEW RATING:', top_review['ratingScore'])
            print('         REVIEW DATE:', top_review['date'])
            print('         REVIEW URL:', top_review['reviewUrl'])
            print('         REVIEW SIMILARITY:', top_review['similarities'])
            print('-----')
        print('------------------------------------------------------------------------------\n\n')

In [None]:
user_query = "I'm looking for a lightweight laptop with long battery life."
# recommended_products = recommend_products(user_query, full_product_df, n=5) # <-- this function returns a df...used by print_recommendations
print_recommendations(user_query, full_product_df, n=5, review=True)

RANK #1:

PRODUCT ID: B08M2X68W5
PRODUCT SIMILARITY SCORE: 0.8890144427797319
PRODUCT TITLE: Samsung Chromebook 4 (2021 Model) 11.6" Intel UHD Graphics 600/ Celeron Processor N4020, 4GB, 32GB, Wi-Fi - (XE310XBA-KA1US)
PRODUCT DESCRIPTION:
Compact, light design with 11. 6 display. Military-grade durability. Ultra-fast connectivity with
Gigabit Wi-Fi. Advanced security with multiple layers of built-in virus protection. 12. 5 hours of
battery life.
NUMBER OF REVIEWS:2197 reviews
PRODUCT URL:https://www.amazon.com/dp/B08M2X68W5
-----
         MOST SIMILAR REVIEW:

         REVIEW TITLE: Serves its purpose
         REVIEW TEXT:          This is smaller than my previous Chromebook, but I think it will be
         great for traveling and it will still do the job as I do a lot of
         documents and typing.  I haven't used it yet for any continuous length
         of time so I can't comment on the battery life. It was effortless to
         get it going and it synced all my data from my pre

# VSS on Individual Embeddings (product reviews & product description separately)
### Note that results are different from above combined embeddings search

In [None]:
# from openai.embeddings_utils import cosine_similarity

def search_products_and_reviews(product_df, review_df, user_query, n=1, dataset="both"):
    """Print and return the rows most similar to `user_query`.

    Similarity is the cosine similarity between the query embedding and each
    row's 'embedding'. Only the frame(s) selected by `dataset` are scored; a
    scored frame gets a 'similarities' column added or overwritten.

    Args:
        product_df: Product frame, indexed by ASIN.
        review_df: Review frame, indexed by product ASIN.
        user_query: Free-text query to embed.
        n: Number of top rows to keep per frame.
        dataset: "products", "reviews" or "both".

    Returns:
        `top_products`, `top_reviews`, or the tuple `(top_products, top_reviews)`
        when `dataset` is "both".

    Raises:
        ValueError: If `dataset` is not one of the three accepted values.
    """
    if dataset not in ("products", "reviews", "both"):
        raise ValueError(f'dataset must be "products", "reviews" or "both", got {dataset!r}')

    query_embedding = get_embedding(user_query, model='text-embedding-ada-002')

    def _top_matches(df):
        # Score every row against the query, then keep the n best.
        df['similarities'] = df['embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
        return df.nlargest(n, 'similarities')

    top_products = None
    top_reviews = None

    if dataset in ("products", "both"):
        top_products = _top_matches(product_df)
        print("\nTop similar products:")
        for index, row in top_products.iterrows():
            print("----------")
            print(f"Product type: {row['category']}")
            print(f"Product ASIN: {index}")
            print(f"Title text: {row['title_text']}")
            print(f"Seller text: {textwrap.fill(row['seller_text'], 100)}")
            print(f"Similarity: {row['similarities']}")
            print(f"Variants: {row['variantAsins']}")

    if dataset in ("reviews", "both"):
        top_reviews = _top_matches(review_df)
        print("\nTop similar reviews:")
        for index, row in top_reviews.iterrows():
            print("----------")
            print(f"Product ASIN: {index}")
            print(f"Review title: {row['reviewTitle']}")
            print(f"Stars: {row['ratingScore']}")
            print(f"All review text: {textwrap.fill(row['reviewDescription'], 100)}")
            print(f"Similarity: {row['similarities']}")

    if dataset == "both":
        return top_products, top_reviews
    return top_products if dataset == "products" else top_reviews

The search capabilities on the reviews are particularly impressive (see the examples below)
This makes sense since  
* We have significantly more data for reviews than for products
* A typical user query can be expected to be more similar to a review (written by another user) than to the product description (written by the seller and typically focused more on specs)

Though my initial plan was to use the combined embeddings, I may have to consider incorporating a search directly on reviews into the application

In [None]:
user_query = "I'm looking for a lightweight laptop with long battery life."
top_reviews = search_products_and_reviews(full_product_df, reviews_df, user_query, n=1, dataset="reviews")


Top similar reviews:
----------
Product ASIN: B09S42WS5C
Review title: A decent laptop
Stars: 4
All review text: Good colors, lightweight and good battery life
Similarity: 0.9001835876711674


In [None]:
user_query = "Desktop for machine learning"
top_reviews = search_products_and_reviews(full_product_df, reviews_df, user_query, n=1, dataset="reviews")


Top similar reviews:
----------
Product ASIN: B0BPXKNYGN
Review title: machine learning portable PC
Stars: 5
All review text: Usage:After powering on, it was pre-installed, setting up my accounts. I was ready to go and use my
PC. Noise comparing to other similar rig is lower. Even at peak running the machine learning model,
it did not powered off or BSOD.Machine learning:Portable PC, compatible with my machine learning
workspace. I primarily has been running GPU intensve algorithm in Jupyter notebook. My few
impression for the size of the device. I am happy to use for most machine learning, adding deeper
neural networks. On some test SVM model, the device has performed significant better than my
expectation.
Similarity: 0.8590597941840741


In [None]:
user_query = "Laptop that supports 3 external monitors"
top_reviews = search_products_and_reviews(full_product_df, reviews_df, user_query, n=1, dataset="reviews")


Top similar reviews:
----------
Product ASIN: B08KGYDR16
Review title: Great computer for work
Stars: 5
All review text: I run ridiculously large Excel files and macros all day for work and this laptop has just enough
juice to get the job done. Specs are exactly how you'd expect them to be for a laptop dedicated to
working remotely. An issue I ran into when I wanted to add a third monitor to increase my workflow
was that I quickly learned the USB Type-C port does not support adding an additional display, since
there's only one HDMI port that made adding a third monitor pretty difficult, HOWEVER - I found a
great solution after hours of research and hopefully, this saves someone some time.
https://www.amazon.com/dp/B08HN2X88P?psc=1&ref=ppx_yo2_dt_b_product_details - This product works
great it plugs right into the USB Type-2 port and has its own external display drive and I am now
using 2 additional monitors on my laptop while working. Aside from that little hiccup from the
manufacture

In [None]:
# Review-only search for a desktop that can run a specific game.
user_query = "Desktop that supports playing Cyberpunk 2077"
top_reviews = search_products_and_reviews(
    product_df=full_product_df,
    review_df=reviews_df,
    user_query=user_query,
    n=1,
    dataset="reviews",
)


Top similar reviews:
----------
Product ASIN: 9484
Review title: Great Gaming Computer with Lit Keyboard & Mouse
Stars: 4
All review text: Really enjoying my new Gaming Computer, had it just over 2 weeks with maybe 120 hours so far. The
Lighting & mouse & keyboard effects are cool & great in low light. So far this Beast has taken every
Game I've loaded onto it without a hiccup.I bought it to play CYBERPUNK 2077 & it's performed
flawlessly (The Game has some glitches that are NOT Computer related) so while I wait for those to
be patched I'm playing all the Games I'd purchased over the last 3 years my old System couldn't
play, like JUST CAUSE 3 & MASTER CHIEF-HALO COLLECTION.I'm not a techie so I can't Tech Splain the
nuts & bolts, all I can say is I am VERY happy with this Unit's performance, graphics & speed.I just
if I do have a complaint it's that the Hard Drive is "only" 500 GB so my # of  ready to play games
is limited.I do recommend this Gaming System!
Similarity: 0.8254174532257

In [None]:
# Review-only search for a 3D-modeling laptop.
user_query = "3D modeling laptop"
top_reviews = search_products_and_reviews(
    product_df=full_product_df,
    review_df=reviews_df,
    user_query=user_query,
    n=1,
    dataset="reviews",
)


Top similar reviews:
----------
Product ASIN: B0B9RZCFSR
Review title: Wonderful laptop!
Stars: 5
All review text: This laptop is great for the two things I use it mainly for, those being gaming and making 3D art
(Blender). The screen is very vibrant and the extra exterior lights make the laptop very unique
Similarity: 0.8706951564174887


In [None]:
# Review-only search for lag-free video editing.
user_query = "Video editing without lagging"
top_reviews = search_products_and_reviews(
    product_df=full_product_df,
    review_df=reviews_df,
    user_query=user_query,
    n=1,
    dataset="reviews",
)


Top similar reviews:
----------
Product ASIN: B0BS2LCB1X
Review title: Still useful in 2023
Stars: 5
All review text: Fast enough for video editing 1080p
Similarity: 0.8451266286747238


# Ensure that saving to csv works as expected
A `.to_csv()` followed by `.read_csv()` round trip automatically converts each list-valued embedding to a string.  
No truncation occurs for these list columns, so `ast.literal_eval()` works for converting the string back to a list.  
When the data is queried from the db, `ast.literal_eval()` will need to be applied first, followed by a conversion to a NumPy array as necessary.  
__Note that the `np_embedding` field does get converted to a truncated string when saving/loading from csv.__

In [None]:
# Row counts of the two frames at this point in the notebook.
print(
    f'Final number of products: {len(full_product_df)}',
    f'Final number of reviews: {len(reviews_df)}',
    sep='\n',
)

Final number of products: 908
Final number of reviews: 11750


In [None]:
# Prepare the product frame for export. Every step is guarded so that the cell
# can be re-run without raising (the unguarded version fails on a second run
# because 'similarities' is already gone and the embeddings are already lists).
full_product_df.drop(columns=['similarities'], inplace=True, errors='ignore')

# Move the current index into a regular column, but only once: after the first
# run the index is a plain RangeIndex and resetting again would be wrong.
if not isinstance(full_product_df.index, pd.RangeIndex):
    full_product_df.reset_index(inplace=True, drop=False)

# combined_embedding holds NumPy arrays; store plain lists so the CSV text can
# be parsed back with ast.literal_eval.  <-- product only
full_product_df['combined_embedding'] = full_product_df['combined_embedding'].apply(
    lambda x: x.tolist() if isinstance(x, np.ndarray) else x
)

full_product_df.to_csv('final_full_product_df.csv', index=False)

# Read the file straight back to inspect what the round trip did to the embedding column.
test_product_df = pd.read_csv('final_full_product_df.csv')
type(test_product_df.embedding[0])

str

In [None]:
# Length of the in-memory embedding, then of the same embedding parsed back
# from its CSV string; repeated for the combined embedding.
print(len(full_product_df['embedding'][0]))
print(len(ast.literal_eval(test_product_df['embedding'][0])))

print(len(full_product_df['combined_embedding'][0]))
print(len(ast.literal_eval(test_product_df['combined_embedding'][0])))

1536
1536
1536
1536


In [None]:
# Prepare the review frame for export. Every step is guarded so that the cell
# can be re-run without raising (the unguarded version fails on a second run
# because 'similarities' has already been dropped).
reviews_df.drop(columns=['similarities'], inplace=True, errors='ignore')

# Move the current index into a regular column, but only once: after the first
# run the index is a plain RangeIndex and resetting again would be wrong.
if not isinstance(reviews_df.index, pd.RangeIndex):
    reviews_df.reset_index(inplace=True, drop=False)

reviews_df.to_csv('final_reviews_df.csv', index=False)

# Read the file straight back to inspect what the round trip did to the embedding column.
test_reviews_df = pd.read_csv('final_reviews_df.csv')
type(test_reviews_df.embedding[0])

str

In [None]:
# Length of the in-memory review embedding, then of the one parsed back from its CSV string.
print(len(reviews_df['embedding'][0]))
print(len(ast.literal_eval(test_reviews_df['embedding'][0])))

1536
1536


## Compare loaded vs. existing

In [None]:
# Shapes of the frames reloaded from CSV (products first, then reviews).
print(test_product_df.shape, test_reviews_df.shape, sep='\n')

(908, 25)
(11750, 10)


In [None]:
# Shapes of the in-memory frames (products first, then reviews).
print(full_product_df.shape, reviews_df.shape, sep='\n')

(908, 25)
(11750, 10)


In [None]:
def report_dtype_mismatches(original_df, loaded_df):
    """Print each column whose first value has a different Python type in the two frames.

    Args:
        original_df: Frame whose columns are iterated.
        loaded_df: Frame compared against; it must contain the same column names.
    """
    for c in original_df.columns:
        # .iloc[0] takes the first row by position, so the check does not
        # depend on the index containing the label 0.
        original_type = type(original_df[c].iloc[0])
        loaded_type = type(loaded_df[c].iloc[0])
        if original_type != loaded_type:
            print(f'Column {c} has different types: {original_type} vs {loaded_type}')


print('*** Product dtypes ***\n')
report_dtype_mismatches(full_product_df, test_product_df)

print('\n*** Review dtypes ***\n')
report_dtype_mismatches(reviews_df, test_reviews_df)

*** Product dtypes ***

Column embedding has different types: <class 'list'> vs <class 'str'>
Column np_embedding has different types: <class 'numpy.ndarray'> vs <class 'str'>
Column combined_embedding has different types: <class 'list'> vs <class 'str'>

*** Review dtypes ***

Column embedding has different types: <class 'list'> vs <class 'str'>
Column np_embedding has different types: <class 'numpy.ndarray'> vs <class 'str'>


# Functions

In [None]:
def get_embedding(text, model="text-embedding-ada-002", max_tokens=8000):
    """Return the OpenAI embedding for ``text``.

    Newlines are replaced with spaces, and the text is cut down to its first
    ``max_tokens`` tokens (counted with the tiktoken encoding for ``model``)
    before the request is sent.

    Args:
        text: String to embed.
        model: Model name, used both to pick the tiktoken encoding and for the
            embedding request.
        max_tokens: Maximum number of tokens to send.

    Returns:
        The ``embedding`` entry of the first item in the API response.
    """
    text = text.replace("\n", " ")

    # Tokenize once and reuse the result for both the length check and the truncation.
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(text)
    if len(tokens) >= max_tokens:
        text = encoding.decode(tokens[:max_tokens])

    return openai.Embedding.create(input=[text], model=model)['data'][0]['embedding']

In [None]:
def weighted_average_embeddings(group):
    """Average the rows' embeddings, weighted by their ``wavgHelpfulness`` values.

    Args:
        group: Frame with an ``np_embedding`` column (one vector per row) and a
            ``wavgHelpfulness`` column (one weight per row).
            NOTE(review): presumably one product's reviews from a groupby - confirm.

    Returns:
        A list holding the element-wise weighted average of the embeddings. When
        the weights sum to zero the unweighted mean is returned instead.
    """
    embeddings = np.array(group['np_embedding'].tolist())  # stacked row vectors
    weights = np.array(group['wavgHelpfulness'].tolist())

    # np.average raises ZeroDivisionError when the weights sum to zero; treat
    # that case as "all rows count equally" rather than failing.
    if weights.sum() == 0:
        return list(embeddings.mean(axis=0))

    return list(np.average(embeddings, weights=weights, axis=0))

In [None]:
def combine_embeddings(product_asin):
    """Blend a product's own embedding with its review embedding.

    Reads the notebook-level ``full_product_df`` and ``review_embeddings``.
    Returns the element-wise mean of the two vectors when ``product_asin`` is
    present in ``review_embeddings``; otherwise the product embedding alone.
    """
    product_vector = full_product_df.loc[product_asin, 'np_embedding']
    if product_asin not in review_embeddings.index:
        return product_vector
    return (product_vector + review_embeddings[product_asin]) / 2

In [None]:
def recommend_products(query_embedding, df, n=5):
    """Return up to ``n`` rows of ``df`` ranked by similarity to ``query_embedding``.

    Side effect: writes a ``similarities`` column onto ``df``.

    Args:
        query_embedding: Embedding vector of the query.
        df: Frame with ``combined_embedding`` and ``title_text`` columns.
        n: Maximum number of rows to return.

    Returns:
        The top rows by cosine similarity after removing rows whose titles share
        the same first two words (case-insensitive); the best-ranked row of each
        such group is kept.
    """
    df['similarities'] = df['combined_embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    # Over-fetch (n*2) so that n rows are more likely to remain after near-duplicates are removed.
    recommended = df.sort_values('similarities', ascending=False).head(n*2)
    # Build the de-duplication key as a string: .str[:2] yields lists, and lists
    # are unhashable, so Series.duplicated cannot be applied to them directly.
    title_key = recommended['title_text'].str.lower().str.split().str[:2].str.join(' ')
    recommended = recommended[~title_key.duplicated(keep='first')]
    return recommended.head(n)

def get_most_similar_review(query_embedding, df, n=1):
    """Return the ``n`` rows of ``df`` whose ``embedding`` is closest to ``query_embedding``.

    Side effect: writes a ``similarities`` column onto ``df``.
    """
    def score(review_embedding):
        return cosine_similarity(review_embedding, query_embedding)

    df['similarities'] = df['embedding'].apply(score)
    ranked = df.sort_values('similarities', ascending=False)
    return ranked.head(n)

wrapper = textwrap.TextWrapper(width=80, initial_indent='         ', subsequent_indent='         ')


def print_recommendations(query, df_to_search, n=5, review=False):
    """Embed ``query``, rank the products in ``df_to_search`` and print the results.

    Args:
        query: Free-text search string.
        df_to_search: Product frame handed to ``recommend_products``.
        n: Maximum number of products to print.
        review: When True, also print the most similar review for each product,
            looked up in the notebook-level ``reviews_df`` by the product's
            index label.
            NOTE(review): this assumes both frames are indexed by the same
            product identifier - confirm before calling after a reset_index.

    Returns:
        None; everything is printed.
    """
    query_embedding = get_embedding(query, model='text-embedding-ada-002')
    recommendations = recommend_products(query_embedding, df_to_search, n)
    for i, (index, row) in enumerate(recommendations.iterrows(), start=1):
        print(f"RANK #{i}:\n")
        print(f"PRODUCT ID: {index}")
        print(f"PRODUCT SIMILARITY SCORE: {row['similarities']}")
        print(f"PRODUCT TITLE: {row['title_text']}")
        # Always show the description (it used to be skipped when it was 100
        # characters or shorter); textwrap.fill wraps it across lines for readability.
        print(f"PRODUCT DESCRIPTION:\n{textwrap.fill(row['seller_text'], 100)}")
        print(f"NUMBER OF REVIEWS:{row['reviewsCount']} reviews")
        print(f"PRODUCT URL:{row['url']}")
        if review:
            print('-----')
            print('         MOST SIMILAR REVIEW:\n')
            # A list selector always yields a DataFrame, even when the product
            # has a single review (a scalar label would give a Series). The copy
            # keeps the helper's 'similarities' column off reviews_df.
            product_reviews = reviews_df.loc[[index]].copy()
            top_review = get_most_similar_review(query_embedding, product_reviews, n=1)
            best_review = top_review.iloc[0]
            print('         REVIEW TITLE:', best_review['reviewTitle'])
            print('         REVIEW TEXT:', wrapper.fill(best_review['reviewDescription']))
            print('         REVIEW RATING:', best_review['ratingScore'])
            print('         REVIEW DATE:', best_review['date'])
            print('         REVIEW URL:', best_review['reviewUrl'])
            print('         REVIEW SIMILARITY:', best_review['similarities'])
            print('-----')
        print('------------------------------------------------------------------------------\n\n')

In [None]:
# from openai.embeddings_utils import cosine_similarity

def search_products_and_reviews(product_df, review_df, user_query, n=1, dataset="both"):
    """Embed ``user_query`` and print/return the most similar products and/or reviews.

    Side effect: writes a ``similarities`` column onto both ``product_df`` and
    ``review_df``, whichever ``dataset`` is requested.

    Args:
        product_df: Product frame with an ``embedding`` column.
        review_df: Review frame with an ``embedding`` column.
        user_query: Free-text search string.
        n: Number of top rows to take from each frame.
        dataset: ``"products"``, ``"reviews"`` or ``"both"``; selects what is
            printed and returned.

    Returns:
        ``(top_products, top_reviews)`` for ``"both"``, a single frame for
        ``"products"`` or ``"reviews"``, and ``None`` for any other value.
    """
    query_embedding = get_embedding(user_query, model='text-embedding-ada-002')

    # Both frames are always scored: the export cells of this notebook drop the
    # 'similarities' column from each of them.
    product_df['similarities'] = product_df['embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    top_products = product_df.nlargest(n, 'similarities')

    review_df['similarities'] = review_df['embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    top_reviews = review_df.nlargest(n, 'similarities')

    if dataset in ("products", "both"):
        print("\nTop similar products:")
        for index, row in top_products.iterrows():
            print("----------")
            print(f"Product type: {row['category']}")
            # After reset_index / a CSV reload the index is a row number, so
            # prefer the 'asin' column and fall back to the index label.
            print(f"Product ASIN: {row.get('asin', index)}")
            print(f"Title text: {row['title_text']}")
            print(f"Seller text: {textwrap.fill(row['seller_text'], 100)}")
            print(f"Similarity: {row['similarities']}")
            print(f"Variants: {row['variantAsins']}")

    if dataset in ("reviews", "both"):
        print("\nTop similar reviews:")
        for index, row in top_reviews.iterrows():
            print("----------")
            # Same fallback as above, using the review frame's 'productAsin' column.
            print(f"Product ASIN: {row.get('productAsin', index)}")
            print(f"Review title: {row['reviewTitle']}")
            print(f"Stars: {row['ratingScore']}")
            print(f"All review text: {textwrap.fill(row['reviewDescription'], 100)}")
            print(f"Similarity: {row['similarities']}")

    if dataset == "both":
        return top_products, top_reviews
    elif dataset == "products":
        return top_products
    elif dataset == "reviews":
        return top_reviews

# Load and check

In [None]:
# Reload the exported product table; the trailing expression shows the type
# the embedding cell came back as.
test_product_df = pd.read_csv('final_full_product_df.csv')
type(test_product_df['embedding'][0])

str

In [None]:
# Parse the stored strings back into lists and show their lengths.
print(len(ast.literal_eval(test_product_df['embedding'][0])))

print(len(ast.literal_eval(test_product_df['combined_embedding'][0])))

1536
1536


In [None]:
# Reload the exported review table; the trailing expression shows the type
# the embedding cell came back as.
test_reviews_df = pd.read_csv('final_reviews_df.csv')
type(test_reviews_df['embedding'][0])

str

In [None]:
# Parse the stored string back into a list and show its length.
print(len(ast.literal_eval(test_reviews_df['embedding'][0])))

1536


## Compare loaded vs. existing

In [None]:
# Shapes of the frames reloaded from CSV (products first, then reviews).
print(test_product_df.shape, test_reviews_df.shape, sep='\n')

(908, 25)
(11750, 10)


In [None]:
# from openai.embeddings_utils import cosine_similarity

# NOTE: this re-defines the search_products_and_reviews function from the
# "Functions" section, so the later definition is the one in effect from here on.
def search_products_and_reviews(product_df, review_df, user_query, n=1, dataset="both"):
    """Embed ``user_query`` and print/return the most similar products and/or reviews.

    Side effect: writes a ``similarities`` column onto both ``product_df`` and
    ``review_df``, whichever ``dataset`` is requested.

    Args:
        product_df: Product frame with an ``embedding`` column.
        review_df: Review frame with an ``embedding`` column.
        user_query: Free-text search string.
        n: Number of top rows to take from each frame.
        dataset: ``"products"``, ``"reviews"`` or ``"both"``; selects what is
            printed and returned.

    Returns:
        ``(top_products, top_reviews)`` for ``"both"``, a single frame for
        ``"products"`` or ``"reviews"``, and ``None`` for any other value.
    """
    query_embedding = get_embedding(user_query, model='text-embedding-ada-002')

    # Both frames are always scored, regardless of which one is reported.
    product_df['similarities'] = product_df['embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    top_products = product_df.nlargest(n, 'similarities')

    review_df['similarities'] = review_df['embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
    top_reviews = review_df.nlargest(n, 'similarities')

    if dataset in ("products", "both"):
        print("\nTop similar products:")
        for index, row in top_products.iterrows():
            print("----------")
            print(f"Product type: {row['category']}")
            # Frames reloaded from CSV are indexed by row number, so prefer the
            # 'asin' column and fall back to the index label.
            print(f"Product ASIN: {row.get('asin', index)}")
            print(f"Title text: {row['title_text']}")
            print(f"Seller text: {textwrap.fill(row['seller_text'], 100)}")
            print(f"Similarity: {row['similarities']}")
            print(f"Variants: {row['variantAsins']}")

    if dataset in ("reviews", "both"):
        print("\nTop similar reviews:")
        for index, row in top_reviews.iterrows():
            print("----------")
            # Same fallback as above, using the review frame's 'productAsin' column.
            print(f"Product ASIN: {row.get('productAsin', index)}")
            print(f"Review title: {row['reviewTitle']}")
            print(f"Stars: {row['ratingScore']}")
            print(f"All review text: {textwrap.fill(row['reviewDescription'], 100)}")
            print(f"Similarity: {row['similarities']}")

    if dataset == "both":
        return top_products, top_reviews
    elif dataset == "products":
        return top_products
    elif dataset == "reviews":
        return top_reviews

In [None]:
# Convert the embedding columns from their CSV string form back to lists.
def _parse_embedding(value):
    """Parse a stringified embedding; values that are not strings pass through unchanged."""
    # The pass-through makes the cell safe to re-run: ast.literal_eval rejects
    # values that have already been converted to lists.
    return ast.literal_eval(value) if isinstance(value, str) else value


test_product_df['embedding'] = test_product_df['embedding'].apply(_parse_embedding)
print(type(test_product_df['embedding'][0]))
test_product_df['combined_embedding'] = test_product_df['combined_embedding'].apply(_parse_embedding)
print(type(test_product_df['combined_embedding'][0]))
test_reviews_df['embedding'] = test_reviews_df['embedding'].apply(_parse_embedding)
print(type(test_reviews_df['embedding'][0]))

<class 'list'>
<class 'list'>


In [None]:
# Same three-monitor query, this time against the frames reloaded from CSV.
user_query = "Laptop that supports 3 external monitors"
top_reviews = search_products_and_reviews(
    product_df=test_product_df,
    review_df=test_reviews_df,
    user_query=user_query,
    n=1,
    dataset="reviews",
)


Top similar reviews:
----------
Product ASIN: 5630
Review title: Great computer for work
Stars: 5
All review text: I run ridiculously large Excel files and macros all day for work and this laptop has just enough
juice to get the job done. Specs are exactly how you'd expect them to be for a laptop dedicated to
working remotely. An issue I ran into when I wanted to add a third monitor to increase my workflow
was that I quickly learned the USB Type-C port does not support adding an additional display, since
there's only one HDMI port that made adding a third monitor pretty difficult, HOWEVER - I found a
great solution after hours of research and hopefully, this saves someone some time.
https://www.amazon.com/dp/B08HN2X88P?psc=1&ref=ppx_yo2_dt_b_product_details - This product works
great it plugs right into the USB Type-2 port and has its own external display drive and I am now
using 2 additional monitors on my laptop while working. Aside from that little hiccup from the
manufacturer I st

ready for upsert