# **Welcome to the notebook**

### Task 1 - Set up project environment

Installing the needed modules

In [1]:
!pip install openai==1.16.2 python-dotenv

Collecting openai==1.16.2
  Downloading openai-1.16.2-py3-none-any.whl (267 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/267.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m266.2/267.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.1/267.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Collecting httpx<1,>=0.23.0 (from openai==1.16.2)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai==1.16.2)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m


Importing the needed modules and setting up the OpenAI API client

In [4]:
import pandas as pd
import numpy as np
import os
from openai import OpenAI
from dotenv import load_dotenv
from matplotlib import pyplot as plt
import plotly.express as px

from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

# Load the variables defined in the dotenv file into the process environment.
# load_dotenv returns False when no variable could be loaded (e.g. file missing).
dotenv_loaded = load_dotenv(dotenv_path='apikey.env.txt')

# Retrieve the OpenAI API key; it is expected under the APIKEY variable
APIKEY = os.getenv("APIKEY")

# Fail early with an actionable message. The OpenAI client falls back to the
# OPENAI_API_KEY environment variable when api_key is None, so only raise when
# neither source provides a key.
if APIKEY is None and os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "No OpenAI API key found: define APIKEY in 'apikey.env.txt' "
        f"(dotenv variables loaded: {dotenv_loaded}) or set OPENAI_API_KEY."
    )

# Create the OpenAI client used for the embedding requests
client = OpenAI(
  api_key=APIKEY
)

client

<openai.OpenAI at 0x7b20fb52b880>

Import our dataset

In [6]:
# Read the product catalogue (product_id, title, description) into a DataFrame
df = pd.read_csv(filepath_or_buffer='products_dataset.csv')
# Show the first five rows as a quick sanity check
df.head(n=5)

Unnamed: 0,product_id,title,description
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat..."
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...


List of the last 8 products recently viewed by the user.

In [7]:
# Product ids of the items the user viewed most recently
searched_products_id = [
    'P1938', 'P1970', 'P1044', 'P1838',
    'P1048', 'P1017', 'P1310', 'P1444',
]

### Task 2 - Prepare the dataset

Let's label the data points that were recently viewed.

In [15]:
# Start with every product marked as unviewed, then flag the ones in the user's history
df['product_status'] = 'not_viewed'
viewed_mask = df['product_id'].isin(searched_products_id)
df.loc[viewed_mask, 'product_status'] = 'recently_viewed'
# Show only the flagged rows to verify the labelling
df[df['product_status'] == 'recently_viewed']

Unnamed: 0,product_id,title,description,product_status
1017,P1017,1 qt. #660D-7 Blackberry Farm Satin Enamel Int...,Love your space like never before with the hig...,recently_viewed
1044,P1044,1 qt. #M360-4 Marjoram One-Coat Hide Eggshell ...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed
1048,P1048,5 gal. #640C-1 Hosta Flower Extra Durable Sati...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed
1310,P1310,5 gal. #180A-2 Romantic Morn Extra Durable Sem...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed
1444,P1444,5 gal. #PPU12-17 Cameroon Green Extra Durable ...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed
1838,P1838,5 gal. #N340-2 Dune Grass Extra Durable Satin ...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed
1938,P1938,1 gal. #HDC-SP16-10 Japanese Rose Garden Semi-...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed
1970,P1970,8 oz. #510C-3 Rivers Edge Semi-Gloss Enamel St...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed


Now let's combine the product `title` and `description` and store it into a column called `combined`.

In [16]:
# Concatenate title and description into the text that will be embedded.
# Missing values are replaced with '' first: str + NaN yields NaN, which would
# leave a float (not a string) in the text passed to the embedding model.
df['combined'] = df['title'].fillna('') + ' ' + df['description'].fillna('')
df.head()

Unnamed: 0,product_id,title,description,product_status,combined
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat...",not_viewed,Men's 3X Large Carbon Heather Cotton/Polyester...
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...,not_viewed,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...,not_viewed,Large Tapestry Bolster Bed Polyester cover res...
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...,not_viewed,16-Gauge-Sinks Vessel Sink in White with Fauce...
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...,not_viewed,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...


### Task 3 - Text embedding and visualization


Creating the text embedding vectors

In [17]:
# Embed the combined text in batches: the embeddings endpoint caps the number of
# inputs accepted per request (2048 per the API reference), so a single call
# stops working as soon as the dataset grows beyond that.
EMBEDDING_BATCH_SIZE = 1000
texts = df['combined'].tolist()

vectors = []
for start in range(0, len(texts), EMBEDDING_BATCH_SIZE):
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts[start:start + EMBEDDING_BATCH_SIZE],
        dimensions=512
    )
    # Every returned item carries the index of its input within the request;
    # sort on it so the vectors are guaranteed to line up with the rows.
    vectors.extend(d.embedding for d in sorted(response.data, key=lambda d: d.index))

# One embedding per product is required for the column assignment to be meaningful
assert len(vectors) == len(df), "expected exactly one embedding per product"
df['text_embedding'] = vectors

In [18]:
# Preview the first rows; the frame now carries the text_embedding column
df.head()

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat...",not_viewed,Men's 3X Large Carbon Heather Cotton/Polyester...,"[0.042253270745277405, 0.021179627627134323, -..."
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...,not_viewed,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,"[0.04413316026329994, 0.009090634062886238, 0...."
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...,not_viewed,Large Tapestry Bolster Bed Polyester cover res...,"[0.042361605912446976, -0.06515178084373474, 0..."
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...,not_viewed,16-Gauge-Sinks Vessel Sink in White with Fauce...,"[-0.049733716994524, -0.011679209768772125, 0...."
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...,not_viewed,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,"[0.026085881516337395, 0.048493191599845886, -..."


> We know that each vector has 512 dimensions. To visualize the vectors in a scatter plot, we use Principal Component Analysis (PCA) to reduce the dimensionality from 512 to 2.

In [19]:
# Project the 512-dimensional embeddings onto their first two principal components.
# random_state is fixed because scikit-learn may select the randomized SVD solver
# for a matrix of this size, which would make the coordinates vary between runs.
pca = PCA(n_components=2, random_state=42)
pca_result = pca.fit_transform(df['text_embedding'].tolist())
pca_result

array([[ 0.00050581, -0.06676073],
       [-0.36365291, -0.2354297 ],
       [-0.20928374,  0.21440343],
       ...,
       [-0.08170237, -0.10563211],
       [ 0.50257859, -0.00398109],
       [-0.27077434,  0.42985076]])

In [20]:
# Store the two principal-component coordinates as plotting columns
df['pca1'], df['pca2'] = pca_result[:, 0], pca_result[:, 1]
df.head()

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding,pca1,pca2
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat...",not_viewed,Men's 3X Large Carbon Heather Cotton/Polyester...,"[0.042253270745277405, 0.021179627627134323, -...",0.000506,-0.066761
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...,not_viewed,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,"[0.04413316026329994, 0.009090634062886238, 0....",-0.363653,-0.23543
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...,not_viewed,Large Tapestry Bolster Bed Polyester cover res...,"[0.042361605912446976, -0.06515178084373474, 0...",-0.209284,0.214403
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...,not_viewed,16-Gauge-Sinks Vessel Sink in White with Fauce...,"[-0.049733716994524, -0.011679209768772125, 0....",-0.179817,-0.039083
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...,not_viewed,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,"[0.026085881516337395, 0.048493191599845886, -...",-0.212958,-0.143704


Now that we have the text embedding vectors in two dimensions, we can use them to create a 2D plot.

In [21]:
# 2-D view of the embedding space, coloured by viewing status
px.scatter(
    data_frame=df,
    x='pca1',
    y='pca2',
    color='product_status',
)

### Task 4 - Find similar products

In [22]:
# Re-inspect the frame (including product_status and the PCA columns) before using it below
df.head()

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding,pca1,pca2
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat...",not_viewed,Men's 3X Large Carbon Heather Cotton/Polyester...,"[0.042253270745277405, 0.021179627627134323, -...",0.000506,-0.066761
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...,not_viewed,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,"[0.04413316026329994, 0.009090634062886238, 0....",-0.363653,-0.23543
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...,not_viewed,Large Tapestry Bolster Bed Polyester cover res...,"[0.042361605912446976, -0.06515178084373474, 0...",-0.209284,0.214403
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...,not_viewed,16-Gauge-Sinks Vessel Sink in White with Fauce...,"[-0.049733716994524, -0.011679209768772125, 0....",-0.179817,-0.039083
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...,not_viewed,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,"[0.026085881516337395, 0.048493191599845886, -...",-0.212958,-0.143704


Get the data related to `recently_viewed` and `not_viewed` products

In [23]:
# Partition the catalogue by viewing status
is_recently_viewed = df['product_status'] == 'recently_viewed'
is_not_viewed = df['product_status'] == 'not_viewed'
df_recently_viewed = df[is_recently_viewed]
df_not_viewed = df[is_not_viewed]

Convert the embedding vectors to NumPy arrays

In [24]:
# Inspect the rows labelled 'recently_viewed'
df_recently_viewed

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding,pca1,pca2
1017,P1017,1 qt. #660D-7 Blackberry Farm Satin Enamel Int...,Love your space like never before with the hig...,recently_viewed,1 qt. #660D-7 Blackberry Farm Satin Enamel Int...,"[0.05918155610561371, -0.02796226739883423, 0....",0.470231,0.057183
1044,P1044,1 qt. #M360-4 Marjoram One-Coat Hide Eggshell ...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed,1 qt. #M360-4 Marjoram One-Coat Hide Eggshell ...,"[0.02989116683602333, -0.02771798148751259, 0....",0.470721,0.046263
1048,P1048,5 gal. #640C-1 Hosta Flower Extra Durable Sati...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed,5 gal. #640C-1 Hosta Flower Extra Durable Sati...,"[0.0008034154889173806, -0.027133531868457794,...",0.457055,0.029323
1310,P1310,5 gal. #180A-2 Romantic Morn Extra Durable Sem...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed,5 gal. #180A-2 Romantic Morn Extra Durable Sem...,"[0.002219964750111103, -0.00657818466424942, 0...",0.466635,0.048867
1444,P1444,5 gal. #PPU12-17 Cameroon Green Extra Durable ...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed,5 gal. #PPU12-17 Cameroon Green Extra Durable ...,"[0.05091223120689392, -0.016536731272935867, 0...",0.464626,0.051656
1838,P1838,5 gal. #N340-2 Dune Grass Extra Durable Satin ...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recently_viewed,5 gal. #N340-2 Dune Grass Extra Durable Satin ...,"[0.00441074650734663, -0.014054427854716778, 0...",0.459179,0.051007
1938,P1938,1 gal. #HDC-SP16-10 Japanese Rose Garden Semi-...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed,1 gal. #HDC-SP16-10 Japanese Rose Garden Semi-...,"[0.006750619970262051, -0.060344669967889786, ...",0.469535,0.05044
1970,P1970,8 oz. #510C-3 Rivers Edge Semi-Gloss Enamel St...,Introducing the best of BEHR Paint. Featuring ...,recently_viewed,8 oz. #510C-3 Rivers Edge Semi-Gloss Enamel St...,"[0.03179488703608513, -0.06424853950738907, 0....",0.471998,0.045673


In [25]:
# Inspect the rows labelled 'not_viewed'
df_not_viewed

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding,pca1,pca2
0,P0,Men's 3X Large Carbon Heather Cotton/Polyester...,"This heavyweight, water-repellent hooded sweat...",not_viewed,Men's 3X Large Carbon Heather Cotton/Polyester...,"[0.042253270745277405, 0.021179627627134323, -...",0.000506,-0.066761
1,P1,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,If you need more length between your existing ...,not_viewed,Turmode 30 ft. RP TNC Female to RP TNC Male Ad...,"[0.04413316026329994, 0.009090634062886238, 0....",-0.363653,-0.235430
2,P2,Large Tapestry Bolster Bed,Polyester cover resembling rich Italian tapest...,not_viewed,Large Tapestry Bolster Bed Polyester cover res...,"[0.042361605912446976, -0.06515178084373474, 0...",-0.209284,0.214403
3,P3,16-Gauge-Sinks Vessel Sink in White with Faucet,It features a rectangle shape. This vessel set...,not_viewed,16-Gauge-Sinks Vessel Sink in White with Fauce...,"[-0.049733716994524, -0.011679209768772125, 0....",-0.179817,-0.039083
4,P4,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,This 9 in. black full grain leather logger boo...,not_viewed,Men's Crazy Horse 9'' Logger Boot - Steel Toe ...,"[0.026085881516337395, 0.048493191599845886, -...",-0.212958,-0.143704
...,...,...,...,...,...,...,...,...
1995,P1995,Dotty Black and White Black and White Wallpape...,"With a stylish monochrome look, this dotty wal...",not_viewed,Dotty Black and White Black and White Wallpape...,"[0.09004916995763779, -0.055176541209220886, -...",-0.038058,0.195890
1996,P1996,Abrielle Brown/Light Gray 8 ft. x 10 ft. Orien...,The Abrielle collection features a stunning as...,not_viewed,Abrielle Brown/Light Gray 8 ft. x 10 ft. Orien...,"[0.013627329841256142, -0.04656423255801201, 0...",-0.245515,0.483503
1997,P1997,20 in. x 2-1/2 in. x 2-1/2 in. Polyurethane As...,"With Fypon balustrade systems, you can transfo...",not_viewed,20 in. x 2-1/2 in. x 2-1/2 in. Polyurethane As...,"[-0.029632434248924255, -0.0093860924243927, 0...",-0.081702,-0.105632
1998,P1998,1 gal. #P120-6 Diva Glam Flat Exterior Paint &...,BEHR PREMIUM PLUS Exterior Paint & Primer is a...,not_viewed,1 gal. #P120-6 Diva Glam Flat Exterior Paint &...,"[-0.012979477643966675, -0.00922132097184658, ...",0.502579,-0.003981


In [26]:
def _embedding_matrix(frame):
    """Return the frame's text_embedding column as a NumPy array built from its list of vectors."""
    return np.array(frame['text_embedding'].tolist())


# One array per group, built from the stored embedding lists
vectors_recently_viewed = _embedding_matrix(df_recently_viewed)
vectors_not_viewed = _embedding_matrix(df_not_viewed)

In [28]:
# Inspect the embedding array built from the not-viewed products
vectors_not_viewed

array([[ 0.04225327,  0.02117963, -0.01352211, ...,  0.0242709 ,
         0.00989207,  0.04151136],
       [ 0.04413316,  0.00909063,  0.02689831, ...,  0.04190824,
        -0.01510124, -0.00232662],
       [ 0.04236161, -0.06515178,  0.00300138, ...,  0.1061075 ,
        -0.01914597,  0.07214422],
       ...,
       [-0.02963243, -0.00938609,  0.04948691, ..., -0.00773   ,
        -0.03022956,  0.02616163],
       [-0.01297948, -0.00922132,  0.04216991, ...,  0.0037894 ,
        -0.06205868,  0.03520706],
       [-0.00715765, -0.01552045,  0.07947019, ...,  0.0478956 ,
        -0.07058661,  0.03291747]])

In [27]:
# Inspect the embedding array built from the recently viewed products
vectors_recently_viewed

array([[ 0.05918156, -0.02796227,  0.05370384, ...,  0.03199653,
        -0.0369931 ,  0.02376145],
       [ 0.02989117, -0.02771798,  0.05270961, ...,  0.01695084,
        -0.08368737,  0.05685841],
       [ 0.00080342, -0.02713353,  0.04528342, ..., -0.00164107,
        -0.04122982,  0.04933701],
       ...,
       [ 0.00441075, -0.01405443,  0.05610869, ..., -0.00937568,
        -0.03830218,  0.05352857],
       [ 0.00675062, -0.06034467,  0.04007325, ..., -0.02216943,
        -0.0787768 ,  0.08100744],
       [ 0.03179489, -0.06424854,  0.01230332, ..., -0.00732387,
        -0.02857859,  0.0742462 ]])

Find the similarity between each viewed product and all the unviewed products.

In [29]:
# Pairwise cosine similarity: one row per recently viewed product,
# one column per not-viewed product
similarity_matrix = cosine_similarity(
    X=vectors_recently_viewed,
    Y=vectors_not_viewed,
)

In [37]:
# Shape check: (recently viewed products, not-viewed products)
similarity_matrix.shape

(8, 1992)

In [39]:
# For every recently viewed product (one row of the matrix) take the column
# position of the highest similarity score. A single vectorised argmax over
# axis 1 replaces the Python loop; tolist() yields plain Python ints.
# The same position can appear more than once when several viewed products
# share the same nearest neighbour.
top_ids = np.argmax(similarity_matrix, axis=1).tolist()

top_ids

[854, 1058, 1700, 733, 1323, 1700, 1496, 314]

### Task 5 - Recommend products based on the searched products

Let's update the status of the top similar products to `recommended`.

In [41]:
# Map the positions back to product ids; iloc is used because top_ids are
# positions within df_not_viewed, not index labels
most_similar_products = df_not_viewed.iloc[top_ids]['product_id'].tolist()

In [42]:
# Show the product ids selected as most similar (one per recently viewed product)
most_similar_products

['P854', 'P1061', 'P1705', 'P733', 'P1327', 'P1705', 'P1501', 'P314']

Let's visualize the recommended products.

In [43]:
# Flag the selected products, then list them to confirm the update
recommended_mask = df['product_id'].isin(most_similar_products)
df.loc[recommended_mask, 'product_status'] = 'recommended'
df[df['product_status'] == 'recommended']

Unnamed: 0,product_id,title,description,product_status,combined,text_embedding,pca1,pca2
314,P314,8 oz. #230F-7 Florence Brown Semi-Gloss Enamel...,Introducing the best of BEHR Paint. Featuring ...,recommended,8 oz. #230F-7 Florence Brown Semi-Gloss Enamel...,"[-0.003989859018474817, -0.060487765818834305,...",0.493713,0.052178
733,P733,5 gal. #N440-1 Streetwise Extra Durable Semi-G...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recommended,5 gal. #N440-1 Streetwise Extra Durable Semi-G...,"[0.009646818973124027, -0.017993971705436707, ...",0.459838,0.004507
854,P854,1 qt. #N460-1 Evening White Satin Enamel Inter...,Love your space like never before with the hig...,recommended,1 qt. #N460-1 Evening White Satin Enamel Inter...,"[0.04255978390574455, -0.019562887027859688, 0...",0.491745,0.053064
1061,P1061,1 gal. #MQ1-28 Orange Flambe One-Coat Hide Egg...,Introducing the best of BEHR Paint. Featuring ...,recommended,1 gal. #MQ1-28 Orange Flambe One-Coat Hide Egg...,"[0.01663183607161045, -0.026091130450367928, 0...",0.496062,0.063849
1327,P1327,5 gal. #MQ4-44 Green Dynasty Extra Durable Egg...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recommended,5 gal. #MQ4-44 Green Dynasty Extra Durable Egg...,"[0.0505719892680645, -0.01987646520137787, 0.0...",0.468602,0.063281
1501,P1501,1 gal. #S-H-620 Midnight Sky Semi-Gloss Enamel...,Introducing the best of BEHR Paint. Featuring ...,recommended,1 gal. #S-H-620 Midnight Sky Semi-Gloss Enamel...,"[0.020269175991415977, -0.041455335915088654, ...",0.492106,0.039515
1705,P1705,5 gal. #310D-4 Gold Buff Extra Durable Satin E...,BEHR ULTRA SCUFF DEFENSE Stain-Blocking Paint ...,recommended,5 gal. #310D-4 Gold Buff Extra Durable Satin E...,"[-0.002052590949460864, -0.012245520949363708,...",0.460867,0.032166


In [44]:
# Same 2-D projection, now coloured by the updated product_status;
# hovering a point shows its product id and title
px.scatter(
    data_frame=df,
    x='pca1',
    y='pca2',
    color='product_status',
    hover_data=['product_id', 'title'],
)