In [1]:
import pandas as pd

# Load the gift data from a semicolon-separated CSV file decoded as Latin-1.
gift_dataset_final = pd.read_csv('data_gifts.csv', sep=';', encoding="latin1")

In [33]:
# Preview 10 randomly drawn rows; no random_state is passed, so the selection changes between runs.
gift_dataset_final.sample(10)

Unnamed: 0,name_of_the_product,main_category,sub_category,ratings,no_of_ratings,discounted_price,actual_price
217,Realme GT,Phones,Smartphones,4.7,14400,449.99,649.99
273,Black+Decker 12-Cup Programmable Coffee Maker,Home Appliances,Kitchen Appliances,4.7,14600,49.99,69.99
94,Fitbit Versa 3,Wearable Technology,Smartwatches,4.6,1800,229.99,279.99
137,Asics Gel-Nimbus 23,Shoes,Athletic Shoes,4.7,2650,139.99,169.99
103,Bose QuietComfort 35 II,Audio,Headphones,4.6,2500,299.99,349.99
281,Apple Watch Series 6,Wearable Technology,Smartwatches,4.8,14000,399.99,499.99
480,Personalized Wedding Album: Leather Bound,Wedding Gifts,Personalized Gifts,4.7,14300,179.99,229.99
349,Lego Architecture Trafalgar Square,Building Sets,Architecture,4.7,12600,79.99,99.99
323,Nike Air Max 270,Athletic Shoes,Running Shoes,4.8,13000,129.99,159.99
292,Bose QuietComfort 45,Audio,Headphones,4.8,14000,279.99,329.99


In [26]:
# Distinct sub-category labels; these are the candidates that get embedded further down.
# Missing values are dropped first: unique() would otherwise keep NaN as an entry,
# and a NaN label is a float, not text, so it cannot be tokenized.
sub_category = gift_dataset_final["sub_category"].dropna().unique()

In [4]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


In [5]:
from transformers import BertTokenizer, BertModel
import torch

In [27]:
# Load the tokenizer and the model from the same pretrained checkpoint
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

# Function to get the embeddings
def obtenir_embeddings(texte):
    """Embed text with BERT by mean-pooling the last hidden layer.

    Args:
        texte: Input handed straight to the tokenizer. The notebook calls this
            with a single label and with a one-element list of strings.

    Returns:
        A torch tensor holding one pooled vector per input text. It is computed
        without autograd tracking, so it carries no ``grad_fn``.
    """
    inputs = tokenizer(texte, return_tensors='pt', padding=True, truncation=True)

    # Inference only: skip building the autograd graph to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden_states = outputs.last_hidden_state

    # Weight every token by the attention mask so that padding positions (added
    # when several texts of different lengths are batched) do not dilute the mean.
    # For a single text the mask is all ones and this equals a plain mean.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden_states.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    embeddings = (hidden_states * mask).sum(dim=1) / token_counts
    return embeddings

# Compute the embedding of every gift idea (one call per sub-category label)
embeddings_par_idee = [obtenir_embeddings(idea) for idea in sub_category]

# Join the per-idea results along the first axis into one 2-D matrix with a row per idea.
# Unlike stack(...).squeeze(), this stays 2-D even when there is only a single idea,
# which the cosine-similarity lookup needs.
embeddings_cadeaux = torch.cat(embeddings_par_idee, dim=0)

# Show the embeddings (only the first 5 components of each one)
for i, embedding in enumerate(embeddings_cadeaux):
    print(f"Embedding pour l'idée de cadeau {i+1}: {embedding[:5]}")

Embedding pour l'idée de cadeau 1: tensor([ 0.2902,  0.2751, -0.4873, -0.3871,  0.1275], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 2: tensor([ 0.3013,  0.0496, -0.2024,  0.1459,  0.2160], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 3: tensor([ 0.1041, -0.3579, -0.1523,  0.1184,  0.1730], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 4: tensor([ 0.2074, -0.3238, -0.1434,  0.1510, -0.1093], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 5: tensor([ 0.2480,  0.1472, -0.0465,  0.0514,  0.0927], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 6: tensor([ 0.2346,  0.5764, -0.3144,  0.0999,  0.3174], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 7: tensor([ 0.3376, -0.4088, -0.0149,  0.0122,  0.3175], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 8: tensor([0.5394, 0.0499, 0.0326, 0.1633, 0.5266], grad_fn=<SliceBackward0>)
Embedding pour l'idée de cadeau 9: tensor([ 0.5355, -0.2999,  0.1422,  0.2664,  0.555

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Function to find the closest elements (three by default)
def trouver_elements_proches(element_entree, top_k=3):
    """Return the sub-categories whose embeddings are closest to the input.

    Args:
        element_entree: Text to look up; forwarded unchanged to
            ``obtenir_embeddings``. If it yields several embeddings, only the
            first one is used for the ranking.
        top_k: How many sub-categories to return. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        A list of entries from ``sub_category`` ordered by increasing cosine
        similarity, i.e. the best match is the LAST element. The list is empty
        when ``top_k`` is not positive and shorter than ``top_k`` when fewer
        candidates exist.
    """
    # A slice of [-0:] would select every candidate, so handle this explicitly.
    if top_k <= 0:
        return []

    # Compute the embedding of the input element
    embedding_entree = obtenir_embeddings(element_entree)

    # Cosine similarity between the input embedding and every candidate embedding
    similarities = cosine_similarity(embedding_entree.detach().numpy(), embeddings_cadeaux.detach().numpy())

    # argsort is ascending, so the tail of row 0 holds the most similar candidates
    top_indices = similarities.argsort()[0][-top_k:]

    # Map the indices back to their sub-category labels
    top_cadeaux = [sub_category[i] for i in top_indices]
    return top_cadeaux

# Example usage
element_entree = ["Smartphones"]
elements_proches = trouver_elements_proches(element_entree)
# element_entree is a list, so the message shows its list repr between the quotes
print(f"Les trois éléments les plus proches de '{element_entree}' sont : {elements_proches}")

Les trois éléments les plus proches de '['Smartphones']' sont : ['Smartwatches', 'Android Tablets', 'Smartphones']


In [36]:
# NOTE(review): `budget` is not read by any code visible in this part of the notebook.
budget = 0

# Example query label, passed to pre_filtering further down in this cell
label = ["Classic Litterature"]

def pre_filtering(label, customer_budget):
    """Select the products that match a label and cost less than the budget.

    Args:
        label: Query forwarded to ``trouver_elements_proches`` to obtain the
            sub-categories to keep.
        customer_budget: Price limit; only rows whose ``discounted_price`` is
            strictly below this value are returned.

    Returns:
        The rows of ``gift_dataset_final`` that pass both filters.
    """
    matching_categories = trouver_elements_proches(label)

    # Stage 1: keep only the rows whose sub-category is one of the matches
    in_category = gift_dataset_final["sub_category"].isin(matching_categories)
    candidates = gift_dataset_final.loc[in_category]

    # Stage 2: among those, keep the rows priced strictly under the budget
    under_budget = candidates["discounted_price"] < customer_budget
    return candidates.loc[under_budget]


# Price limit used for the example run
customer_budget = 700

# Run the pre-filter for the example label and display the resulting rows
test = pre_filtering(label, customer_budget)
test

Unnamed: 0,name_of_the_product,main_category,sub_category,ratings,no_of_ratings,discounted_price,actual_price
0,The Great Gatsby,Books,Classic Literature,4.5,1200,8.99,12.99
1,To Kill a Mockingbird,Books,Classic Literature,4.8,1500,6.99,9.99
2,1984,Books,Classic Literature,4.6,1450,7.99,10.99
3,Pride and Prejudice,Books,Classic Literature,4.7,1300,5.99,8.99
4,Moby Dick,Books,Classic Literature,4.4,1100,6.99,9.99
5,Jane Eyre,Books,Classic Literature,4.6,1250,7.99,10.99
6,Wuthering Heights,Books,Classic Literature,4.5,1200,6.99,9.99
7,The Catcher in the Rye,Books,Classic Literature,4.3,1000,7.99,10.99
8,The Odyssey,Books,Classic Literature,4.7,1350,8.99,12.99
9,The Scarlet Letter,Books,Classic Literature,4.6,1200,7.99,10.99
