### Working on product descriptions

This notebook serves the purpose of retrieving meaningful descriptions of the products from the OpenAI API, by providing their names obtained from the purchase receipts.

In [1]:
# Imports

from gensim.models import Word2Vec
from IPython.display import clear_output
from langdetect import detect, LangDetectException
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from numpy.linalg import norm
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import os
import pandas as pd
import re
import string
import time
import uuid
import yaml

In [2]:
# OpenAI API

openai_key = os.environ.get('OPENAI_API_KEY')
openai_organization = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=openai_key)

In [3]:
# Load the config file

ROOT_DIR = os.path.dirname(os.getcwd())
CONFIG_PATH = os.path.join(ROOT_DIR, "config", "description_embedding_config.yaml")

with open(CONFIG_PATH, 'r') as file:
    config = yaml.safe_load(file)

In [4]:
# Configuration

DATA_FILE = os.path.join(ROOT_DIR, config['purchase_receipts'])
PRODUCTS_FILE = os.path.join(ROOT_DIR, config['products_file'])
GLOVE_DATA_FILE = os.path.join(ROOT_DIR, config['glove_data_file'])
WORD2VEC_MODEL_FILE = os.path.join(ROOT_DIR, config['word2vec_model'])
OPENAI_MODEL = config['openai_model']
BATCH_SIZE= config['batch_size']
WORDS_PER_ITEM = config['words_per_item']
MAX_RETRIES = config['max_retries']
RETRY_DELAY = config['retry_delay']
DESCRIPTION_FIELD = config['product_description_field']
EMBEDDING_SIZE = config['embedding_size']

In [5]:
def make_df(source_file, target_file, attribute) -> pd.DataFrame:
    """
    Creates a DataFrame from a source file, processes it to remove unidentified items, 
    and saves the resulting DataFrame to a target file. Each item in the DataFrame 
    is assigned a unique ID.

    Parameters:
    source_file (str): Path to the source file containing the data.
    target_file (str): Path where the processed DataFrame should be saved.
    attribute (str): Attribute name in the source DataFrame to be processed.

    Returns:
    DataFrame: A DataFrame with unique IDs, names of the items, 
               and placeholders for descriptions and tokens.

    Throws:
    FileNotFoundError: If the source_file does not exist.
    """
    
    os.path.isfile(source_file)
    df = pd.read_pickle(source_file)
    all_items: pd.DataFrame = df[attribute].drop_duplicates()

    # Finds the undenfitied items
    pattern = re.compile(r'^[^a-zA-Z]*$')
    undentified_items = [item for item in all_items if pattern.match(item)]

    # Remove the undentified items
    items = [item for item in all_items if item not in undentified_items]
    data = [{'id': uuid.uuid4().hex, 'name': item, 'description': '', 'tokens': ''} for item in items]
    items_df = pd.DataFrame(data)
    pd.to_pickle(items_df, target_file)
    return items_df

def answer_to_dict(answers: list) -> dict:
    """
    Converts a text string containing item descriptions in a specific format into a dictionary.
    The format is expected to be '<id>:<description>'.

    Parameters:
    text (str): String containing item descriptions.

    Returns:
    dict: A dictionary where each key is an item ID and each value is the corresponding description.
    """

    pattern = re.compile(r'([a-f0-9]+):(.*?)(?=\n|$)')
    matches: list = pattern.findall(answers)

    dict = {id.strip(): description.strip() for id, description in matches}
    return dict

def update_products_with_descriptions(products: pd.DataFrame, updates_dict: dict):
    """
    Updates a DataFrame of products with descriptions provided in a dictionary format.

    Parameters:
    products (DataFrame): The DataFrame of products to be updated.
    dict (dict): Dictionary containing item IDs as keys and descriptions as values.

    Returns:
    DataFrame: Updated DataFrame with descriptions added to the corresponding products.
    """

    for key, value in updates_dict.items():
        products.loc[products['id'] == key, 'description'] = value
    return products

def fetch_descriptions(client: OpenAI, model: str, items: pd.DataFrame, max_words=15, temperature=0.2) -> list:
    """
    Fetches descriptions for a list of products using a language model. Generates prompts 
    for each product and retrieves responses from the model.

    Parameters:
    client: Client interface to communicate with the model.
    model: The language model to be used.
    items (DataFrame): DataFrame containing product IDs and names.
    max_words (int, optional): Maximum number of words for each description.
    temperature (float, optional): Controls the randomness of the model's responses.

    Returns:
    list: A list containing the model's responses in the specified format.

    Throws:
    Exception: If there is an error in making the API request.
    """
 
    context_message = {
        "role": "system",
        "content": f"Given a list products with format of 'id: name', for each id write a description in english language of {max_words} words length. Focus of its specific benefits and unique features. Reply when you have all answers. Format your response as list of 'id: description'."
        }

    prompts = [{"role": "user", "content": f"{row['id']}: {row['name']}"} for index, row in items.iterrows()]
    prompts.insert(0, context_message)
        
    try:
        response = client.chat.completions.create(
            model=model,
            messages=prompts,
            temperature=temperature,
        )

        print("Responses:\n"
              f" - Completion tokens:\t{response.usage.completion_tokens}\n"
              f" - Prompt tokens:\t{response.usage.prompt_tokens}\n"
              f" - Total tokens:\t{response.usage.total_tokens}")
        
        if response and hasattr(response, 'choices'):
            return response.choices[0].message.content.strip()
        else:
            print("Invalid API response")
    except Exception as e:
        print(f"API request error:\n{e}")

    return []

def batch_fetch(client: OpenAI, model: str, df: pd.DataFrame, batch_size=10, max_response_words=15, max_retries=3, retry_delay=5) -> pd.DataFrame:
    """
    Processes items in batches to fetch descriptions. Handles retries and updates the items 
    with the fetched descriptions.

    Parameters:
    client: Client interface to communicate with the model.
    model: The language model to be used.
    items_file (str): Path to the file containing items data.
    batch_size (int, optional): Number of items to process in each batch.
    max_response_words (int, optional): Maximum number of words for each response.
    max_retries (int, optional): Maximum number of retry attempts for fetching descriptions.
    retry_delay (int, optional): Delay between retries in seconds.

    Throws:
    Exception: If there is an error in batch processing or fetching descriptions.
    """
    
    # items = pd.read_pickle(items_file) if os.path.isfile(items_file) else make_df(DATA_FILE, items_file, DESCRIPTION_FIELD)
    items = df.copy()
    descriptionless_items = items[(items['description'].str.len() < 80)]
    while (len(descriptionless_items) > 0):
        attempts = 0
        try:
            descriptionless_items.sample(frac=1)
            batch = descriptionless_items[:batch_size]
            print (f"\n{len(batch)} items evaluated. {len(descriptionless_items)} items left to describe.")
            
            answers = fetch_descriptions(client, model, batch, max_response_words)
            print(f"Answers:\n{answers}")
            description_dict = answer_to_dict(answers)
            items = update_products_with_descriptions(items, description_dict)
            
            # pd.to_pickle(df, items_file)
            clear_output(wait=True)
            time.sleep(retry_delay)
            descriptionless_items = items[(items['description'].str.len() < 80)]

        except Exception as e:
            print(f"Get batch descriptions exception:\n{e}")
            attempts += 1
            time.sleep(retry_delay)

        if attempts > max_retries:
            print("Max attempts reached. Aborting.")
            break
    
    print("Done.")
    return items

In [6]:
products = pd.read_pickle(PRODUCTS_FILE) if os.path.isfile(PRODUCTS_FILE) else make_df(DATA_FILE, PRODUCTS_FILE, DESCRIPTION_FIELD)
products.head()

Unnamed: 0,id,name,description,tokens,processed_description,weighted_vector,language
0,638fb851469e4fd39fecbde45b376217,BLANX WHITE SHOCK 50ML+LED,Achieve a bright white smile with Blanx White ...,achieve bright white smile blanx white shock t...,"[achieve, bright, white, smile, blanx, white, ...","[-0.0001711061833507951, 0.0002127292312447285...",en
1,320b67831f7f439e9e541d26cefa26f9,"EUMILL GOCCE OCULARI 10FL0,5ML",Relieve dry and irritated eyes with Eumill eye...,relieve dry irritated eyes eumill eye drops pr...,"[relieve, dry, irritated, eyes, eumill, eye, d...","[-0.0088921532329825, -0.014865235213382752, 0...",en
2,199578e19fe4415d9ce8861c13050616,PRIMUM DRENANTE MELA 250ML,Boost your metabolism and detoxify your body w...,boost metabolism detoxify body primum drenante...,"[boost, metabolism, detoxify, body, primum, dr...","[0.0030121083031417585, -0.0019495495646424388...",en
3,2162cbb2605f4be98e0b65aba31fe323,SUSTENIUM PLUS INT FORM 22BUST,Enhance your physical and mental performance w...,enhance physical mental performance sustenium ...,"[enhance, physical, mental, performance, suste...","[0.002032875472574839, 0.0006420587249067466, ...",en
4,e553bd5f5d914050a52185a670336f9d,EUFORTYN LIOS OROSOLUB 10BUST,Improve your immune system and protect against...,improve immune system protect oxidative stress...,"[improve, immune, system, protect, oxidative, ...","[0.001268330909087209, -0.005052558460694068, ...",en


In [7]:
def remove_product_from_description(row) -> str:
    product = row['name']
    description = row['description']
    
    if pd.isna(description):
        return description
    
    string_to_remove = f"{product} - "
    
    modified_description = description.replace(string_to_remove, '').strip()
    return modified_description

products['description'] = products.apply(remove_product_from_description, axis=1)

In [8]:
products.head()

Unnamed: 0,id,name,description,tokens,processed_description,weighted_vector,language
0,638fb851469e4fd39fecbde45b376217,BLANX WHITE SHOCK 50ML+LED,Achieve a bright white smile with Blanx White ...,achieve bright white smile blanx white shock t...,"[achieve, bright, white, smile, blanx, white, ...","[-0.0001711061833507951, 0.0002127292312447285...",en
1,320b67831f7f439e9e541d26cefa26f9,"EUMILL GOCCE OCULARI 10FL0,5ML",Relieve dry and irritated eyes with Eumill eye...,relieve dry irritated eyes eumill eye drops pr...,"[relieve, dry, irritated, eyes, eumill, eye, d...","[-0.0088921532329825, -0.014865235213382752, 0...",en
2,199578e19fe4415d9ce8861c13050616,PRIMUM DRENANTE MELA 250ML,Boost your metabolism and detoxify your body w...,boost metabolism detoxify body primum drenante...,"[boost, metabolism, detoxify, body, primum, dr...","[0.0030121083031417585, -0.0019495495646424388...",en
3,2162cbb2605f4be98e0b65aba31fe323,SUSTENIUM PLUS INT FORM 22BUST,Enhance your physical and mental performance w...,enhance physical mental performance sustenium ...,"[enhance, physical, mental, performance, suste...","[0.002032875472574839, 0.0006420587249067466, ...",en
4,e553bd5f5d914050a52185a670336f9d,EUFORTYN LIOS OROSOLUB 10BUST,Improve your immune system and protect against...,improve immune system protect oxidative stress...,"[improve, immune, system, protect, oxidative, ...","[0.001268330909087209, -0.005052558460694068, ...",en


In [9]:
# Check for items with short descriptions

descriptionless_items = products[(products['description'].str.len() < 80)]

if(len(descriptionless_items) == 0):
    print("All items have descriptions.")
else:
    print(f"N. {len(descriptionless_items)} still don't have descriptions.")
    products = batch_fetch(client, model=OPENAI_MODEL, df=products, batch_size=BATCH_SIZE, max_response_words=WORDS_PER_ITEM, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY)
    pd.to_pickle(products, PRODUCTS_FILE)

All items have descriptions.


In [10]:
products = pd.read_pickle(PRODUCTS_FILE)

In [11]:
def preprocess_text(text: str):
    """
    Processes a given text by converting it to lowercase, removing punctuation,
    and filtering out stopwords.

    Parameters:
    text (str): The text string to be preprocessed.

    Returns:
    list: A list of words (tokens) after preprocessing the text.
    """
    
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = word_tokenize(text)
    words = [word for word in words if word not in stopwords.words('english')]
    return words

In [12]:
products['processed_description'] = products['description'].apply(preprocess_text)

In [13]:
# Word2Vec training

corpus_w2v = products['processed_description'].tolist()

model_w2v = Word2Vec(sentences=corpus_w2v, vector_size=EMBEDDING_SIZE, window=5, min_count=2, workers=4)
model_w2v.save(WORD2VEC_MODEL_FILE)

In [14]:
# TF-IDF

# TF-IDF corpus
corpus_tfidf = [" ".join(doc) for doc in corpus_w2v]

# TF-IDF training
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus_tfidf)
feature_names = vectorizer.get_feature_names_out()

In [15]:
products['tokens'] = corpus_tfidf

In [16]:
products.head()

Unnamed: 0,id,name,description,tokens,processed_description,weighted_vector,language
0,638fb851469e4fd39fecbde45b376217,BLANX WHITE SHOCK 50ML+LED,Achieve a bright white smile with Blanx White ...,achieve bright white smile blanx white shock t...,"[achieve, bright, white, smile, blanx, white, ...","[-0.0001711061833507951, 0.0002127292312447285...",en
1,320b67831f7f439e9e541d26cefa26f9,"EUMILL GOCCE OCULARI 10FL0,5ML",Relieve dry and irritated eyes with Eumill eye...,relieve dry irritated eyes eumill eye drops pr...,"[relieve, dry, irritated, eyes, eumill, eye, d...","[-0.0088921532329825, -0.014865235213382752, 0...",en
2,199578e19fe4415d9ce8861c13050616,PRIMUM DRENANTE MELA 250ML,Boost your metabolism and detoxify your body w...,boost metabolism detoxify body primum drenante...,"[boost, metabolism, detoxify, body, primum, dr...","[0.0030121083031417585, -0.0019495495646424388...",en
3,2162cbb2605f4be98e0b65aba31fe323,SUSTENIUM PLUS INT FORM 22BUST,Enhance your physical and mental performance w...,enhance physical mental performance sustenium ...,"[enhance, physical, mental, performance, suste...","[0.002032875472574839, 0.0006420587249067466, ...",en
4,e553bd5f5d914050a52185a670336f9d,EUFORTYN LIOS OROSOLUB 10BUST,Improve your immune system and protect against...,improve immune system protect oxidative stress...,"[improve, immune, system, protect, oxidative, ...","[0.001268330909087209, -0.005052558460694068, ...",en


In [17]:
# Combine word vectors with TF-IDF for weighted document vectors

def weighted_word_vector(word: str, model_w2v: Word2Vec, word_tfidf: dict):
    """
    Computes the weighted vector for a given word using Word2Vec and TF-IDF weights.

    Parameters:
    word (str): The word for which the vector is to be computed.
    model_w2v (Word2Vec model): Pre-trained Word2Vec model.
    word_tfidf (dict): Dictionary of TF-IDF weights with words as keys.

    Returns:
    numpy.ndarray: The weighted vector for the given word. Returns a zero vector 
                   if the word is not in the Word2Vec model or the TF-IDF dictionary.
    """
    
    if word in model_w2v.wv and word in word_tfidf:
        return model_w2v.wv[word] * word_tfidf[word]
    else:
        return np.zeros(model_w2v.vector_size)

def weighted_document_vector(doc: list, model_w2v: Word2Vec, tfidf_vector: dict):
    """
    Computes the weighted document vector by aggregating the weighted word vectors
    of all the words in the document.

    Parameters:
    doc (list): List of words in the document.
    model_w2v (Word2Vec model): Pre-trained Word2Vec model.
    tfidf_vector (dict): Dictionary of TF-IDF weights with words as keys.

    Returns:
    numpy.ndarray: Aggregated weighted vector representing the document.
    """

    doc_vector = np.zeros(model_w2v.vector_size)
    for word in doc:
        word_vector = weighted_word_vector(word, model_w2v, tfidf_vector)
        doc_vector += word_vector
    return doc_vector / len(doc) if doc else doc_vector

In [18]:
# Compute the TF-IDF for each word in each document
word_tfidf_dict = {}
for word in feature_names:
    word_index = vectorizer.vocabulary_[word]
    word_tfidf = tfidf_matrix[:, word_index].toarray()
    word_tfidf_dict[word] = np.mean(word_tfidf)

# Compute the weighted document vectors
products['description_word2vec'] = products['processed_description'].apply(lambda doc: weighted_document_vector(doc, model_w2v, word_tfidf_dict))
pd.to_pickle(products, PRODUCTS_FILE)

In [19]:
# Test the cosine similarity between two random products

random_sampling = products.sample(2)
random_sampling['description'].values

array(['Wrist support for sports activities, providing stability and preventing injuries.',
       'Hydrates and nourishes the skin with its hydrating box set, perfect for dry skin.'],
      dtype=object)

In [20]:
product_1 = random_sampling.iloc[0]
product_2 = random_sampling.iloc[1]

vector_1 = product_1['description_word2vec']
vector_2 = product_2['description_word2vec']

In [21]:
def cosine_similarity(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
    """
    Compute the cosine similarity between two vectors.

    Parameters:
    vec_a (numpy.ndarray): The first vector.
    vec_b (numpy.ndarray): The second vector.

    Returns:
    float: Cosine similarity between vec_a and vec_b. Returns 0 if either vector has zero length.
    """
    
    if norm(vec_a) == 0 or norm(vec_b) == 0:  # Avoid division by zero
        return 0
    return np.dot(vec_a, vec_b) / (norm(vec_a) * norm(vec_b))

In [22]:
# Compute the cosine similarity
similarity = cosine_similarity(vector_1, vector_2)
print(f"The cosine similarity between the two products is of: {similarity}")

The cosine similarity between the two products is of: 0.1537490731112209


In [23]:
def cosine_reccomendation(product_id: str, products: pd.DataFrame, num_recommendations=5):
    """
    Recommends products similar to a given product ID based on cosine similarity 
    of their weighted vectors.

    Parameters:
    product_id (str): The ID of the product for which recommendations are to be made.
    products (DataFrame): DataFrame containing product details including 'description_word2vec'.
    num_recommendations (int, optional): Number of product recommendations to return.

    Returns:
    DataFrame: DataFrame containing details of the recommended products.

    Throws:
    ValueError: If the given product_id is not found in the products DataFrame.
    """
    
    # Get the vector for the product
    if product_id not in products[['id']].values:
        raise ValueError("The product_id is not in the products DataFrame.")

    product_vector = products.loc[products['id'] == product_id, 'description_word2vec'].iloc[0]

    # Compute the similarity between the product and all other products
    similarities = {}
    for other_product_id in products['id'].values:
        if other_product_id != product_id:
            other_product_vector = products.loc[products['id'] == other_product_id, 'description_word2vec'].iloc[0]
            sim = cosine_similarity(product_vector, other_product_vector)
            similarities[other_product_id] = sim

    # Sort the similarities
    recommended_product_ids = sorted(similarities, key=similarities.get, reverse=True)[:num_recommendations]

    return products.loc[recommended_product_ids]

In [24]:
def detect_language(text: str):
    try:
        return detect(text)
    except LangDetectException:
        return None

In [25]:
# Check the language of the descriptions

# products['language'] = products['description'].apply(detect_language)
# non_english_products = products['language'] != 'en'
# non_english_products.head()

In [26]:
# if (len(non_english_products) > 0):
#     mask = products['language'] != 'en'
#     print(f"Cleaning up {len(products[mask])} products with non-English descriptions.")
#     products.loc[mask, ["description", "tokens", "language", "description_word2vec", "processed_description"]] = ""

In [27]:
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors

# Convertire il file GloVe in formato word2vec
word2vec_output_file = 'glove.6B.100d.txt.word2vec'
glove2word2vec(GLOVE_DATA_FILE, word2vec_output_file)

# Caricare il modello
glove_model = KeyedVectors.load_word2vec_format(word2vec_output_file, binary=False)

  glove2word2vec(GLOVE_DATA_FILE, word2vec_output_file)


In [28]:
import pandas as pd
from nltk.tokenize import word_tokenize
import string

# Funzione di pre-elaborazione
def glove_preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    return tokens

# Applica la pre-elaborazione
products['processed_description'] = products['description'].apply(glove_preprocess_text)


In [29]:
products.head()

Unnamed: 0,id,name,description,tokens,processed_description,weighted_vector,language,description_word2vec
0,638fb851469e4fd39fecbde45b376217,BLANX WHITE SHOCK 50ML+LED,Achieve a bright white smile with Blanx White ...,achieve bright white smile blanx white shock t...,"[achieve, a, bright, white, smile, with, blanx...","[-0.0001711061833507951, 0.0002127292312447285...",en,"[-0.0003201263859539592, -0.000297295161154969..."
1,320b67831f7f439e9e541d26cefa26f9,"EUMILL GOCCE OCULARI 10FL0,5ML",Relieve dry and irritated eyes with Eumill eye...,relieve dry irritated eyes eumill eye drops pr...,"[relieve, dry, and, irritated, eyes, with, eum...","[-0.0088921532329825, -0.014865235213382752, 0...",en,"[-0.007836564183425784, -0.015759744265918602,..."
2,199578e19fe4415d9ce8861c13050616,PRIMUM DRENANTE MELA 250ML,Boost your metabolism and detoxify your body w...,boost metabolism detoxify body primum drenante...,"[boost, your, metabolism, and, detoxify, your,...","[0.0030121083031417585, -0.0019495495646424388...",en,"[0.0029143962330408614, -0.0031249196467342697..."
3,2162cbb2605f4be98e0b65aba31fe323,SUSTENIUM PLUS INT FORM 22BUST,Enhance your physical and mental performance w...,enhance physical mental performance sustenium ...,"[enhance, your, physical, and, mental, perform...","[0.002032875472574839, 0.0006420587249067466, ...",en,"[0.0010998682741198234, 0.0005164739183977455,..."
4,e553bd5f5d914050a52185a670336f9d,EUFORTYN LIOS OROSOLUB 10BUST,Improve your immune system and protect against...,improve immune system protect oxidative stress...,"[improve, your, immune, system, and, protect, ...","[0.001268330909087209, -0.005052558460694068, ...",en,"[1.2886704945975163e-05, -0.007490050348751124..."


In [30]:
import numpy as np

def get_glove_embedding(tokens, glove_model):
    # Filtra le parole che sono presenti nel modello GloVe
    valid_tokens = [word for word in tokens if word in glove_model]
    
    # Se non ci sono token validi, restituisci un vettore di zeri
    if not valid_tokens:
        return np.zeros(glove_model.vector_size)

    # Calcola la media degli embedding dei token validi
    embedding = np.mean([glove_model[word] for word in valid_tokens], axis=0)
    return embedding

# Applica la funzione get_glove_embedding a ogni riga della colonna 'tokens'
glove_embedding = products['tokens'].apply(lambda tokens: get_glove_embedding(tokens, glove_model))


In [32]:
from sklearn.decomposition import PCA

pca = PCA(n_components=64)
reduced_glove_embedding  = pca.fit_transform(glove_embedding.tolist())


In [33]:
products['description_glove'] = list(reduced_glove_embedding)

In [35]:
len(products.iloc[0]['description_glove'])

64

In [38]:
products = products.drop(columns=['language', 'weighted_vector'])

In [39]:
products.head()

Unnamed: 0,id,name,description,tokens,processed_description,description_word2vec,description_glove
0,638fb851469e4fd39fecbde45b376217,BLANX WHITE SHOCK 50ML+LED,Achieve a bright white smile with Blanx White ...,achieve bright white smile blanx white shock t...,"[achieve, a, bright, white, smile, with, blanx...","[-0.0003201263859539592, -0.000297295161154969...","[-0.10971122485678504, -0.16961734861111075, -..."
1,320b67831f7f439e9e541d26cefa26f9,"EUMILL GOCCE OCULARI 10FL0,5ML",Relieve dry and irritated eyes with Eumill eye...,relieve dry irritated eyes eumill eye drops pr...,"[relieve, dry, and, irritated, eyes, with, eum...","[-0.007836564183425784, -0.015759744265918602,...","[-0.056279045973986724, 0.25549609312850746, -..."
2,199578e19fe4415d9ce8861c13050616,PRIMUM DRENANTE MELA 250ML,Boost your metabolism and detoxify your body w...,boost metabolism detoxify body primum drenante...,"[boost, your, metabolism, and, detoxify, your,...","[0.0029143962330408614, -0.0031249196467342697...","[-0.03291521010991158, 0.09198945395388057, -0..."
3,2162cbb2605f4be98e0b65aba31fe323,SUSTENIUM PLUS INT FORM 22BUST,Enhance your physical and mental performance w...,enhance physical mental performance sustenium ...,"[enhance, your, physical, and, mental, perform...","[0.0010998682741198234, 0.0005164739183977455,...","[-0.0728658227034427, -0.21033709534567546, 0...."
4,e553bd5f5d914050a52185a670336f9d,EUFORTYN LIOS OROSOLUB 10BUST,Improve your immune system and protect against...,improve immune system protect oxidative stress...,"[improve, your, immune, system, and, protect, ...","[1.2886704945975163e-05, -0.007490050348751124...","[-0.19108113853470834, -0.13978017052770847, 0..."


In [40]:
pd.to_pickle(products, PRODUCTS_FILE)