In [13]:
pip install sentence-transformers faiss-cpu pandas

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import re

import pickle

# ---------------------------------------------------------------------------
# 1. Read the raw review export.
# ---------------------------------------------------------------------------
df = pd.read_csv("Influenster Reviews.csv")

# ---------------------------------------------------------------------------
# 2. Derive a numeric rating.
#    The pattern captures the first digit in `user_rating`, optionally followed
#    by a decimal point and one more digit, and the capture is cast to float.
# ---------------------------------------------------------------------------
df['rating'] = (
    df['user_rating']
    .str.extract(r'(\d(?:\.\d)?)')
    .astype(float)
)
# Optional quality filter (disabled): keep only reviews rated 4.0 or higher.
# df = df[df['rating'] >= 4.0]

# ---------------------------------------------------------------------------
# 3. Build one text document per review (product name + review body); this is
#    the string that gets embedded.
# ---------------------------------------------------------------------------
df['text'] = [
    f"Product: {name}\nReview: {review}"
    for name, review in zip(df['product_name'], df['product_review'])
]
texts = df['text'].tolist()

# One record per row, in the same order as `texts`, so a position returned by
# the index can be used directly to look up the matching review.
metadata = df[['product_name', 'product_review', 'rating']].to_dict('records')

# ---------------------------------------------------------------------------
# 4. Embed every document with the sentence-transformer model.
# ---------------------------------------------------------------------------
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(texts, show_progress_bar=True)

# ---------------------------------------------------------------------------
# 5. Put the vectors into a flat L2 FAISS index sized to the embedding width.
# ---------------------------------------------------------------------------
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# ---------------------------------------------------------------------------
# 6. Persist both artefacts so they can be reloaded without re-embedding.
# ---------------------------------------------------------------------------
faiss.write_index(index, "shampoo_reviews.index")
with open("metadata.pkl", "wb") as metadata_file:
    pickle.dump(metadata, metadata_file)

print("✅ RAG base built: embedded and indexed reviews")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/157 [00:00<?, ?it/s]

✅ RAG base built: embedded and indexed reviews


In [2]:
def recommend_products(user_query, top_k=5):
    """Return the indexed reviews closest to ``user_query`` as one formatted string.

    The query is embedded with the notebook-level ``model``, its nearest
    neighbours are looked up in the notebook-level FAISS ``index``, and every
    hit is rendered from the record at the same position in ``metadata``.

    Args:
        user_query: Free-text description of what the user is looking for.
        top_k: Maximum number of reviews to return.

    Returns:
        The formatted hits joined by newlines. Contains fewer than ``top_k``
        entries (possibly none, i.e. an empty string) when the index cannot
        supply that many neighbours.
    """
    query_embedding = model.encode([user_query])
    # FAISS works on float32 vectors; coerce in case the encoder returns another dtype.
    query_vectors = np.asarray(query_embedding, dtype="float32")
    # Distances are not shown to the user, so only the neighbour positions are kept.
    _, neighbour_ids = index.search(query_vectors, top_k)

    results = []
    for idx in neighbour_ids[0]:
        # FAISS pads the result with -1 when it finds fewer than top_k neighbours.
        # Indexing metadata with -1 would silently return the *last* review.
        if idx < 0:
            continue
        item = metadata[idx]
        results.append(f"🔹 {item['product_name']} (⭐ {item['rating']})\n🗣️ {item['product_review']}\n")

    return "\n".join(results)

# Smoke test: run one sample hair-care query and print the formatted matches.
sample_query = "I have curly, frizzy hair with dandruff"
print(recommend_products(sample_query))

🔹 Head & Shoulders Classic Clean Anti-Dandruff Shampoo (⭐ 2.0)
🗣️ It would help with dandruff but at the same time it would make my hair feel super dry, especially the ends of my hair. I'd probably try pairing it with a super moisturizing conditioner depending on your hair type.

🔹 Head & Shoulders Smooth & Silky Anti-Dandruff Shampoo (⭐ 4.0)
🗣️ I actually use this product during the winter when the skin sloughs off. What they don't tell you if you have the medical condition of blepharitis or anything like that that you need to leave it on for 5 minutes before rinsing it out and it'll be effective and you can use it at least three times a week doing that and it's awesome ,smell is great it's moisturizing give it a 4 out of 5 for the frizziness cuz I still had to use an oil later to keep my frizziness down.

🔹 Head & Shoulders Smooth & Silky Anti-Dandruff Shampoo (⭐ 5.0)
🗣️ Love it on so many levels! Keeps the flakes away and doesnt smell medicinal. Smells wonderful. It also keeps my ha