In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [4]:
training_data = pd.read_csv("augmented_training_data.csv")
training_data['price'] = training_data['price'].fillna(0)
training_data.head()

Unnamed: 0,brand,name,price,rating,product_type,quantity_sold,recommendation
0,rejuva minerals,Multi Purpose Powder - Blush & Eye,0.0,3.1,blush,7,no
1,marienatie,Mineral Blush,0.0,1.3,blush,4,no
2,lotus cosmetics usa,Creme to Powder Blush,0.0,1.9,blush,8,no
3,glossier,Cloud Paint,22.0,3.2,blush,5,no
4,nyx,Sweet Cheeks Blush Palette,20.0,2.7,blush,7,no


In [14]:
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

brand_encoded = ohe.fit_transform(training_data[['brand']])
with open("ohe.pkl", "wb") as f:
    pickle.dump(ohe, f)

bdf = pd.DataFrame(brand_encoded)
bdf

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
tfidf = TfidfVectorizer(max_features=50)
name_vectorized = tfidf.fit_transform(training_data['name'].fillna("")).toarray()
with open("tfidf.pkl", "wb") as f:
    pickle.dump(tfidf, f)

In [16]:
scaler = MinMaxScaler()
price_scaled = scaler.fit_transform(training_data[['price']])
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

In [17]:
feature_matrix = np.hstack([brand_encoded, name_vectorized, price_scaled])

In [32]:
nn_model = NearestNeighbors(n_neighbors=6, metric='cosine')
nn_model.fit(feature_matrix)
with open("nn_model.pkl", "wb") as f:
    pickle.dump(nn_model, f)

In [29]:

query_vector = feature_matrix[8].reshape(1, -1)
distances, indices = nn_model.kneighbors(query_vector)

print("You bought:")
print(training_data.iloc[8][['brand', 'name']])
print("\n Recommended Products:")
for idx in indices[0][1:]:
    rec = training_data.iloc[idx]
    print(f"- {rec['brand']} — {rec['name']}")

You bought:
brand            nyx
name     Baked Blush
Name: 8, dtype: object

 Recommended Products:


IndexError: index 8 is out of bounds for axis 0 with size 1

In [33]:
# ---- Query & Recommendation Preview ----
query_index = 641
query_vector = feature_matrix[query_index].reshape(1, -1)
distances, indices = nn_model.kneighbors(query_vector)

# Display the query product
print("You bought:")
print(training_data.iloc[query_index][['brand', 'name']])

print("\n Recommended Products:")

# --- Filter for uniqueness & skip the query product ---
seen_names = set()
unique_recs = []

query_name = training_data.iloc[query_index]['name'].strip().lower()

for idx in indices[0]:
    rec = training_data.iloc[idx]
    name = rec['name'].strip().lower()
    
    if name != query_name and name not in seen_names:
        seen_names.add(name)
        unique_recs.append(rec)

# Show results
for rec in unique_recs[:5]:  # Show top 5 unique matches
    print(f"- {rec['brand']} — {rec['name']} ({rec['product_type']}) • ${rec['price']}")


You bought:
brand                           nyx
name     Lip Lustre Glossy Lip Tint
Name: 641, dtype: object

 Recommended Products:
- nyx — Epic Ink Lip Dye (lipstick) • $7.0
- nyx — Lip Lingerie (lipstick) • $7.0
- nyx — Strictly Vinyl Lip Gloss (lipstick) • $8.0
- nyx — Duo Chromatic Lip Gloss (lipstick) • $8.0
- nyx — V'Amped Up! Lip Top Coat (lipstick) • $6.0
