In [1]:
# ----------------------------------------------
# Skin Product Recommendation using ML (KNN)
# ----------------------------------------------

import re
import os
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import joblib

# -------------------------
# Step 1: Load Dataset
# -------------------------
csv_path = r"D:\sem 7 project\backend\model_csv\dataset.csv"
df = pd.read_csv(csv_path)

# -------------------------
# Normalize / clean text columns
# -------------------------
df["Product Type"] = df["Product Type"].astype(str).str.strip().str.title()
df["Brand"] = df["Brand"].astype(str).str.strip().str.title()

# -------------------------
# Step 2: Define helper function
# -------------------------
def select_option(prompt, options, allow_multiple=False):
    while True:
        print(f"\n{prompt}")
        for i, option in enumerate(options, 1):
            print(f"{i}. {option}")
        selection = input(
            "Enter number(s) separated by comma if multiple: " if allow_multiple else "Enter number: "
        )
        numbers = re.findall(r'\d+', selection)
        if not numbers:
            print("❌ Invalid input! Enter numbers only.")
            continue
        numbers = [int(x) for x in numbers]
        if all(1 <= n <= len(options) for n in numbers):
            return [options[n-1] for n in numbers] if allow_multiple else options[numbers[0]-1]
        else:
            print("❌ Number out of range!")

# -------------------------
# Step 3: Train KNN model and save as .pkl
# -------------------------
feature_columns = [
    "Fragrance", "Paraben", "Sulfate", "Alcohol", "Essential Oils",
    "Salicylic Acid", "Retinol", "Niacinamide",
    "Sun Allergy Risk", "Environment Allergy Risk", "Pet Allergy Risk",
    "Pollen Allergy Risk", "Fungal Allergy Risk",
    "Dry", "Normal", "Oily", "Combination"
]

X = df[feature_columns]

knn = NearestNeighbors(n_neighbors=5, metric="cosine")
knn.fit(X)

# Save model to specified folder
model_dir = r"D:\sem 7 project\backend\modelsoutput"
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, "knn_recommender_model.pkl")
joblib.dump(knn, model_path)

print(f"✅ KNN Model trained and saved at:\n{model_path}")

# Load model to verify
loaded_knn = joblib.load(model_path)
print("✅ Model successfully loaded from .pkl file.")

# -------------------------
# Step 4: Prepare options
# -------------------------
all_product_types = sorted(df["Product Type"].unique().tolist(), key=str.lower)
all_brands = sorted(df["Brand"].unique().tolist(), key=str.lower)

# Put Mamaearth first if present
preferred_first = "Mamaearth"
pf = preferred_first.title()
if pf in all_brands:
    all_brands.remove(pf)
    all_brands.insert(0, pf)

# -------------------------
# Step 5: Take user inputs
# -------------------------
skin_type = select_option("Select your skin type:", ["Dry", "Normal", "Oily", "Combination"])
skin_concern = select_option("Select your skin concern:", ["Acne", "Dark_Spot", "Dark_Circle", "Wrinkle"])

product_allergies = select_option(
    "Select product allergies (choose all that apply):",
    ["Fragrance", "Paraben", "Sulfate", "Alcohol", "Essential Oils", "Salicylic Acid", "Retinol", "Niacinamide"],
    allow_multiple=True
)

env_allergies = select_option(
    "Select environmental allergies (choose all that apply):",
    ["Sun Allergy Risk", "Environment Allergy Risk", "Pet Allergy Risk", "Pollen Allergy Risk", "Fungal Allergy Risk"],
    allow_multiple=True
)

allergy_cols = product_allergies + env_allergies

selected_product_types = select_option(
    "Select product types for recommendation:", all_product_types, allow_multiple=True
)
remaining_product_types = selected_product_types.copy()

current_brand = select_option("Select your preferred brand for recommendation:", all_brands)

# -------------------------
# Step 6: Recommendation logic
# -------------------------
final_recommendations = []

for pt in remaining_product_types:
    pt_df = df[df["Product Type"] == pt].copy()
    for col in allergy_cols:
        if col in pt_df.columns:
            pt_df = pt_df[pt_df[col] == 0]

    brand_df = pt_df[pt_df["Brand"] == current_brand]

    if not brand_df.empty:
        final_recommendations.append(brand_df.iloc[0:1])
    else:
        alt_brands = sorted(pt_df["Brand"].unique().tolist(), key=str.lower)
        if len(alt_brands) == 0:
            print(f"\n⚠ No safe product found for '{pt}' in any brand!")
        else:
            print(f"\n⚠ No safe '{pt}' found in brand '{current_brand}'.")
            print(f"Available brands for this product type: {', '.join(alt_brands)}")
            new_brand = select_option(f"Select a brand for '{pt}':", alt_brands)
            brand_df = pt_df[pt_df["Brand"] == new_brand]
            if not brand_df.empty:
                final_recommendations.append(brand_df.iloc[0:1])

# -------------------------
# Step 7: Combine & Display Results
# -------------------------
if not final_recommendations:
    print("\n🔹 No products found matching your selection. Try relaxing some filters.")
else:
    final_recommendations = pd.concat(final_recommendations).drop_duplicates().reset_index(drop=True)
    pd.set_option('display.max_colwidth', None)  # Show full links
    print("\n🔹 Recommended Products for you:")

    for idx, row in final_recommendations.iterrows():
        print(f"Brand: {row['Brand']}")
        print(f"Product Type: {row['Product Type']}")
        print(f"Product Name: {row['Product Name']}")
        print(f"Product Link: {row['Product Link']}")
        print("-" * 50)


✅ KNN Model trained and saved at:
D:\sem 7 project\backend\modelsoutput\knn_recommender_model.pkl
✅ Model successfully loaded from .pkl file.

Select your skin type:
1. Dry
2. Normal
3. Oily
4. Combination
Enter number: 3

Select your skin concern:
1. Acne
2. Dark_Spot
3. Dark_Circle
4. Wrinkle
Enter number: 1

Select product allergies (choose all that apply):
1. Fragrance
2. Paraben
3. Sulfate
4. Alcohol
5. Essential Oils
6. Salicylic Acid
7. Retinol
8. Niacinamide
Enter number(s) separated by comma if multiple: 1,3,6

Select environmental allergies (choose all that apply):
1. Sun Allergy Risk
2. Environment Allergy Risk
3. Pet Allergy Risk
4. Pollen Allergy Risk
5. Fungal Allergy Risk
Enter number(s) separated by comma if multiple: 1,3

Select product types for recommendation:
1. Cleanser
2. Essence / Serum
3. Exfoliator
4. Eye Cream
5. Face Mask
6. Mist / Toner
7. Moisturizer
8. Serum
9. Serum/Essence
10. Sunscreen
11. Toner
12. Toner/Essence
13. Toner/Mist
Enter number(s) separated