In [1]:
import pandas as pd
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
import gensim
from gensim.models import Word2Vec
import numpy as np


In [2]:

# Load the dataset
df = pd.read_csv(r'C:\Users\Samiksha Bhatia\Acne_gpu\myvenv\SkinCare_Recommendation_Final\skincare_products_1500_unique.csv')

In [3]:
df.head()

Unnamed: 0,Product Name,Brand,Skin Concern,Severity,Ingredients,Product Type,Skin Type,Price,Customer Rating,Reviews,Availability,Feature_Blob
0,La Roche-Posay Toner,Innisfree,"Dark Spot, Nodules, Papules",Medium,Centella Asiatica Vitamin C Salicylic Acid,Serum,oily,1588.49,4.6,174,Nykaa,"Dark Spot, Nodules, Papules Medium Centella As..."
1,Paula's Choice Serum,Clinique,"Nodules, Papules",Medium,Azelaic Acid Vitamin C Niacinamide Zinc PCA...,Exfoliator,"normal, dry, combination",1634.94,3.5,1601,"Official Website, Ulta","Nodules, Papules Medium Azelaic Acid Vitamin ..."
2,Clinique Moisturizer,La Roche-Posay,"Blackheads, Whiteheads",Low,Azelaic Acid Centella Asiatica Niacinamide ...,Cleanser,normal,2010.47,3.9,2100,"Official Website, Ulta","Blackheads, Whiteheads Low Azelaic Acid Cente..."
3,Paula's Choice Moisturizer,Bioderma,"Papules, Pustules",Medium,Niacinamide Glycolic Acid,Serum,"combination, oily",921.33,4.3,3815,"Nykaa, Dermstore","Papules, Pustules Medium Niacinamide Glycolic..."
4,Clinique Cleanser,Bioderma,"Dark Spot, Papules",Medium,Centella Asiatica Hyaluronic Acid,Exfoliator,"normal, dry",1420.46,4.7,3204,"Dermstore, Nykaa","Dark Spot, Papules Medium Centella Asiatica H..."


In [4]:
# Combine relevant text columns
df["text_features"] = df["Skin Concern"] + " " + df["Severity"] + " " + df["Ingredients"] + " " + df["Skin Type"]
df["tokens"] = df["text_features"].apply(lambda x: x.lower().split())

# Train Word2Vec model
word2vec_model = Word2Vec(sentences=df["tokens"], vector_size=100, window=5, min_count=1, workers=4)

# Function to get sentence vector
def get_sentence_vector(tokens, model):
    vectors = [model.wv[word] for word in tokens if word in model.wv]
    return np.mean(vectors, axis=0) if vectors else np.zeros(model.vector_size)

# Compute product embeddings
df["embedding"] = df["tokens"].apply(lambda x: get_sentence_vector(x, word2vec_model))

# Convert embeddings to a matrix
embedding_matrix = np.vstack(df["embedding"].values)

In [5]:
def get_recommendations_w(skin_concern, severity, price_range=(0, 100), top_n=5):
    """Get product recommendations based on user preferences using Word2Vec."""
    # Convert skin_concern and severity to lowercase
    skin_concern = [concern.lower() for concern in skin_concern]
    severity = severity.lower()

    # Filter by price range
    filtered_df = df[(df['Price'] >= price_range[0]) & (df['Price'] <= price_range[1])]
    if filtered_df.empty:
        return pd.DataFrame()

    # Filter by exact skin concern match (case-insensitive)
    concern_df = filtered_df[filtered_df['Skin Concern'].apply(
        lambda x: any(concern in x.lower().split(',') for concern in skin_concern)
    )]
    if concern_df.empty:
        return pd.DataFrame()

    # Filter by severity
    severity_df = concern_df[concern_df['Severity'].str.contains(severity, case=False, na=False)]
    if severity_df.empty:
        return pd.DataFrame()

    # Create user query embedding
    user_query = ' '.join(skin_concern + [severity])
    user_tokens = user_query.lower().split()
    user_embedding = get_sentence_vector(user_tokens, word2vec_model)

    # Compute cosine similarity
    product_embeddings = np.vstack(severity_df["embedding"].values)
    similarity_scores = cosine_similarity([user_embedding], product_embeddings).flatten()

    # Copy dataframe to avoid modifying the original
    severity_df_copy = severity_df.copy()
    severity_df_copy["Score"] = similarity_scores

    # Sort by similarity score and customer rating
    sorted_df = severity_df_copy.sort_values(by=["Score", "Customer Rating"], ascending=[False, False])

    # Reset index with custom numbering
    sorted_df = sorted_df.reset_index(drop=True)
    sorted_df.index = sorted_df.index + 1  # Set custom index starting from 1

    # Return top_n recommendations
    return sorted_df.head(top_n)[["Product Name", "Brand", "Ingredients", "Price", "Customer Rating", "Availability", "Score"]]



In [6]:
# Example Usage
recommended_products = get_recommendations_w(skin_concern=["Papules"], severity="High", price_range=(500, 2000), top_n=5)
recommended_products

Unnamed: 0,Product Name,Brand,Ingredients,Price,Customer Rating,Availability,Score
1,Say Cleanser,La Roche-Posay,Zinc PCA Centella Asiatica Salicylic Acid,1857.07,4.7,"Official Website, Amazon",0.999575
2,Institution Moisturizer,Eucerin,Niacinamide Vitamin C Retinol,1476.02,4.1,"Nykaa, Official Website, Amazon",0.999571
3,Senior Moisturizer,The Ordinary,Retinol Tea Tree Oil Glycolic Acid,671.27,3.8,"Nykaa, Dermstore",0.999562
4,Gas Exfoliator,Innisfree,Salicylic Acid Retinol Lactic Acid,790.63,3.9,"Dermstore, Nykaa",0.999518
5,Night Exfoliator,Clinique,Vitamin C Salicylic Acid Niacinamide,652.8,4.2,"Nykaa, Amazon",0.999514
