In [None]:
# Install the third-party packages imported by the next cell into the kernel's
# environment.
# NOTE(review): the captured output below shows "pip: command not found", so
# this cell did not actually install anything in the recorded run.
# NOTE(review): joblib is imported later but not installed here; presumably it
# arrives as a scikit-learn dependency - verify.
%pip install pandas
%pip install scikit-learn

/bin/bash: pip: command not found
/bin/bash: pip: command not found


In [25]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import joblib

In [26]:
# Load the product table from a CSV file in the current working directory.
product_df = pd.read_csv(filepath_or_buffer='complete_skincare_dataset.csv')

In [None]:
def get_skin_type_description(row):
    """Return a space-separated phrase naming the skin types a row is flagged for.

    ``row`` is any object supporting ``row[key]`` lookup (here, a DataFrame
    row) with the keys ``for_dry_skin``, ``for_oily_skin`` and
    ``for_combination_skin``. A flag counts as set when it compares equal
    to 1. Labels appear in the fixed order dry, oily, combination; when no
    flag is set the result is the empty string.
    """
    # (flag column, label) pairs, in the order the labels must be emitted.
    flag_to_label = (
        ('for_dry_skin', 'dry skin'),
        ('for_oily_skin', 'oily skin'),
        ('for_combination_skin', 'combination skin'),
    )
    return ' '.join(label for flag, label in flag_to_label if row[flag] == 1)

# Describe each product's target skin types as free text, one string per row.
product_df['skin_text'] = product_df.apply(get_skin_type_description, axis=1)

# Build the text to vectorize: ingredients, highlights, category and the
# skin-type phrase, separated by single spaces. Missing values in the first
# three columns are replaced with '' so the concatenation never yields NaN.
ingredients_text = product_df['ingredients_cleaned'].fillna('')
highlights_text = product_df['highlights_cleaned'].fillna('')
category_text = product_df['product_category'].fillna('')
product_df['combined_text'] = (
    ingredients_text + ' ' + highlights_text + ' ' + category_text + ' ' + product_df['skin_text']
)

In [34]:
# Fit a TF-IDF model (English stop words removed) on the combined product text
# and keep the resulting document-term matrix for the similarity lookups.
corpus = product_df['combined_text']
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(corpus)

# Persist the fitted vectorizer and the matrix to the working directory.
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')
joblib.dump(value=tfidf_matrix, filename='tfidf_matrix.pkl')

['tfidf_matrix.pkl']

In [8]:
def recommend_similar(product_index, tfidf_matrix, product_df, top_n=10):
    """Return the ``top_n`` products most similar to the product at ``product_index``.

    Similarity is the cosine similarity between row ``product_index`` of
    ``tfidf_matrix`` and every row of the matrix.

    Parameters
    ----------
    product_index : int
        Positional row of the query product. It indexes ``tfidf_matrix`` and
        the result positions are looked up in ``product_df`` with ``iloc``,
        so both must share the same row order. Negative values count from
        the end.
    tfidf_matrix
        Row-indexable feature matrix accepted by ``cosine_similarity``.
    product_df : pandas.DataFrame
        Product table containing the columns ``product_name``,
        ``brand_name``, ``price_usd``, ``rating`` and ``product_category``.
    top_n : int, default 10
        Maximum number of recommendations; values <= 0 give an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``product_df`` (the five columns above),
        ordered from most to least similar. The query product is never
        included.
    """
    scores = cosine_similarity(tfidf_matrix[product_index], tfidf_matrix).flatten()

    # Resolve a negative index to the position it refers to, so it can be
    # compared against the positions returned by argsort.
    query_pos = product_index + len(scores) if product_index < 0 else product_index

    ranked = scores.argsort()[::-1]
    # Exclude the query by position rather than assuming it is ranked first:
    # a product with an identical vector ties with it at the top (and an
    # all-zero query row scores 0 against everything), so slicing off the
    # first element could drop a real match and return the query itself.
    ranked = ranked[ranked != query_pos][:max(top_n, 0)]

    return product_df.iloc[ranked][['product_name', 'brand_name', 'price_usd', 'rating', 'product_category']]

In [9]:
# Positional row of the product to look up; change this to any product index.
example_index = 1542
recommendations = recommend_similar(
    product_index=example_index,
    tfidf_matrix=tfidf_matrix,
    product_df=product_df,
)

# Show the query product's name followed by its recommendations.
example_name = product_df['product_name'].iloc[example_index]
print("Recommendations for:", example_name)
print(recommendations)

Recommendations for: Hyaluronic Acid Booster
                                           product_name         brand_name  \
1524         RESIST Perfectly Balanced Foaming Cleanser     Paula's Choice   
2124                 Ceramide Hydrating Night Treatment     The INKEY List   
1532                         CLEAR Oil-Free Moisturizer     Paula's Choice   
1924                                  Mini Jet Lag Mask     Summer Fridays   
1923                                       Jet Lag Mask     Summer Fridays   
11          GENIUS Ultimate Anti-Aging Vitamin C+ Serum           Algenist   
1083                            DeliKate Recovery Serum    Kate Somerville   
1661                              Signature Moisturizer   ROSE Ingleton MD   
1613  Max Matte Shine Control Sunscreen Broad Spectr...  Peter Thomas Roth   
1106                                DelIKate Try Me Kit    Kate Somerville   

      price_usd  rating   product_category  
1524      22.00  4.7300          Cleansers  
2124  

In [10]:
# Compare the query product's category with the categories of its
# recommendations.
target = product_df.iloc[example_index]
recommended = recommend_similar(example_index, tfidf_matrix, product_df)

target_category = target['product_category']
category_counts = recommended['product_category'].value_counts()

print("Original product category:", target_category)
print("Recommended categories:\n", category_counts)

Original product category: Treatments
Recommended categories:
 product_category
Treatments           3
Moisturizers         2
Masks                2
Cleansers            1
Sunscreen            1
Value & Gift Sets    1
Name: count, dtype: int64


In [None]:
# Write the fitted vectorizer and the TF-IDF matrix to disk.
# NOTE(review): the same two files are already written in the cell that fits
# the vectorizer, so this cell only overwrites them - consider removing it.
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')
joblib.dump(value=tfidf_matrix, filename='tfidf_matrix.pkl')