<a href="https://colab.research.google.com/github/shrut-ij/aidi-capstone-1/blob/main/capstone1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv

# Skincare ingredient catalogue: every ingredient is tagged with the
# (comma-separated) skin issues it addresses and a one-line description.
_COLUMNS = ("ingredient", "skin_issue", "description")

# category -> tuple of (ingredient, skin issues, description) entries.
# The category names are organisational only; they are not written to the CSV.
_CATALOGUE = {
    "Hydrators": (
        ("Hyaluronic Acid", "Dryness, Dehydration, Fine Lines", "Attracts and retains moisture for plump skin"),
        ("Glycerin", "Dryness, Dehydration, Rough Texture", "Humectant that draws water into the skin"),
        ("Squalane", "Dryness, Aging, Barrier Repair", "Lightweight moisturizer that mimics skin's natural oils"),
        ("Panthenol", "Dryness, Irritation, Barrier Repair", "Pro-vitamin B5 that soothes and hydrates"),
    ),
    "Exfoliants": (
        ("Salicylic Acid", "Acne, Blackheads, Clogged Pores", "BHA that penetrates oil to unclog pores"),
        ("Glycolic Acid", "Dullness, Hyperpigmentation, Fine Lines", "AHA that exfoliates surface skin cells"),
        ("Lactic Acid", "Dryness, Dullness, Mild Hyperpigmentation", "Gentle AHA that exfoliates and hydrates"),
        ("Mandelic Acid", "Sensitive Skin, Hyperpigmentation", "Gentle AHA suitable for sensitive skin"),
    ),
    "Acne Fighters": (
        ("Benzoyl Peroxide", "Acne, Bacterial Acne", "Kills acne-causing bacteria"),
        ("Niacinamide", "Acne, Redness, Hyperpigmentation", "Reduces inflammation and regulates oil"),
        ("Tea Tree Oil", "Acne, Fungal Acne", "Natural antimicrobial that fights breakouts"),
        ("Zinc", "Acne, Inflammation, Healing", "Reduces oil production and soothes skin"),
    ),
    "Brighteners": (
        ("Vitamin C", "Hyperpigmentation, Dullness, Aging", "Antioxidant that brightens and protects"),
        ("Alpha Arbutin", "Hyperpigmentation, Dark Spots", "Gentle brightener that inhibits melanin"),
        ("Kojic Acid", "Hyperpigmentation, Sun Damage", "Lightens dark spots and evens tone"),
        ("Tranexamic Acid", "Melasma, Hyperpigmentation", "Targets stubborn discoloration"),
    ),
    "Anti-Aging": (
        ("Retinol", "Wrinkles, Fine Lines, Acne", "Gold-standard anti-aging ingredient"),
        ("Peptides", "Wrinkles, Loss of Firmness", "Stimulates collagen production"),
        ("Bakuchiol", "Wrinkles, Sensitivity", "Plant-based retinol alternative"),
        ("Coenzyme Q10", "Aging, Environmental Damage", "Antioxidant that energizes skin cells"),
    ),
    "Soothers": (
        ("Centella Asiatica", "Redness, Irritation, Healing", "Calms inflamed skin and repairs"),
        ("Aloe Vera", "Sunburn, Redness, Irritation", "Classic soothing and cooling agent"),
        ("Colloidal Oatmeal", "Eczema, Itching, Dryness", "Relieves itch and irritation"),
        ("Allantoin", "Irritation, Sensitivity", "Promotes healing and soothes"),
    ),
    "Barrier Repair": (
        ("Ceramides", "Dryness, Eczema, Barrier Damage", "Replenishes skin's protective barrier"),
        ("Cholesterol", "Dryness, Barrier Repair", "Essential for healthy skin barrier function"),
        ("Fatty Acids", "Dryness, Flaking", "Nourishes and protects skin barrier"),
    ),
    "Antioxidants": (
        ("Green Tea Extract", "Redness, Inflammation, Aging", "Powerful anti-inflammatory antioxidant"),
        ("Resveratrol", "Aging, Environmental Damage", "Protects against free radical damage"),
        ("Vitamin E", "Dryness, Aging, Healing", "Nourishes and protects skin"),
    ),
    "Specialty": (
        ("Azelaic Acid", "Rosacea, Acne, Hyperpigmentation", "Multi-action ingredient for sensitive skin"),
        ("Licorice Root Extract", "Redness, Hyperpigmentation", "Brightens while calming irritation"),
        ("Snail Mucin", "Healing, Dryness, Fine Lines", "Hydrates and promotes skin repair"),
    ),
}

# Flatten into the on-disk row layout: header row first, then every entry
# in category order (dicts preserve insertion order).
ingredients = [list(_COLUMNS)] + [
    list(entry)
    for category_entries in _CATALOGUE.values()
    for entry in category_entries
]

# Persist the table; newline='' lets the csv module control line endings.
with open('Ingredient_List.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv.writer(csv_file).writerows(ingredients)

print("Comprehensive skincare ingredient database saved to Ingredient_List.csv")

Comprehensive skincare ingredient database saved to Ingredient_List.csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

class AdvancedSkincareRecommender:
    """Recommend skincare products from the ingredients they contain.

    Two CSV files are loaded:

    * an ingredient table with ``ingredient``, ``skin_issue`` (comma-separated)
      and ``description`` columns;
    * a product table that must provide ``name``, ``brand``, ``price`` and
      ``ingredients`` (comma-separated) columns.

    Products are matched against the ingredients known to help the requested
    skin issues and ranked by the mean TF-IDF weight of the matched
    ingredients, then by the number of distinct matches.
    """

    # Columns of the frame returned by ``recommend_products``. They are also
    # used for the empty result so callers can always select them by name.
    _RESULT_COLUMNS = [
        'product', 'brand', 'price', 'match_count',
        'match_score', 'matched_ingredients', 'all_ingredients',
    ]

    def __init__(self, ingredients_path='Ingredient_List.csv', products_path='products.csv'):
        """Load both CSV files, normalise them and build the lookup structures.

        Args:
            ingredients_path (str): Path to the ingredient CSV.
            products_path (str): Path to the product CSV.
        """
        # Load and preprocess data
        self.ingredients_df = pd.read_csv(ingredients_path)
        self.products_df = pd.read_csv(products_path)
        self._preprocess_data()

        # Build knowledge base
        self.issue_to_ingredients = self._build_issue_mapping()
        self.ingredient_to_issues = self._build_ingredient_mapping()

        # Fit TF-IDF on the *cleaned* ingredient lists using the same splitter
        # that the matching code uses, so every matched ingredient can be
        # found in ``vocabulary_``. A named function (instead of a lambda)
        # also keeps the fitted vectorizer picklable.
        self.vectorizer = TfidfVectorizer(tokenizer=self._split_terms)
        self.product_vectors = self.vectorizer.fit_transform(self.products_df['ingredients_clean'])

    @staticmethod
    def _split_terms(text):
        """Split a comma-separated string into stripped, non-empty terms.

        Non-string input (e.g. NaN) yields an empty list.
        """
        if not isinstance(text, str):
            return []
        return [term.strip() for term in text.split(',') if term.strip()]

    @staticmethod
    def _normalize_issues(skin_issues):
        """Return the requested issue(s) as a list of lower-cased, stripped strings.

        A plain string is treated as ONE issue (it is not split on commas).
        """
        if isinstance(skin_issues, str):
            skin_issues = [skin_issues]
        return [str(issue).lower().strip() for issue in skin_issues]

    def _preprocess_data(self):
        """Clean and standardize all text data"""
        # Standardize ingredients. Missing values are filled first so the
        # ``.str`` accessor never sees a non-string (all-NaN) column.
        for column in ('ingredient', 'skin_issue'):
            self.ingredients_df[column] = (
                self.ingredients_df[column].fillna('').astype(str).str.lower().str.strip()
            )

        # Clean product data
        self.products_df['ingredients'] = (
            self.products_df['ingredients'].fillna('').astype(str).str.lower().str.strip()
        )

        # Matching column: keep only letters, digits, commas and spaces.
        # ``regex=True`` is required: since pandas 2.0 ``str.replace`` treats
        # the pattern as a literal string by default, which made this a no-op.
        self.products_df['ingredients_clean'] = self.products_df['ingredients'].str.replace(
            r'[^a-z0-9, ]', '', regex=True
        )

    def _build_issue_mapping(self):
        """Create dictionary: {skin_issue: [ingredients]}"""
        mapping = defaultdict(list)
        rows = zip(self.ingredients_df['ingredient'], self.ingredients_df['skin_issue'])
        for ingredient, issues in rows:
            # Skip rows whose ingredient name is missing/blank.
            if not (isinstance(ingredient, str) and ingredient):
                continue
            for issue in self._split_terms(issues):
                mapping[issue].append(ingredient)
        return dict(mapping)

    def _build_ingredient_mapping(self):
        """Create dictionary: {ingredient: [issues]}"""
        rows = zip(self.ingredients_df['ingredient'], self.ingredients_df['skin_issue'])
        return {
            ingredient: self._split_terms(issues)
            for ingredient, issues in rows
            if isinstance(ingredient, str) and ingredient
        }

    def get_ingredient_info(self, ingredient_name):
        """Get detailed information about a specific ingredient

        Args:
            ingredient_name (str): Ingredient to look up; case and surrounding
                whitespace are ignored.

        Returns:
            dict with ``name``, ``issues`` (list) and ``description`` keys,
            or None when the ingredient is unknown.
        """
        key = str(ingredient_name).lower().strip()
        info = self.ingredients_df[self.ingredients_df['ingredient'] == key]
        if info.empty:
            return None
        row = info.iloc[0]
        return {
            'name': row['ingredient'],
            'issues': self._split_terms(row['skin_issue']),
            'description': row['description']
        }

    def recommend_products(self, skin_issues, top_n=5, min_match=1):
        """
        Enhanced recommendation with multiple features:

        Args:
            skin_issues (str/list): Single issue or list of issues
            top_n (int): Number of recommendations
            min_match (int): Minimum number of distinct ingredient matches required

        Returns:
            DataFrame with recommendations and detailed match info, sorted by
            ``match_score`` then ``match_count`` (both descending). When nothing
            matches, an empty DataFrame that still carries the result columns
            is returned, so column selection on the result never fails.
        """
        # Get all relevant ingredients
        beneficial_ings = set()
        for issue in self._normalize_issues(skin_issues):
            beneficial_ings.update(self.issue_to_ingredients.get(issue, []))

        if not beneficial_ings:
            return pd.DataFrame(columns=self._RESULT_COLUMNS)

        vocabulary = self.vectorizer.vocabulary_

        # Score products. ``row_pos`` is the positional row number, which is
        # what the TF-IDF matrix is indexed by (the DataFrame index label is
        # only equal to it for a default RangeIndex).
        results = []
        for row_pos, (_, product) in enumerate(self.products_df.iterrows()):
            # dict.fromkeys de-duplicates while keeping first-seen order.
            matches = list(dict.fromkeys(
                ing for ing in self._split_terms(product['ingredients_clean'])
                if ing in beneficial_ings
            ))
            if len(matches) < min_match:
                continue

            # Calculate importance score (TF-IDF weighted)
            match_scores = [
                float(self.product_vectors[row_pos, vocabulary[ing]])
                for ing in matches
                if ing in vocabulary
            ]
            avg_score = float(np.mean(match_scores)) if match_scores else 0.0

            results.append({
                'product': product['name'],
                'brand': product['brand'],
                'price': product['price'],
                'match_count': len(matches),
                'match_score': avg_score,
                'matched_ingredients': ', '.join(matches),
                'all_ingredients': product['ingredients']
            })

        # Sort and return results
        if not results:
            return pd.DataFrame(columns=self._RESULT_COLUMNS)
        df = pd.DataFrame(results, columns=self._RESULT_COLUMNS)
        df = df.sort_values(['match_score', 'match_count'], ascending=False)
        return df.head(top_n)

    def explain_recommendation(self, product_name, skin_issues):
        """Generate detailed explanation for a recommendation

        Args:
            product_name (str): Exact value of the product's ``name`` column.
            skin_issues (str/list): Single issue or list of issues; normalised
                the same way as in ``recommend_products``.

        Returns:
            str: A bulleted explanation, "Product not found" when the name is
            unknown, or a "no matching ingredients" message.
        """
        product = self.products_df[self.products_df['name'] == product_name]
        if product.empty:
            return "Product not found"

        wanted_issues = set(self._normalize_issues(skin_issues))

        explanation = []
        # Terms are stripped and de-duplicated; without stripping, every
        # ingredient after the first would carry a leading space and never
        # match the knowledge base.
        product_ings = dict.fromkeys(
            self._split_terms(product['ingredients_clean'].values[0])
        )
        for ing in product_ings:
            # Keep the ingredient's own issue order for deterministic output.
            common_issues = [
                issue for issue in self.ingredient_to_issues.get(ing, [])
                if issue in wanted_issues
            ]
            if not common_issues:
                continue
            ing_info = self.get_ingredient_info(ing)
            if ing_info is None:
                continue
            explanation.append(
                f"{ing_info['name']} - helps with {', '.join(common_issues)}: "
                f"{ing_info['description']}"
            )

        if explanation:
            return (
                f"Product '{product_name}' is recommended because:\n" +
                "\n".join(f"• {x}" for x in explanation)
            )
        return "No matching ingredients found for the specified issues"

# Example Usage
# Demonstrates the three entry points of the recommender. Every step checks
# for an empty result first: when nothing matches, the recommender returns an
# empty DataFrame, and selecting columns from it (or taking row 0) would raise.
if __name__ == "__main__":
    recommender = AdvancedSkincareRecommender()
    combo_issues = ["acne", "hyperpigmentation"]

    # 1. Basic recommendation
    print("=== SINGLE ISSUE RECOMMENDATION ===")
    acne_recs = recommender.recommend_products("acne")
    if acne_recs.empty:
        print("No matching products found")
    else:
        print(acne_recs[['product', 'brand', 'match_count']].to_string(index=False))

    # 2. Multiple issues recommendation
    print("\n=== MULTI-ISSUE RECOMMENDATION ===")
    combo_recs = recommender.recommend_products(combo_issues)
    if combo_recs.empty:
        print("No matching products found")
    else:
        print(combo_recs[['product', 'brand', 'matched_ingredients']].to_string(index=False))

    # 3. Get explanation for the top-ranked multi-issue recommendation
    if not combo_recs.empty:
        print("\n=== EXPLANATION ===")
        print(recommender.explain_recommendation(
            combo_recs.iloc[0]['product'],
            combo_issues
        ))



=== SINGLE ISSUE RECOMMENDATION ===
                                          product             brand  match_count
                                Retinol Fusion PM PETER THOMAS ROTH            1
Daily Cleanse™ Clear Skin and Breakout Supplement     HUM NUTRITION            1
              Facial Treatment Clear Lotion Toner             SK-II            1
                      Quench Hydrating Face Serum   KATE SOMERVILLE            1
                    Brightening Derm Revival Mask             SK-II            1

=== MULTI-ISSUE RECOMMENDATION ===
                                          product             brand matched_ingredients
             Turn Back Time Anti-Aging Supplement     HUM NUTRITION           vitamin c
                                Retinol Fusion PM PETER THOMAS ROTH             retinol
                  White Lucent Luminizing Infuser          SHISEIDO     tranexamic acid
Daily Cleanse™ Clear Skin and Breakout Supplement     HUM NUTRITION                zinc
  

In [None]:
# AI-powered workflow
# Builds a recommender, asks for products targeting two issues at once and
# prints the explanation for the top-ranked product.
target_issues = ["acne", "hyperpigmentation"]

# Initialize AI recommender
recommender = AdvancedSkincareRecommender()

# Get recommendations for acne + hyperpigmentation
recs = recommender.recommend_products(target_issues)

# Explain the top recommendation. Guard against an empty result:
# `recs.iloc[0]` raises IndexError when no product matched.
if recs.empty:
    print("No matching products found for: " + ", ".join(target_issues))
else:
    print(recommender.explain_recommendation(recs.iloc[0]["product"], target_issues))



Product 'Turn Back Time Anti-Aging Supplement' is recommended because:
• vitamin c - helps with hyperpigmentation: Antioxidant that brightens and protects
