In [1]:
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from typing import Dict, List, Tuple

In [2]:
# Initialize model and tokenizer
model_name = "deepseek-ai/deepseek-coder-6.7b-base" # had to downgrade model since notebook was crashing with V3

# Pick the device first so the weight dtype can be chosen to match it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Without an explicit dtype the weights are materialised as float32, which needs
# roughly twice the memory of the downloaded shards. On GPU load them in 16-bit
# (bfloat16 where the hardware supports it); keep float32 on CPU, where 16-bit
# inference is slow or unsupported for some ops.
if device == "cuda":
    model_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
else:
    model_dtype = torch.float32

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally -- confirm this checkpoint actually requires it.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=model_dtype,
)
model.to(device)

# Beauty product knowledge base.
# Layout: product category -> skin-type key -> list of product names.
# Not every category lists every skin type (e.g. there is no "sensitive_skin"
# entry under "foundation"), so lookups must tolerate missing keys.
product_db = {
    "foundation": {
        "dry_skin": [
            "Nars Sheer Glow",
            "Charlotte Tilbury Light Wonder",
        ],
        "oily_skin": [
            "Fenty Pro Filt'r Matte",
            "Estée Lauder Double Wear",
        ],
        "combination_skin": [
            "Make Up For Ever HD Skin",
            "Giorgio Armani Luminous Silk",
        ],
    },
    "moisturizer": {
        "dry_skin": [
            "Tatcha Dewy Skin Cream",
            "La Mer Moisturizing Cream",
        ],
        "oily_skin": [
            "Belif Aqua Bomb",
            "Peter Thomas Roth Water Drench",
        ],
        "sensitive_skin": [
            "First Aid Beauty Ultra Repair Cream",
            "Dr. Jart+ Ceramidin Cream",
        ],
    },
}

def generate_response(user_input: str, max_new_tokens: int = 512) -> str:
    """Generate the advisor's reply to a customer message using the DeepSeek model.

    The prompt is a short persona description followed by a single
    ``Customer: ... / Advisor:`` exchange that the model is asked to continue.

    Args:
        user_input: The customer's message.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt. Unlike ``max_length`` this budget does not shrink as
            the prompt grows.

    Returns:
        The text of the first advisor turn only, with surrounding whitespace
        removed.
    """
    system_prompt = """You are a knowledgeable beauty advisor at Sephora. 
    Help customers find the perfect products based on their skin type, concerns, and preferences. 
    Be friendly, professional, and provide specific product recommendations."""

    full_prompt = f"{system_prompt}\n\nCustomer: {user_input}\nAdvisor:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; decode only what the
    # model added so the prompt (and any "Advisor:" typed by the user) can never
    # leak into the reply.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # A base model tends to keep writing the dialogue. Stop at the first new
    # turn marker instead of returning the last hallucinated advisor turn.
    first_turn = re.split(r"\n\s*(?:Customer|Advisor):", completion, maxsplit=1)[0]
    return first_turn.strip()

def get_product_recommendations(category: str, skin_type: str) -> List[str]:
    """Look up product recommendations for a category and skin type.

    Both arguments are matched case-insensitively and ignoring surrounding
    whitespace. For ``skin_type``, spaces and hyphens are treated like the
    underscores used in the knowledge base, so ``"Dry Skin"``, ``"dry-skin"``
    and ``"dry_skin"`` all resolve to the same entry.

    Args:
        category: Product category, e.g. ``"foundation"``.
        skin_type: Skin-type key, e.g. ``"dry_skin"``.

    Returns:
        A new list of product names, or an empty list when the category or the
        skin type is not present in ``product_db``.
    """
    category_key = category.strip().lower()
    # Collapse runs of spaces/hyphens into single underscores to match the DB keys.
    skin_type_key = "_".join(skin_type.lower().replace("-", " ").split())

    matches = product_db.get(category_key, {}).get(skin_type_key, [])
    # Hand back a copy so callers cannot mutate the shared knowledge base.
    return list(matches)

def analyze_skin_concerns(user_description: str) -> Dict[str, float]:
    """Score how strongly a free-text description points at each skin concern.

    Each concern has four keywords; every keyword found in the description
    (case-insensitively, counted at most once) adds 0.25 to that concern's
    score, so scores range from 0.0 to 1.0.

    Keywords are matched as whole words, optionally followed by a common
    suffix ("wrinkles", "redness"), rather than as raw substrings. This keeps
    "red" from firing on "reduce" or "tired" and "dry" from firing on
    "laundry".

    Args:
        user_description: The customer's description of their skin.

    Returns:
        A dict with the keys "dryness", "oiliness", "sensitivity", "aging" and
        "acne", each mapped to its score.
    """
    keywords = {
        "dryness": ["dry", "flaky", "tight", "dehydrated"],
        "oiliness": ["oily", "shiny", "greasy", "large pores"],
        "sensitivity": ["sensitive", "red", "irritated", "reactive"],
        "aging": ["wrinkle", "fine line", "aging", "mature"],
        "acne": ["acne", "breakout", "pimple", "blemish"]
    }
    # Inflections tolerated after a keyword so plural / "-ness" forms still count.
    suffix = r"(?:s|es|d|ed|ness)?"

    text = user_description.lower()
    concerns = {}
    for concern, words in keywords.items():
        hits = sum(
            1
            for word in words
            if re.search(rf"\b{re.escape(word)}{suffix}\b", text)
        )
        concerns[concern] = 0.25 * hits

    return concerns

# Example of how to use the functions
user_query = "I have dry, sensitive skin and I'm looking for a good foundation that won't irritate my skin."
skin_concerns = analyze_skin_concerns(user_query)
response = generate_response(user_query)

# Derive the lookup inputs from the query instead of hard-coding them.
query_lower = user_query.lower()

# Category: the first knowledge-base category mentioned in the query, if any.
category = next((name for name in product_db if name in query_lower), None)

# Skin type: concerns that map onto a knowledge-base skin type, strongest first.
# sorted() is stable, so tied scores keep the order of `skin_concerns`.
concern_to_skin_type = {
    "dryness": "dry_skin",
    "oiliness": "oily_skin",
    "sensitivity": "sensitive_skin",
}
candidate_skin_types = [
    concern_to_skin_type[concern]
    for concern, score in sorted(skin_concerns.items(), key=lambda item: item[1], reverse=True)
    if score > 0 and concern in concern_to_skin_type
]
# "Combination" skin is not one of the scored concerns, so detect it directly.
if "combination" in query_lower:
    candidate_skin_types.insert(0, "combination_skin")

# Use the first candidate the chosen category actually has products for
# (e.g. there are no "sensitive_skin" foundations, so fall through to the next one).
recommendations = []
if category is not None:
    for skin_type in candidate_skin_types:
        recommendations = get_product_recommendations(category, skin_type)
        if recommendations:
            break

# Print results
print("Analyzed Skin Concerns:", skin_concerns)
print("\nBeauty Assistant Response:", response)
print("\nRecommended Products:", recommendations)

tokenizer_config.json:   0%|          | 0.00/793 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

Analyzed Skin Concerns: {'dryness': 0.25, 'oiliness': 0.0, 'sensitivity': 0.25, 'aging': 0.0, 'acne': 0.0}

Beauty Assistant Response: That's a great question. We have a range of foundations that are suitable for sensitive skin, including a sensitive-skin foundation

Recommended Products: ['Nars Sheer Glow', 'Charlotte Tilbury Light Wonder']


# ~Making the inputs passed from user_query to get_product_recommendations more dynamic (abandoned: it took far too long to run and the output was sometimes empty)~

In [3]:
# Initialize model and tokenizer
# model_name = "deepseek-ai/deepseek-coder-6.7b-base"
# tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)

# # Beauty product knowledge base
# product_db = {
#     "foundation": {
#         "dry_skin": ["Nars Sheer Glow", "Charlotte Tilbury Light Wonder"],
#         "oily_skin": ["Fenty Pro Filt'r Matte", "Estée Lauder Double Wear"],
#         "combination_skin": ["Make Up For Ever HD Skin", "Giorgio Armani Luminous Silk"]
#     },
#     "moisturizer": {
#         "dry_skin": ["Tatcha Dewy Skin Cream", "La Mer Moisturizing Cream"],
#         "oily_skin": ["Belif Aqua Bomb", "Peter Thomas Roth Water Drench"],
#         "sensitive_skin": ["First Aid Beauty Ultra Repair Cream", "Dr. Jart+ Ceramidin Cream"]
#     }
# }

In [4]:
# def parse_user_query(user_query: str) -> Tuple[str, str]:
#     """
#     Extract product category and skin type dynamically from the user's query.
#     """
#     # Define keywords for product categories and skin types
#     product_keywords = ["foundation", "moisturizer"]
#     skin_type_keywords = ["dry skin", "oily skin", "combination skin", "sensitive skin"]

#     # Match product category
#     product = None
#     for keyword in product_keywords:
#         if keyword in user_query.lower():
#             product = keyword
#             break

#     # Match skin type
#     skin_type = None
#     for keyword in skin_type_keywords:
#         if keyword in user_query.lower():
#             skin_type = keyword.replace(" ", "_")  # Match database format
#             break

#     return product, skin_type

In [5]:
# def generate_response(user_input: str) -> str:
#     """Generate a response to user input using the DeepSeek model."""
#     system_prompt = """You are a knowledgeable beauty advisor at Sephora. 
#     Help customers find the perfect products based on their skin type, concerns, and preferences. 
#     Be friendly, professional, and provide specific product recommendations."""
    
#     full_prompt = f"{system_prompt}\n\nCustomer: {user_input}\nAdvisor:"
    
#     # Tokenize input and pass `attention_mask` explicitly
#     inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True).to(device)
#     outputs = model.generate(
#         inputs["input_ids"],
#         attention_mask=inputs["attention_mask"],  # Provide attention mask to avoid warnings
#         max_length=3000,
#         num_return_sequences=1,
#         do_sample=True,  # Enable sampling for temperature and top_p to take effect
#         temperature=0.7,
#         top_p=0.9,
#         pad_token_id=tokenizer.eos_token_id
#     )
    
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     return response.split("Advisor:")[-1].strip()

In [6]:
# def get_product_recommendations(category: str, skin_type: str) -> List[str]:
#     """Get specific product recommendations based on category and skin type."""
    
#     try:
#         return product_db[category.lower()][skin_type.lower()]
#     except KeyError:
#         return []

In [7]:
# def analyze_skin_concerns(user_description: str) -> Dict[str, float]:
#     """Analyze user's skin concerns and return confidence scores."""
#     concerns = {
#         "dryness": 0.0,
#         "oiliness": 0.0,
#         "sensitivity": 0.0,
#         "aging": 0.0,
#         "acne": 0.0
#     }
    
#     keywords = {
#         "dryness": ["dry", "flaky", "tight", "dehydrated"],
#         "oiliness": ["oily", "shiny", "greasy", "large pores"],
#         "sensitivity": ["sensitive", "red", "irritated", "reactive"],
#         "aging": ["wrinkle", "fine line", "aging", "mature"],
#         "acne": ["acne", "breakout", "pimple", "blemish"]
#     }
    
#     user_description = user_description.lower()
#     for concern, words in keywords.items():
#         for word in words:
#             if word in user_description:
#                 concerns[concern] += 0.25
    
#     return concerns

In [8]:
# Example of how to use the functions

# user_query = "I have dry, sensitive skin and I'm looking for a good foundation that won't irritate my skin."
# product, skin_type = parse_user_query(user_query)

# if product and skin_type:
#     recommendations = get_product_recommendations(product, skin_type)
# else:
#     recommendations = ["No specific recommendations could be generated based on the query."]

# response = generate_response(user_query)
# skin_concerns = analyze_skin_concerns(user_query)

# # Print results
# print("Analyzed Skin Concerns:", skin_concerns)
# print("\nParsed Product and Skin Type:", {"Product": product, "Skin Type": skin_type})
# print("\nBeauty Assistant Response:", response)
# print("\nRecommended Products:", recommendations)