In [13]:
import pandas as pd
# Load the cleaned product table; later cells read its 'product_id' and
# 'skintype_list' columns.
df = pd.read_csv(filepath_or_buffer=r"../Data/cleaned_data.csv")


In [14]:
import numpy as np
import random


# --- 1. Generate Synthetic User Profiles ---
# Seed BOTH generators used in this cell (NumPy's legacy global RNG and the
# stdlib `random` module) so the synthetic data -- and the CSV files written
# at the end of the cell -- are identical after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

num_users = 500
user_ids = [f"user_{i:03}" for i in range(1, num_users + 1)]
skin_types = ["Oily", "Dry", "Combination", "Sensitive", "Normal"]

# One record per user. Ages are drawn uniformly from 16..59 (randint's upper
# bound is exclusive); the skin type is drawn uniformly from `skin_types`.
user_profiles = [
    {
        "user_id": uid,
        "age": np.random.randint(16, 60),
        "skin_type": random.choice(skin_types),
    }
    for uid in user_ids
]

user_df = pd.DataFrame(user_profiles)

# --- 2. Generate Interactions Based on Matching Skin Type ---
# For every user, draw 10-29 products (with replacement) and tag each draw
# with a weighted-random interaction type. The result is one row per
# (user, product, interaction_type) event in `interaction_df`.
interaction_types = ["viewed", "liked", "added_to_cart", "purchased"]
interaction_type_weights = [0.5, 0.25, 0.15, 0.1]  # same order as interaction_types
interactions = []

product_ids = df['product_id'].tolist()
if not product_ids:
    # np.random.choice would fail later with a less helpful message.
    raise ValueError("`df` contains no product_id values; cannot generate interactions.")

# Loop-invariant lookups, built once instead of re-scanning the frames for
# every user: user_id -> skin type, and skin type -> matching product ids.
skin_by_user = dict(zip(user_df['user_id'], user_df['skin_type']))
# NOTE(review): this is an exact-equality match. If 'skintype_list' stores
# several skin types per product (as the column name suggests), it will rarely
# match and the fallback below will dominate -- confirm the column format.
products_by_skin = {
    skin: df.loc[df['skintype_list'] == skin, 'product_id'].tolist()
    for skin in set(skin_by_user.values())
}

for user in user_ids:
    n_interactions = np.random.randint(10, 30)  # upper bound exclusive
    user_skin = skin_by_user[user]

    # Fallback to the full catalogue if too few matching products
    suitable_products = products_by_skin[user_skin]
    if len(suitable_products) < n_interactions:
        suitable_products = product_ids

    chosen_products = np.random.choice(suitable_products, size=n_interactions, replace=True)
    # One weighted draw per chosen product, done in a single call.
    chosen_types = random.choices(
        interaction_types, weights=interaction_type_weights, k=n_interactions
    )

    interactions.extend(
        {"user_id": user, "product_id": pid, "interaction_type": kind}
        for pid, kind in zip(chosen_products, chosen_types)
    )

interaction_df = pd.DataFrame(interactions)

# --- 3. Assign Scores ---
# Numeric weight for each interaction type; any type missing from this
# mapping would come out of Series.map as NaN.
interaction_weights = dict(viewed=1, liked=2, added_to_cart=3, purchased=5)
interaction_df = interaction_df.assign(
    score=interaction_df["interaction_type"].map(interaction_weights)
)

# --- 4. Merge with User Info ---
# Left join on 'user_id': every interaction row is kept and gains the
# columns of the matching user profile.
merged_df = pd.merge(interaction_df, user_df, how='left', on='user_id')

# --- 5. Save Files ---
# Write both frames as CSV without the index column.
for frame, destination in (
    (merged_df, "../Data/merged_user_interaction_data.csv"),
    (user_df, "../Data/synthetic_user_profiles.csv"),
):
    frame.to_csv(destination, index=False)

print("✅ Done: Merged interaction data created and saved.")







✅ Done: Merged interaction data created and saved.
