In [1]:
# Clothing Recommendation System - Cleaned and Consolidated

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer

# ----------------------
# Step 1: Load the Dataset
# ----------------------
df = pd.read_parquet(r'C:/Users/USER/Documents/SmartLook  Application Project/summer-products.pqt')

# ----------------------
# Step 2: Handle Missing Values
# ----------------------
df['rating'] = df['rating'].fillna(df['rating'].median())
df['rating_count'] = df['rating_count'].fillna(0)
df['price'] = pd.to_numeric(df['price'], errors='coerce')
df['rating'] = pd.to_numeric(df['rating'], errors='coerce')
df['units_sold'] = pd.to_numeric(df['units_sold'], errors='coerce')

rating_columns = [
    'rating_five_count', 'rating_four_count', 'rating_three_count', 
    'rating_two_count', 'rating_one_count'
]
df[rating_columns] = df[rating_columns].fillna(0)

# Other categorical fields
fill_values = {
    'product_color': 'unknown',
    'product_variation_size_id': 'unknown',
    'has_urgency_banner': 0,
    'urgency_text': '',
    'origin_country': 'Unknown',
    'merchant_name': 'Unknown',
    'merchant_info_subtitle': ''
}
df.fillna(value=fill_values, inplace=True)

# Drop irrelevant column
if 'merchant_profile_picture' in df.columns:
    df.drop(columns=['merchant_profile_picture'], inplace=True)

# ----------------------
# Step 3: Feature Engineering
# ----------------------
scaler = MinMaxScaler()
df[['price', 'rating', 'units_sold']] = scaler.fit_transform(df[['price', 'rating', 'units_sold']])

df['tags'] = df['tags'].fillna('')
df['tags_list'] = df['tags'].apply(lambda x: x.split(','))

mlb = MultiLabelBinarizer()
tags_matrix = mlb.fit_transform(df['tags_list'])
df = pd.concat([df, pd.DataFrame(tags_matrix, columns=mlb.classes_)], axis=1)

# Target column for modeling (optional)
df['is_high_rated'] = (df['rating'] >= 0.8).astype(int)  # since rating is normalized

# ----------------------
# Step 4: Recommendation System Functions
# ----------------------
def filter_recommendations(df, user_input):
    filtered_df = df.copy()
    if 'season' in user_input:
        filtered_df = filtered_df[filtered_df['theme'].str.contains(user_input['season'], case=False, na=False)]
    if 'style' in user_input:
        filtered_df = filtered_df[filtered_df['tags'].str.contains(user_input['style'], case=False, na=False)]
    if 'color' in user_input:
        filtered_df = filtered_df[filtered_df['product_color'].str.contains(user_input['color'], case=False, na=False)]
    if 'size' in user_input:
        filtered_df = filtered_df[filtered_df['product_variation_size_id'].str.contains(user_input['size'], case=False, na=False)]
    return filtered_df

def rank_recommendations(filtered_df):
    return filtered_df.sort_values(by=['rating', 'units_sold'], ascending=[False, False])

def get_top_recommendations(ranked_df, top_n=5):
    return ranked_df.head(top_n)[['title', 'rating', 'units_sold']]

def recommend_outfits(df, user_input, top_n=5):
    filtered_df = filter_recommendations(df, user_input)
    ranked_df = rank_recommendations(filtered_df)
    return get_top_recommendations(ranked_df, top_n).to_dict(orient='records')

# ----------------------
# Step 5: Example Usage
# ----------------------
user_input = {
    'season': 'Summer',
    'style': 'Casual',
    'color': 'White',
    'size': 'M'
}

recommendations = recommend_outfits(df, user_input)
print(recommendations)


[{'title': 'M-XXL Taille Été Nouvelles Dames Papillon Impression Bohème Plage Jupe Col En V Sans Manches Casual Tempérament Robe', 'rating': 1.0, 'units_sold': 6.0000600006000066e-05}, {'title': 'Femmes Mode Mode Spaghetti Combinaisons à bretelles Rompers Summer Casual Slim Fit Taille ajustée Lien de serrage Taille une pièce Costume sexy sans manches Bodycon Playsuit Outfits Set Femmes Short Rompers Bodysuits pour vêtements de plage', 'rating': 0.8049999999999999, 'units_sold': 0.19999199991999922}, {'title': 'Mode féminine à manches courtes col rond Débardeurs T-shirt imprimé plumes', 'rating': 0.8025, 'units_sold': 0.00999009990099901}, {'title': 'Mode féminine à manches courtes col rond Débardeurs T-shirt imprimé plumes', 'rating': 0.8025, 'units_sold': 0.00999009990099901}, {'title': 'Femmes T-shirts manches courtes Tops Lettre Imprimé Coton Été Casual T-shirts T-shirts S-5XL', 'rating': 0.7849999999999999, 'units_sold': 0.049990499904999056}]
