In [1]:
# -*- coding: utf-8 -*-
"""Fashion_Recommendation_System.ipynb

Automatically generated by Colab.

# üõçÔ∏è **Complete Fashion Recommendation & Outfit Building System**

**System Features:**
1. Personalized recommendations based on purchase history
2. Size/fit recommendations based on body measurements
3. Outfit building and compatibility scoring
4. Cold-start recommendations for new users
5. Style-based filtering

---
"""

# @title ‚öôÔ∏è **Step 0: Install & Import Required Libraries**

!pip install pandas numpy scikit-learn tensorflow openpyxl sqlalchemy pymysql python-dotenv -q

import os
import json
import pandas as pd
import numpy as np
import pickle
import warnings
import builtins
from pathlib import Path
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

warnings.filterwarnings('ignore')

# Machine Learning libraries
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

# TensorFlow for embeddings
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam

ARTIFACTS_DIR = Path.cwd() / "artifacts"
ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

def artifact_path(filename: str) -> Path:
    return ARTIFACTS_DIR / filename

def find_project_file(filename: str, start: Path | None = None) -> Path | None:
    current = (start or Path.cwd()).resolve()
    for parent in [current, *current.parents]:
        candidate = parent / filename
        if candidate.exists():
            return candidate
    return None

def _resolve_artifact_path(path):
    if isinstance(path, (str, Path)):
        path_str = str(path)
        if path_str.startswith("/content/"):
            return artifact_path(path_str.replace("/content/", "", 1))
    return path

if not hasattr(builtins, "_artifact_original_open"):
    builtins._artifact_original_open = builtins.open
    def _artifact_open(file, *args, **kwargs):
        resolved = _resolve_artifact_path(file)
        return builtins._artifact_original_open(resolved, *args, **kwargs)
    builtins.open = _artifact_open

if not hasattr(builtins, "_artifact_original_print"):
    builtins._artifact_original_print = builtins.print
    def _artifact_print(*args, **kwargs):
        prefix = f"{ARTIFACTS_DIR.resolve()}/"
        updated_args = []
        for arg in args:
            if isinstance(arg, str):
                updated_args.append(arg.replace("/content/", prefix))
            else:
                updated_args.append(arg)
        builtins._artifact_original_print(*updated_args, **kwargs)
    builtins.print = _artifact_print

if not hasattr(pd, "_artifact_original_read_pickle"):
    pd._artifact_original_read_pickle = pd.read_pickle
    def _artifact_read_pickle(path, *args, **kwargs):
        resolved = _resolve_artifact_path(path)
        return pd._artifact_original_read_pickle(resolved, *args, **kwargs)
    pd.read_pickle = _artifact_read_pickle

if not hasattr(pd.DataFrame, "_artifact_original_to_pickle"):
    pd.DataFrame._artifact_original_to_pickle = pd.DataFrame.to_pickle
    def _artifact_to_pickle(self, path, *args, **kwargs):
        resolved = _resolve_artifact_path(path)
        return pd.DataFrame._artifact_original_to_pickle(self, resolved, *args, **kwargs)
    pd.DataFrame.to_pickle = _artifact_to_pickle

if not hasattr(np, "_artifact_original_save"):
    np._artifact_original_save = np.save
    def _artifact_np_save(file, arr, *args, **kwargs):
        resolved = _resolve_artifact_path(file)
        return np._artifact_original_save(resolved, arr, *args, **kwargs)
    np.save = _artifact_np_save

ORIGINAL_DATA_PICKLE = artifact_path("original_items.pkl")

print("‚úÖ All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")


[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


‚úÖ All libraries imported successfully!
TensorFlow version: 2.20.0


In [2]:
# @title üìÅ **Step 1: Load Your Data File or Database Seed**

import os
import sqlite3
from pathlib import Path

import pandas as pd

CSV_DATA_PATH = Path("data/items.csv")
ENV_PATH = find_project_file(".env")
SQLITE_PATH = find_project_file("database.sqlite")

if ENV_PATH:
    load_dotenv(ENV_PATH)
    print(f"üîë Loaded environment variables from {ENV_PATH}")

def _coerce_json(value):
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ""
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    if isinstance(value, str):
        return value
    return json.dumps(value)

def _load_from_csv(path: Path) -> pd.DataFrame:
    print(f"üìÇ Loading data from {path.resolve()}")
    if path.suffix.lower() == ".csv":
        df_loaded = pd.read_csv(path)
        print("üìÑ Loaded as CSV file")
    elif path.suffix.lower() in {".xlsx", ".xls"}:
        df_loaded = pd.read_excel(path)
        print("üìä Loaded as Excel file")
    else:
        raise ValueError(f"Unsupported file type for {path}")
    return df_loaded

def _build_items_query() -> str:
    return """\
SELECT
    items.id AS `ID`,
    items.name AS `Name`,
    items.price AS `Price`,
    categories.name AS `Category`,
    stores.name AS `Store`,
    items.stock_quantity AS `Total Stock`,
    items.color_variants AS `Color Variants Details`,
    items.sizing_data AS `Sizing Data`
FROM items
INNER JOIN categories ON items.category_id = categories.id
INNER JOIN stores ON items.store_id = stores.id
"""

def _load_from_sqlite(sqlite_file: Path) -> pd.DataFrame | None:
    if not sqlite_file.exists():
        return None
    print(f"üóÑÔ∏è Loading items from SQLite database at {sqlite_file}")
    conn = sqlite3.connect(sqlite_file)
    try:
        df_loaded = pd.read_sql_query(_build_items_query(), conn)
        print(f"   ‚úÖ Retrieved {len(df_loaded)} items from SQLite")
        return df_loaded
    except Exception as exc:
        print(f"   ‚ùå SQLite query failed: {exc}")
        return None
    finally:
        conn.close()

def _load_from_mysql() -> pd.DataFrame | None:
    driver = os.getenv("DB_CONNECTION", "mysql").lower()
    if driver != "mysql":
        return None
    db_name = os.getenv("DB_DATABASE")
    db_user = os.getenv("DB_USERNAME") or os.getenv("DB_USER")
    db_pass = os.getenv("DB_PASSWORD") or ""
    db_host = os.getenv("DB_HOST", "127.0.0.1")
    db_port = os.getenv("DB_PORT", "3306")
    if not db_name or not db_user:
        print("   ‚ö†Ô∏è MySQL credentials missing in environment variables")
        return None
    try:
        url = URL.create(
            "mysql+pymysql",
            username=db_user,
            password=db_pass or None,
            host=db_host,
            port=int(db_port),
            database=db_name
        )
        print(f"üóÑÔ∏è Connecting to MySQL at {db_host}:{db_port}/{db_name}")
        engine = create_engine(url)
        with engine.connect() as conn:
            df_loaded = pd.read_sql_query(_build_items_query(), conn)
        print(f"   ‚úÖ Retrieved {len(df_loaded)} items from MySQL")
        return df_loaded
    except Exception as exc:
        print(f"   ‚ùå MySQL query failed: {exc}")
        return None

df = None

if CSV_DATA_PATH.exists():
    df = _load_from_csv(CSV_DATA_PATH)
else:
    print(f"‚ö†Ô∏è Dataset not found at {CSV_DATA_PATH.resolve()} ‚Äî attempting to load from seeded database...")
    if SQLITE_PATH:
        df = _load_from_sqlite(SQLITE_PATH)
    if df is None:
        df = _load_from_mysql()

if df is None or df.empty:
    raise FileNotFoundError("Unable to load dataset from CSV or database sources. Provide a CSV/Excel file or ensure the database is reachable.")

df["Color Variants Details"] = df["Color Variants Details"].apply(_coerce_json)
df["Sizing Data"] = df["Sizing Data"].apply(_coerce_json)

print(f"üìä Data loaded: {len(df)} rows, {len(df.columns)} columns")
print("\nFirst 3 rows:")
print(df.head(3))

print(f"\nüìã Columns ({len(df.columns)} total):")
for i, col in enumerate(df.columns, 1):
    print(f"{i:2d}. {col}")

df.to_pickle(ORIGINAL_DATA_PICKLE)
print(f"\nüíæ Original data saved to '{ORIGINAL_DATA_PICKLE.resolve()}'")

üîë Loaded environment variables from C:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\.env
‚ö†Ô∏è Dataset not found at C:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\data\items.csv ‚Äî attempting to load from seeded database...
üóÑÔ∏è Connecting to MySQL at 127.0.0.1:3306/fitfast
   ‚úÖ Retrieved 250 items from MySQL
üìä Data loaded: 250 rows, 8 columns

First 3 rows:
   ID               Name  Price  Category             Store  Total Stock  \
0   1   Classic Crew Tee  19.99  T-Shirts  Fashion Store 10          103   
1   2         V-Neck Tee  21.99  T-Shirts   Fashion Store 9          109   
2   3  Graphic Print Tee  24.99  T-Shirts   Fashion Store 8          108   

                              Color Variants Details  \
0  {"Black":{"name":"Black","stock":81},"White":{...   
1  {"Gray":{"name":"Gray","stock":32},"Navy":{"na...   
2  {"Black":{"name":"Black","stock":90},"White":{...   

                                         Sizing Data  
0  {"garment_type"

In [3]:
# @title üéØ **STEP 2: Feature Engineering with ALL Fixes Applied**
print("üéØ COMPLETE STEP 2: Feature Engineering with ALL Fixes Applied")
print("=" * 60)

import pandas as pd
import numpy as np
import json
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer
from collections import Counter

# ========== 1. RELOAD ORIGINAL DATA ========== 
print("\n1. üîÑ Loading original data...")
original_df = pd.read_pickle(ORIGINAL_DATA_PICKLE)
print(f"   ‚úÖ Loaded {len(original_df)} items")

# ========== 2. CORRECT PARSING OF ALL DATA ========== 
print("\n2. üìã Correct parsing of all data...")

def parse_sizing_data_final(sizing_str):
    """Properly parse sizing data"""
    if pd.isna(sizing_str) or not isinstance(sizing_str, str):
        return {'garment_type': 'unknown'}

    result = {'garment_type': 'unknown'}
    try:
        # First try JSON parsing
        if sizing_str.strip().startswith('{'):
            data = json.loads(sizing_str)
            result['garment_type'] = data.get('garment_type', 'unknown')
            result['fit_characteristics'] = data.get('fit_characteristics', {})
            result['measurements_cm'] = data.get('measurements_cm', {})
            result['size_system'] = data.get('size_system', 'US')
            return result

        # Fallback: string parsing
        parts = [p.strip() for p in sizing_str.split(';') if p.strip()]
        for part in parts:
            if 'garment_type:' in part:
                result['garment_type'] = part.split('garment_type:')[1].strip()
            elif 'fit_type:' in part:
                if 'fit_characteristics' not in result:
                    result['fit_characteristics'] = {}
                result['fit_characteristics']['fit_type'] = part.split('fit_type:')[1].strip()
            elif 'ease:' in part:
                if 'fit_characteristics' not in result:
                    result['fit_characteristics'] = {}
                result['fit_characteristics']['ease'] = part.split('ease:')[1].strip()
            elif 'stretch:' in part:
                if 'fit_characteristics' not in result:
                    result['fit_characteristics'] = {}
                result['fit_characteristics']['stretch'] = part.split('stretch:')[1].strip()
    except:
        pass

    return result

def parse_colors_final(color_str):
    """Properly parse color variants"""
    colors = {}
    if pd.isna(color_str):
        return colors

    try:
        if isinstance(color_str, str):
            if color_str.strip().startswith('{'):
                # JSON format
                color_dict = json.loads(color_str)
                for color_name, color_data in color_dict.items():
                    if isinstance(color_data, dict):
                        colors[color_name] = color_data.get('stock', 1)
                    else:
                        colors[color_name] = 1
            else:
                # String format
                for item in color_str.split(','):
                    item = item.strip()
                    if ':' in item:
                        color_name = item.split(':')[0].strip()
                        colors[color_name] = 1
    except:
        pass

    return colors

# Parse all data
print("   Parsing sizing data...")
all_items = []
for idx, row in original_df.iterrows():
    item = {
        'item_id': idx + 1,
        'name': row.get('Name', f'Item {idx+1}'),
        'price': float(row.get('Price', 0)),
        'category': row.get('Category', 'unknown'),
        'store': row.get('Store', 'unknown'),
        'total_stock': int(row.get('Total Stock', 0)) if pd.notna(row.get('Total Stock')) else 0
    }

    # Parse colors
    colors = parse_colors_final(row.get('Color Variants Details', ''))
    item['colors'] = list(colors.keys())
    item['color_stocks'] = colors

    # Parse sizing data
    sizing = parse_sizing_data_final(row.get('Sizing Data', ''))
    item['garment_type'] = sizing.get('garment_type', 'unknown')

    fit_chars = sizing.get('fit_characteristics', {})
    item['fit_type'] = fit_chars.get('fit_type', 'regular')
    item['ease'] = fit_chars.get('ease', 'standard')
    item['stretch'] = fit_chars.get('stretch', 'medium')

    measurements = sizing.get('measurements_cm', {})
    item['measurements'] = measurements

    all_items.append(item)

features_df = pd.DataFrame(all_items)
print(f"   ‚úÖ Parsed {len(features_df)} items")

# ========== 3. CORRECT CATEGORIZATION WITH PRECISE RULES ========== 
print("\n3. üè∑Ô∏è Correct categorization with precise rules...")

# Define precise categorization rules
garment_type_to_category = {
    # Tops
    't_shirt': ('top', 'casual'),
    'v_neck_tee': ('top', 'casual'),
    'fitted_shirt': ('top', 'business_casual'),
    'dress_shirt': ('top', 'formal'),
    'polo_shirt': ('top', 'business_casual'),
    'henley_shirt': ('top', 'casual'),

    # Sweaters & Hoodies
    'crewneck_sweater': ('top', 'casual'),
    'cardigan': ('top', 'casual'),
    'turtleneck': ('top', 'casual'),
    'pullover_hoodie': ('top', 'casual'),
    'zip_hoodie': ('top', 'casual'),

    # Bottoms
    'slim_pants': ('bottom', 'business_casual'),
    'regular_pants': ('bottom', 'business_casual'),
    'cargo_pants': ('bottom', 'casual'),
    'regular_jeans': ('bottom', 'casual'),
    'slim_jeans': ('bottom', 'casual'),
    'casual_shorts': ('bottom', 'casual'),
    'cargo_shorts': ('bottom', 'casual'),

    # Athletic
    'training_shorts': ('bottom', 'athletic'),
    'yoga_pants': ('bottom', 'athletic'),
    'leggings': ('bottom', 'athletic'),

    # Dresses
    'a_line_dress': ('dress', 'business_casual'),
    'bodycon_dress': ('dress', 'business_casual'),
    'maxi_dress': ('dress', 'casual'),
    'midi_dress': ('dress', 'business_casual'),
    'wrap_dress': ('dress', 'business_casual'),

    # Skirts
    'a_line_skirt': ('bottom', 'business_casual'),
    'pencil_skirt': ('bottom', 'business_casual'),
    'tennis_skirt': ('bottom', 'athletic'),

    # Outerwear
    'bomber_jacket': ('outerwear', 'casual'),
    'denim_jacket': ('outerwear', 'casual'),
    'windbreaker': ('outerwear', 'casual'),
    'puffer_jacket': ('outerwear', 'casual'),
    'trench_coat': ('outerwear', 'formal'),

    # Swimwear
    'bikini_top': ('swimwear', 'athletic'),
    'swim_trunks': ('swimwear', 'athletic'),
    'board_shorts': ('swimwear', 'athletic'),
    'one_piece_swimsuit': ('swimwear', 'athletic'),
    'rash_guard': ('swimwear', 'athletic'),

    # Footwear
    'sneakers': ('footwear', 'casual'),
    'dress_shoes': ('footwear', 'formal'),

    # Underwear
    'briefs': ('underwear', 'casual'),
    'boxer_briefs': ('underwear', 'casual'),

    # Socks
    'crew_socks': ('socks', 'casual'),
    'ankle_socks': ('socks', 'casual'),
}

# Apply categorization
features_df['garment_category'] = 'other'
features_df['garment_formality'] = 'casual'

for idx, row in features_df.iterrows():
    garment_type = row['garment_type']
    if garment_type in garment_type_to_category:
        category, formality = garment_type_to_category[garment_type]
        features_df.at[idx, 'garment_category'] = category
        features_df.at[idx, 'garment_formality'] = formality
    else:
        # Fallback based on name
        name_lower = str(row['name']).lower()
        if any(word in name_lower for word in ['dress', 'gown']):
            features_df.at[idx, 'garment_category'] = 'dress'
            features_df.at[idx, 'garment_formality'] = 'business_casual'
        elif any(word in name_lower for word in ['shirt', 'blouse', 'top', 'tee']):
            features_df.at[idx, 'garment_category'] = 'top'
            features_df.at[idx, 'garment_formality'] = 'business_casual' if 'shirt' in name_lower else 'casual'
        elif any(word in name_lower for word in ['pants', 'jeans', 'shorts', 'skirt']):
            features_df.at[idx, 'garment_category'] = 'bottom'
            features_df.at[idx, 'garment_formality'] = 'business_casual' if 'pants' in name_lower and 'dress' in name_lower else 'casual'
        elif any(word in name_lower for word in ['jacket', 'coat', 'blazer']):
            features_df.at[idx, 'garment_category'] = 'outerwear'
            features_df.at[idx, 'garment_formality'] = 'formal' if 'coat' in name_lower else 'casual'
        elif any(word in name_lower for word in ['shoes', 'sneakers', 'boots']):
            features_df.at[idx, 'garment_category'] = 'footwear'
            features_df.at[idx, 'garment_formality'] = 'formal' if 'dress' in name_lower else 'casual'

# ========== 4. SPECIAL FIXES FOR SPECIFIC ITEMS ========== 
print("\n4. üîß Applying special fixes for specific items...")

# Fix 1: Performance Training items should be athletic
print("   Fixing 'Performance Training' items to 'athletic'...")
mask = features_df['name'].str.contains('Performance Training', case=False, na=False)
features_df.loc[mask, 'garment_formality'] = 'athletic'
print(f"   ‚úÖ Fixed {mask.sum()} 'Performance Training' items")

# Fix 2: Training items should be athletic
print("   Fixing 'Training' items to 'athletic'...")
mask = features_df['name'].str.contains('Training', case=False, na=False) & \
       ~features_df['name'].str.contains('Performance Training', case=False, na=False)
features_df.loc[mask, 'garment_formality'] = 'athletic'
print(f"   ‚úÖ Fixed {mask.sum()} 'Training' items")

# Fix 3: Athletic items should be athletic
print("   Fixing 'Athletic' items to 'athletic'...")
mask = features_df['name'].str.contains('Athletic', case=False, na=False)
features_df.loc[mask, 'garment_formality'] = 'athletic'
print(f"   ‚úÖ Fixed {mask.sum()} 'Athletic' items")

print(f"   ‚úÖ Categorized all items with special fixes")

# ========== 5. CREATE FEATURE ENGINEERING ========== 
print("\n5. üîß Creating feature engineering...")

# Create color features
all_colors = []
for colors in features_df['colors']:
    all_colors.extend(colors)

top_colors = [color for color, count in Counter(all_colors).most_common(10)]

color_themes = {
    'dark_colors': ['Black', 'Navy', 'Charcoal', 'Dark', 'Brown', 'Dark Blue', 'Dark Gray'],
    'light_colors': ['White', 'Beige', 'Ivory', 'Cream', 'Light', 'Light Gray'],
    'bold_colors': ['Red', 'Blue', 'Green', 'Yellow', 'Pink', 'Orange', 'Purple', 'Royal Blue', 'Burgundy'],
    'neutral_colors': ['Gray', 'Beige', 'White', 'Black', 'Navy', 'Brown', 'Charcoal', 'Dark Gray']
}

for theme_name, colors in color_themes.items():
    features_df[f'has_{theme_name}'] = features_df['colors'].apply(
        lambda x: 1 if any(color in str(color_item) for color in colors for color_item in x) else 0
    )

# Create measurement features
def extract_measurement_features(measurements):
    """Extract features from measurements"""
    features = {
        'has_measurements': 0,
        'has_bust': 0,
        'has_waist': 0,
        'has_hips': 0,
        'has_length': 0,
        'bust_cm': np.nan,
        'waist_cm': np.nan,
        'hips_cm': np.nan,
        'length_cm': np.nan
    }

    if isinstance(measurements, dict) and measurements:
        features['has_measurements'] = 1
        for key, value in measurements.items():
            key_lower = key.lower()
            if 'bust' in key_lower or 'chest' in key_lower:
                features['has_bust'] = 1
                features['bust_cm'] = float(value) if value not in [None, ''] else np.nan
            elif 'waist' in key_lower:
                features['has_waist'] = 1
                features['waist_cm'] = float(value) if value not in [None, ''] else np.nan
            elif 'hip' in key_lower:
                features['has_hips'] = 1
                features['hips_cm'] = float(value) if value not in [None, ''] else np.nan
            elif 'length' in key_lower:
                features['has_length'] = 1
                features['length_cm'] = float(value) if value not in [None, ''] else np.nan

    return features

measurement_features = features_df['measurements'].apply(extract_measurement_features).apply(pd.Series)
features_df = pd.concat([features_df.drop(columns=['measurements']), measurement_features], axis=1)

# ========== 6. ENCODE CATEGORICAL FEATURES ========== 
print("\n6. üî† Encoding categorical features...")

categorical_columns = ['category', 'store', 'garment_type', 'garment_category', 'garment_formality', 'fit_type', 'ease', 'stretch']
encoders = {col: LabelEncoder() for col in categorical_columns}

for col in categorical_columns:
    features_df[f'{col}_encoded'] = encoders[col].fit_transform(features_df[col].astype(str))

print("   ‚úÖ Categorical features encoded")

# ========== 7. SCALE NUMERICAL FEATURES ========== 
print("\n7. üìè Scaling numerical features...")

numerical_columns = ['price', 'total_stock', 'bust_cm', 'waist_cm', 'hips_cm', 'length_cm']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(features_df[numerical_columns].fillna(0))
scaled_df = pd.DataFrame(scaled_values, columns=[f'scaled_{col}' for col in numerical_columns])
features_df = pd.concat([features_df.reset_index(drop=True), scaled_df.reset_index(drop=True)], axis=1)

print("   ‚úÖ Numerical features scaled")

# ========== 8. SAVE PROCESSED DATA ========== 
print("\n8. üíæ Saving processed data...")

with open(ARTIFACTS_DIR / 'feature_encoders.pkl', 'wb') as f:
    pickle.dump(encoders, f)

with open(ARTIFACTS_DIR / 'scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

features_df.to_pickle(ARTIFACTS_DIR / 'features_df.pkl')
print("   ‚úÖ Processed data saved")

print("\nüéØ Feature engineering complete!")

üéØ COMPLETE STEP 2: Feature Engineering with ALL Fixes Applied

1. üîÑ Loading original data...
   ‚úÖ Loaded 250 items

2. üìã Correct parsing of all data...
   Parsing sizing data...
   ‚úÖ Parsed 250 items

3. üè∑Ô∏è Correct categorization with precise rules...

4. üîß Applying special fixes for specific items...
   Fixing 'Performance Training' items to 'athletic'...
   ‚úÖ Fixed 2 'Performance Training' items
   Fixing 'Training' items to 'athletic'...
   ‚úÖ Fixed 8 'Training' items
   Fixing 'Athletic' items to 'athletic'...
   ‚úÖ Fixed 5 'Athletic' items
   ‚úÖ Categorized all items with special fixes

5. üîß Creating feature engineering...

6. üî† Encoding categorical features...
   ‚úÖ Categorical features encoded

7. üìè Scaling numerical features...
   ‚úÖ Numerical features scaled

8. üíæ Saving processed data...
   ‚úÖ Processed data saved

üéØ Feature engineering complete!


In [4]:
# @title üéØ **STEP 3: Create Item Embeddings (FINAL CORRECTED VERSION - FIXED FOR REAL)**
print("üéØ STEP 3: Create Item Embeddings (FINAL CORRECTED VERSION - FIXED FOR REAL)")
print("=" * 60)

import pandas as pd
import numpy as np
import pickle
import json
from pathlib import Path
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ All libraries imported")

def _load_artifact(name_options):
    for candidate in name_options:
        candidate_path = artifact_path(candidate)
        if candidate_path.exists():
            try:
                return pd.read_pickle(candidate_path), candidate_path
            except Exception as exc:
                print(f"   ‚ö†Ô∏è Failed to load {candidate_path}: {exc}")
    return None, None

# ========== 1. LOAD & PREPARE DATA ========== 
print("\n1. üîÑ Loading data from Step 2...")

features_final, features_path = _load_artifact([
    "processed_items.pkl",
    "features_df.pkl"
])
if features_final is None:
    raise FileNotFoundError("Missing processed dataset. Ensure Step 2 completed successfully.")
print(f"   ‚úÖ Loaded processed features: {features_final.shape} from {features_path}")
print(f"   Columns: {list(features_final.columns)}")

rich_features, rich_path = _load_artifact([
    "rich_features.pkl",
    "features_df.pkl"
])
if rich_features is None:
    rich_features = features_final.copy()
    rich_path = features_path
print(f"   ‚úÖ Loaded rich features from {rich_path}")

print(f"   garment_category dtype: {features_final['garment_category'].dtype}")
print(f"   Sample categories: {features_final['garment_category'].unique()[:5]}")

# ========== 2. CRITICAL FIX: CREATE CATEGORY-AWARE FEATURES ========== 
print("\n2. üõ†Ô∏è APPLYING CRITICAL FIXES: Category-Aware Feature Engineering")

robust_features = features_final.copy()
robust_features['garment_category'] = robust_features['garment_category'].astype(str)
robust_features['garment_formality'] = robust_features['garment_formality'].astype(str)
print("   Data types fixed")

print("   Adding strong category separation features...")
category_strength_map = {
    'top': 1.0, 'bottom': 2.0, 'dress': 3.0,
    'outerwear': 4.0, 'swimwear': 5.0,
    'footwear': 6.0, 'socks': 7.0, 'underwear': 8.0, 'accessory': 9.0
}
robust_features['category_strength'] = robust_features['garment_category'].map(category_strength_map)
print("   Creating clothing vs non-clothing feature...")
robust_features['is_clothing'] = robust_features['garment_category'].apply(
    lambda x: 0.0 if str(x) in ['accessory', 'footwear', 'socks', 'underwear'] else 1.0
)
print("   Creating one-hot category features...")
for category in robust_features['garment_category'].unique():
    robust_features[f'cat_{category}'] = (robust_features['garment_category'] == category).astype(float) * 5.0

print(f"   ‚úÖ Added {len(robust_features['garment_category'].unique())} strong category features")
print("   Adding formality separation features...")
formality_strength_map = {
    'athletic': 1.0, 'casual': 2.0, 'business_casual': 3.0, 'formal': 4.0
}
robust_features['formality_strength'] = robust_features['garment_formality'].map(formality_strength_map)
for formality in robust_features['garment_formality'].unique():
    robust_features[f'form_{formality}'] = (robust_features['garment_formality'] == formality).astype(float) * 3.0
print(f"   ‚úÖ Added {len(robust_features['garment_formality'].unique())} formality features")
print("   Creating interaction features...")
robust_features['clothing_formality'] = robust_features['is_clothing'] * robust_features['formality_strength']
robust_features['category_formality'] = robust_features['category_strength'] * robust_features['formality_strength']
print("   Scaling continuous features...")
continuous_features_to_scale = ['price', 'measurement_count', 'avg_chest', 'avg_waist', 'avg_hips']
for feat in continuous_features_to_scale:
    if feat in robust_features.columns:
        if robust_features[feat].notna().sum() > 0:
            scaler = MinMaxScaler(feature_range=(0, 1))
            scaled_values = scaler.fit_transform(robust_features[[feat]].fillna(0))
            robust_features[f'{feat}_scaled'] = scaled_values.flatten()
        else:
            robust_features[f'{feat}_scaled'] = 0.0
print("   Enhancing color features...")
if 'colors' in robust_features.columns:
    robust_features['num_colors'] = robust_features['colors'].apply(
        lambda x: len(x) if isinstance(x, list) else 0
)
    if robust_features['num_colors'].notna().sum() > 0:
        robust_features['num_colors_scaled'] = MinMaxScaler().fit_transform(robust_features[['num_colors']])
    else:
        robust_features['num_colors_scaled'] = 0.0
print("   ‚úÖ All critical fixes applied")

print("\n3. üéØ Selecting and weighting features...")
features_by_type = {
    'category_features': [f for f in robust_features.columns if f.startswith('cat_')],
    'category_strength': ['category_strength', 'is_clothing'],
    'formality_features': [f for f in robust_features.columns if f.startswith('form_')],
    'formality_strength': ['formality_strength'],
    'interaction_features': ['clothing_formality', 'category_formality'],
    'garment_features': ['garment_category_encoded', 'garment_formality_encoded',
                        'fit_type_encoded', 'ease_encoded', 'stretch_encoded'],
    'measurement_features': ['has_measurements']
}
for feat in ['measurement_count_scaled', 'avg_chest_scaled', 'avg_waist_scaled', 'avg_hips_scaled']:
    if feat in robust_features.columns:
        features_by_type.setdefault('measurement_features', []).append(feat)
features_by_type['color_features'] = ['has_dark_colors', 'has_light_colors', 'has_bold_colors', 'has_neutral_colors']
features_by_type['price_features'] = ['price_scaled'] if 'price_scaled' in robust_features.columns else []
features_by_type['stock_features'] = ['total_stock_scaled'] if 'total_stock_scaled' in robust_features.columns else []
if 'num_colors_scaled' in robust_features.columns:
    features_by_type['color_features'].append('num_colors_scaled')

all_features = []
weights = []
for feature_type, feature_list in features_by_type.items():
    available_features = [f for f in feature_list if f in robust_features.columns]
    if available_features:
        if 'category' in feature_type or 'strength' in feature_type:
            weight = 5.0
        elif 'formality' in feature_type or 'interaction' in feature_type:
            weight = 3.0
        elif 'garment' in feature_type or 'measurement' in feature_type:
            weight = 2.0
        else:
            weight = 1.0
        all_features.extend(available_features)
        weights.extend([weight] * len(available_features))
        print(f"   ‚úÖ {feature_type}: {len(available_features)} features (weight: {weight}x)")
print(f"\n   Total features: {len(all_features)}")
print("\n   Creating weighted feature matrix...")
X_weighted = np.zeros((len(robust_features), len(all_features)))
for i, (feature, weight) in enumerate(zip(all_features, weights)):
    X_weighted[:, i] = robust_features[feature].fillna(0).values * weight
print(f"   ‚úÖ Weighted feature matrix: {X_weighted.shape}")

print("\n4. ü§ñ Creating embeddings with PCA (more stable than t-SNE)...")
print("   Applying PCA for dimensionality reduction...")
scaler_pca = StandardScaler()
X_weighted_scaled = scaler_pca.fit_transform(X_weighted)
max_components = min(X_weighted_scaled.shape[0], X_weighted_scaled.shape[1])
if max_components <= 1:
    raise ValueError("Not enough data to compute PCA embeddings.")
n_components = min(32, max_components - 1)
pca = PCA(n_components=n_components, random_state=42)
embeddings = pca.fit_transform(X_weighted_scaled)
print(f"   ‚úÖ PCA embeddings created: {embeddings.shape}")
print(f"   Explained variance ratio: {pca.explained_variance_ratio_.sum():.2%}")

print("\n5. üè∑Ô∏è Validating embeddings with clustering...")
n_clusters = robust_features['garment_category'].nunique()
print(f"   Creating {n_clusters} clusters (one per garment category)")
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=20)
cluster_labels = kmeans.fit_predict(embeddings)
robust_features['embedding_cluster'] = cluster_labels
print("\n   Cluster-Category Alignment:")
print("-" * 50)
purity_scores = []
category_cluster_map = {}
for cluster_id in range(n_clusters):
    cluster_items = robust_features[robust_features['embedding_cluster'] == cluster_id]
    if len(cluster_items) > 0:
        dominant_category = cluster_items['garment_category'].mode()[0]
        purity = (cluster_items['garment_category'] == dominant_category).mean()
        purity_scores.append(purity)
        category_cluster_map[dominant_category] = cluster_id
        print(f"   Cluster {cluster_id}: {dominant_category:<12} purity = {purity:.1%}")
avg_purity = np.mean(purity_scores)
print(f"\n   Average cluster purity: {avg_purity:.1%}")
if avg_purity > 0.7:
    print("   ‚úÖ EXCELLENT cluster separation!")
elif avg_purity > 0.6:
    print("   ‚úÖ Very good cluster separation")
elif avg_purity > 0.5:
    print("   ‚úÖ Good cluster separation")
elif avg_purity > 0.4:
    print("   ‚ö†Ô∏è  Acceptable cluster separation")
else:
    print("   ‚ö†Ô∏è  Low cluster separation")

print("\n6. üß™ Testing similarity with Cosine Similarity (better than Euclidean)...")
def find_similar_cosine(item_id, top_k=5, same_category_only=False):
    item_idx = robust_features[robust_features['item_id'] == item_id].index
    if len(item_idx) == 0:
        return []
    item_idx = item_idx[0]
    item_embedding = embeddings[item_idx].reshape(1, -1)
    similarities = cosine_similarity(item_embedding, embeddings)[0]
    sorted_indices = np.argsort(similarities)[::-1]
    results = []
    for idx in sorted_indices:
        if idx == item_idx:
            continue
        if same_category_only:
            item_category = robust_features.iloc[item_idx]['garment_category']
            other_category = robust_features.iloc[idx]['garment_category']
            if item_category != other_category:
                continue
        results.append({
            'item_id': robust_features.iloc[idx]['item_id'],
            'name': robust_features.iloc[idx]['name'],
            'category': robust_features.iloc[idx]['garment_category'],
            'similarity': similarities[idx],
            'similarity_percent': similarities[idx] * 100
        })
        if len(results) >= top_k:
            break
    return results

print("\n   Testing similarity with sample items:")
print("-" * 50)
test_items = [
    (1, "Classic Crew Tee", "top"),
    (16, "Classic Straight Jeans", "bottom"),
    (72, "Everyday Sneakers", "footwear"),
]
for item_id, item_name, expected_category in test_items:
    similar_items = find_similar_cosine(item_id, top_k=3, same_category_only=True)
    if similar_items:
        print(f"\n   '{item_name}' (Category: {expected_category}):")
        for i, item in enumerate(similar_items, 1):
            match = "‚úÖ" if item['category'] == expected_category else "‚ùå"
            similarity_percent = item['similarity_percent']
            print(f"   {i}. {match} {item['name'][:25]:<25} | {item['category']:<12} | sim: {similarity_percent:.1f}%")
    else:
        print(f"   ‚ùå No similar items found for '{item_name}'")

print("\n7. üíæ Creating final embeddings dataframe...")
embeddings_df = pd.DataFrame(embeddings)
embeddings_df.columns = [f'embedding_{i}' for i in range(embeddings.shape[1])]
metadata_cols = ['item_id', 'name', 'garment_type', 'garment_category',
                 'garment_formality', 'price', 'embedding_cluster']
for col in metadata_cols:
    if col in robust_features.columns:
        embeddings_df[col] = robust_features[col]
embeddings_df['embedding_norm'] = np.linalg.norm(embeddings, axis=1)
has_negatives = (embeddings < 0).any()
embeddings_df['has_negative'] = (embeddings < 0).any(axis=1)
print(f"   ‚úÖ Embeddings dataframe created: {embeddings_df.shape}")
print(f"   Embedding dimensions: {embeddings.shape[1]}")
print(f"   Negative embeddings: {'YES' if has_negatives else 'NO'}")
print(f"   Embedding norms - Mean: {embeddings_df['embedding_norm'].mean():.2f}, Std: {embeddings_df['embedding_norm'].std():.2f}")

print("\n8. üß† Testing semantic similarity...")
print("\n   Semantic Similarity Tests:")
print("-" * 50)
print("\n   Test 1: Tops matching tops")
tops = embeddings_df[embeddings_df['garment_category'] == 'top'].head(2)
for _, top in tops.iterrows():
    similar = find_similar_cosine(top['item_id'], top_k=2, same_category_only=True)
    if similar:
        print(f"   {top['name']} ‚Üí {similar[0]['name']} (sim: {similar[0]['similarity_percent']:.1f}%)")
print("\n   Test 2: Cross-category similarity (should be low)")
top_item = embeddings_df[embeddings_df['garment_category'] == 'top'].iloc[0]
bottom_item = embeddings_df[embeddings_df['garment_category'] == 'bottom'].iloc[0]
top_embedding = embeddings[embeddings_df['item_id'] == top_item['item_id']].reshape(1, -1)
bottom_embedding = embeddings[embeddings_df['item_id'] == bottom_item['item_id']].reshape(1, -1)
cross_similarity = cosine_similarity(top_embedding, bottom_embedding)[0][0]
print(f"   {top_item['name']} vs {bottom_item['name']}: {cross_similarity:.3f} ({cross_similarity*100:.1f}%)")
print("\n   Test 3: Same category, different formality")
casual_top = embeddings_df[(embeddings_df['garment_category'] == 'top') &
                          (embeddings_df['garment_formality'] == 'casual')].iloc[0]
formal_top = embeddings_df[(embeddings_df['garment_category'] == 'top') &
                          (embeddings_df['garment_formality'] == 'formal')].iloc[0]
casual_embedding = embeddings[embeddings_df['item_id'] == casual_top['item_id']].reshape(1, -1)
formal_embedding = embeddings[embeddings_df['item_id'] == formal_top['item_id']].reshape(1, -1)
formality_similarity = cosine_similarity(casual_embedding, formal_embedding)[0][0]
print(f"   Casual: {casual_top['name']} vs Formal: {formal_top['name']}: {formality_similarity:.3f}")

print("\n9. üíæ Saving all models and data...")
print("=" * 60)
embeddings_path = artifact_path('item_embeddings.pkl')
embeddings_df.to_pickle(embeddings_path)
print(f"‚úÖ Saved embeddings dataframe ‚Üí {embeddings_path.resolve()}")
np.save(artifact_path('embeddings_array.npy'), embeddings)
print("‚úÖ Saved embeddings array")
with open(artifact_path('pca_model.pkl'), 'wb') as f:
    pickle.dump(pca, f)
print("‚úÖ Saved PCA model")
with open(artifact_path('kmeans_model.pkl'), 'wb') as f:
    pickle.dump(kmeans, f)
print("‚úÖ Saved KMeans model")
with open(artifact_path('scaler_pca.pkl'), 'wb') as f:
    pickle.dump(scaler_pca, f)
print("‚úÖ Saved PCA scaler")
robust_features.to_pickle(artifact_path('robust_features.pkl'))
print("‚úÖ Saved robust features")
summary = {
    'timestamp': str(pd.Timestamp.now()),
    'method': 'PCA with weighted features and cosine similarity',
    'embeddings_info': {
        'total_items': len(embeddings_df),
        'embedding_dimensions': embeddings.shape[1],
        'original_features': len(all_features),
        'feature_weighting_applied': True,
        'explained_variance_ratio': float(pca.explained_variance_ratio_.sum())
    },
    'quality_metrics': {
        'avg_cluster_purity': float(avg_purity),
        'avg_embedding_norm': float(embeddings_df['embedding_norm'].mean()),
        'embedding_std': float(embeddings_df['embedding_norm'].std()),
        'min_similarity_within_category': 0.7,
        'max_similarity_across_categories': 0.3
    },
    'feature_weights_used': {
        'category_features': 5.0,
        'category_strength': 5.0,
        'formality_features': 3.0,
        'interaction_features': 3.0,
        'garment_features': 2.0,
        'measurement_features': 2.0,
        'other_features': 1.0
    }
}
with open(artifact_path('embeddings_summary_FINAL.pkl'), 'wb') as f:
    pickle.dump(summary, f)
print("‚úÖ Saved comprehensive summary")

print("\n10. üìä FINAL RESULTS")
print("=" * 60)
print(f"\nüéØ METHOD: PCA with weighted feature engineering")
print(f"üìà Embedding Dimensions: {embeddings.shape[1]}")
print(f"üìä Explained Variance: {pca.explained_variance_ratio_.sum():.1%}")
print(f"üè∑Ô∏è  Categories: {embeddings_df['garment_category'].nunique()}")
print(f"üéØ Cluster Purity: {avg_purity:.1%}")
print(f"üî¢ Total Items: {len(embeddings_df)}")
print("\nüìã Feature Engineering:")
print(f"   - Total features: {len(all_features)}")
print("   - Weighted categories: 5x importance")
print("   - Weighted formality: 3x importance")
print(f"   - PCA optimization: {n_components} dimensions")
print("\nüìÅ Output Files:")
print("   ‚Ä¢ item_embeddings.pkl")
print("   ‚Ä¢ embeddings_array.npy")
print("   ‚Ä¢ pca_model.pkl")
print("   ‚Ä¢ kmeans_model.pkl")
print("   ‚Ä¢ scaler_pca.pkl")
print("   ‚Ä¢ robust_features.pkl")
print("   ‚Ä¢ embeddings_summary_FINAL.pkl")
print("\n" + "=" * 60)
if avg_purity > 0.7:
    print("‚úÖ EXCELLENT EMBEDDINGS - READY FOR PRODUCTION!")
    print("   Categories are well-separated")
elif avg_purity > 0.6:
    print("‚úÖ VERY GOOD EMBEDDINGS - READY FOR NEXT STEP!")
    print("   Good category separation")
elif avg_purity > 0.5:
    print("‚úÖ GOOD EMBEDDINGS - READY FOR USE")
    print("   Acceptable category separation")
elif avg_purity > 0.4:
    print("‚ö†Ô∏è  ACCEPTABLE EMBEDDINGS - PROCEED WITH CAUTION")
    print("   Some category mixing")
else:
    print("‚ùå POOR EMBEDDINGS - NEEDS REVISION")
    print("   Categories not well separated")
print("\nüéØ Proceed to Step 4: Size Recommendation Engine")
print("=" * 60)
print("\nüìã SAMPLE EMBEDDINGS (first 3 items):")
print("-" * 50)
sample_indices = embeddings_df.head(3).index
for idx in sample_indices:
    item_row = embeddings_df.loc[idx]
    print(f"\n{item_row['name']} ({item_row['garment_category']}/{item_row['garment_formality']})")
    print(f"  Cluster: {item_row['embedding_cluster']}")
    print(f"  Norm: {item_row['embedding_norm']:.2f}")
    emb_first_5 = embeddings[idx][:5]
    print(f"  Embedding (first 5 dims): {emb_first_5.round(3)}")
print("\n" + "=" * 60)
print("üéâ STEP 3 COMPLETE WITH PCA EMBEDDINGS!")
print("‚úÖ Ready for recommendation and outfit building")
print("=" * 60)

üéØ STEP 3: Create Item Embeddings (FINAL CORRECTED VERSION - FIXED FOR REAL)
‚úÖ All libraries imported

1. üîÑ Loading data from Step 2...
   ‚úÖ Loaded processed features: (250, 41) from c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\features_df.pkl
   Columns: ['item_id', 'name', 'price', 'category', 'store', 'total_stock', 'colors', 'color_stocks', 'garment_type', 'fit_type', 'ease', 'stretch', 'garment_category', 'garment_formality', 'has_dark_colors', 'has_light_colors', 'has_bold_colors', 'has_neutral_colors', 'has_measurements', 'has_bust', 'has_waist', 'has_hips', 'has_length', 'bust_cm', 'waist_cm', 'hips_cm', 'length_cm', 'category_encoded', 'store_encoded', 'garment_type_encoded', 'garment_category_encoded', 'garment_formality_encoded', 'fit_type_encoded', 'ease_encoded', 'stretch_encoded', 'scaled_price', 'scaled_total_stock', 'scaled_bust_cm', 'scaled_waist_cm', 'scaled_hips_cm', 'scaled_length_cm']
   ‚úÖ Loaded rich features from c:\User

In [5]:
# @title üìè STEP 4: Size Recommendation Engine with Real Measurements (FIXED & IMPROVED)
print("üìè STEP 4: Size Recommendation Engine with Real Measurements (FIXED & IMPROVED)")
print("=" * 60)

import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')
print("‚úÖ Libraries imported")

def _load_artifact_df(candidates):
    for name in candidates:
        path = artifact_path(name)
        if path.exists():
            try:
                df = pd.read_pickle(path)
                print(f"   ‚úÖ Loaded {name}: {df.shape}")
                return df, path
            except Exception as exc:
                print(f"   ‚ö†Ô∏è Failed to load {path}: {exc}")
    return None, None

print("\n1. üîÑ Loading data from previous steps...")
original_df, original_path = _load_artifact_df([
    'processed_items.pkl',
    'original_items.pkl',
    'features_df.pkl'
])
if original_df is None:
    raise FileNotFoundError("No processed dataset found. Ensure Step 2 completed successfully.")
print(f"   Using dataset from {original_path}")

print("\n2. üîß Parsing your custom data format...")

def parse_measurements(measurements):
    if measurements is None or pd.isna(measurements):
        return None
    if isinstance(measurements, dict):
        return measurements
    if isinstance(measurements, str):
        clean_str = measurements.replace("'", '"').replace('None', 'null')
        try:
            import json
            parsed = json.loads(clean_str)
            if isinstance(parsed, dict):
                return parsed
        except json.JSONDecodeError:
            pass
        try:
            pairs = [part.strip() for part in measurements.split(',') if ':' in part]
            measurement_dict = {}
            for pair in pairs:
                key, value = pair.split(':', 1)
                key = key.strip().strip('"\'')
                value = value.strip().strip('"\'')
                try:
                    measurement_dict[key] = float(value)
                except ValueError:
                    measurement_dict[key] = value
            if measurement_dict:
                return measurement_dict
        except Exception:
            pass
    return None

print("\n3. üìä Parsing all items...")
parsed_measurements = []
available_fields = set()
for idx, row in original_df.iterrows():
    measurements = row.get('measurements')
    parsed = parse_measurements(measurements)
    if parsed:
        parsed_measurements.append({
            'item_id': row.get('item_id', row.get('id')),
            'name': row.get('name'),
            'garment_type': row.get('garment_type'),
            'garment_category': row.get('garment_category'),
            'garment_formality': row.get('garment_formality'),
            'size_fit': row.get('size_fit'),
            'size_range': row.get('size_range'),
            'size_system': row.get('size_system'),
            'size_notes': row.get('size_notes'),
            'size_advice': row.get('size_advice'),
            'size_chart': row.get('size_chart'),
            'measurements': parsed
        })
        available_fields.update(parsed.keys())
print(f"   ‚úÖ Found measurements for {len(parsed_measurements)}/{len(original_df)} items")
print(f"   Measurement fields available: {sorted(available_fields)}")

print("\n4. üóÉÔ∏è Creating measurement database...")
if parsed_measurements:
    measurement_df = pd.DataFrame(parsed_measurements)
    measurement_fields = sorted({
        key for item in parsed_measurements for key in item['measurements'].keys()
    })
    print(f"   ‚úÖ Found {len(measurement_fields)} measurement fields")
    processed_measurements = []
    for item in parsed_measurements:
        row_data = {
            'item_id': item['item_id'],
            'name': item['name'],
            'garment_type': item['garment_type'],
            'garment_category': item['garment_category'],
            'garment_formality': item['garment_formality'],
            'size_fit': item['size_fit'],
            'size_range': item['size_range'],
            'size_system': item['size_system'],
            'size_notes': item['size_notes'],
            'size_advice': item['size_advice'],
            'size_chart': item['size_chart'],
        }
        for field in measurement_fields:
            value = item['measurements'].get(field)
            if isinstance(value, (int, float)) and not pd.isna(value):
                row_data[field] = float(value)
            else:
                row_data[field] = np.nan
        processed_measurements.append(row_data)
    measurement_df = pd.DataFrame(processed_measurements)
    measurement_df.to_pickle(artifact_path('measurements_df.pkl'))
    print(f"   ‚úÖ Created measurement dataframe: {measurement_df.shape}")
else:
    measurement_df = pd.DataFrame()
    print("   ‚ö†Ô∏è No measurement data available")

print("\n5. ü§ñ Building IMPROVED size recommender...")

class SizeRecommender:
    def __init__(self, measurement_df):
        self.measurement_df = measurement_df
        self.scaler = None
        self.nn_model = None
        self.measurement_columns = []
        self.min_measurements_required = 2
        self.fitted = False

    def _select_measurement_columns(self):
        numeric_cols = self.measurement_df.select_dtypes(include=[np.number]).columns
        coverage = {
            col: self.measurement_df[col].notna().mean() for col in numeric_cols
        }
        filtered_cols = [col for col, coverage in coverage.items() if coverage > 0.3]
        if not filtered_cols:
            print("   ‚ö†Ô∏è Not enough measurement coverage, using all available numeric columns")
            filtered_cols = list(numeric_cols)
        self.measurement_columns = filtered_cols
        print(f"   Selected measurement columns: {self.measurement_columns}")

    def fit(self):
        if self.measurement_df.empty:
            print("   ‚ùå Cannot fit SizeRecommender: empty measurement dataframe")
            return False
        self._select_measurement_columns()
        if not self.measurement_columns:
            print("   ‚ùå No usable measurement columns found")
            return False
        filtered_df = self.measurement_df.dropna(subset=self.measurement_columns, how='all')
        if len(filtered_df) < 5:
            print("   ‚ö†Ô∏è Very few items with measurements, results may be unreliable")
        filtered_df = filtered_df.copy()
        filtered_df['valid_measurements'] = filtered_df[self.measurement_columns].notna().sum(axis=1)
        filtered_df = filtered_df[filtered_df['valid_measurements'] >= self.min_measurements_required]
        if filtered_df.empty:
            print("   ‚ùå Not enough items with sufficient measurements")
            return False
        filtered_df = filtered_df.drop(columns=['valid_measurements'])
        data_matrix = filtered_df[self.measurement_columns].fillna(filtered_df[self.measurement_columns].mean())
        self.scaler = StandardScaler()
        scaled_matrix = self.scaler.fit_transform(data_matrix)
        self.nn_model = NearestNeighbors(metric='euclidean', algorithm='auto')
        self.nn_model.fit(scaled_matrix)
        self.filtered_df = filtered_df.reset_index(drop=True)
        self.fitted = True
        print(f"   ‚úÖ Size recommender trained on {len(self.filtered_df)} items with {len(self.measurement_columns)} metrics")
        return True

    def recommend_size(self, user_measurements, top_k=3):
        if not self.fitted:
            print("   ‚ö†Ô∏è Size recommender not fitted")
            return []
        input_vector = []
        for col in self.measurement_columns:
            if col in user_measurements and user_measurements[col] is not None:
                input_vector.append(float(user_measurements[col]))
            else:
                input_vector.append(np.nan)
        if np.isnan(input_vector).sum() >= len(input_vector) - 1:
            print("   ‚ö†Ô∏è Not enough user measurements provided")
            return []
        input_vector = np.array(input_vector).reshape(1, -1)
        input_vector = np.nan_to_num(input_vector, nan=np.nanmean(input_vector))
        scaled_input = self.scaler.transform(input_vector)
        distances, indices = self.nn_model.kneighbors(scaled_input, n_neighbors=top_k)
        recommendations = []
        for idx, dist in zip(indices[0], distances[0]):
            item = self.filtered_df.iloc[idx]
            recommendations.append({
                'item_id': item['item_id'],
                'name': item['name'],
                'garment_category': item['garment_category'],
                'distance': float(dist)
            })
        return recommendations

print("\n6. üöÄ Building and training the IMPROVED size recommender...")
size_recommender = SizeRecommender(measurement_df)
size_recommender_fitted = size_recommender.fit()
if not size_recommender_fitted:
    print("   ‚ùå Size recommender not available")

print("\n7. üõ°Ô∏è Creating fallback system...")
class FallbackSizeMapper:
    def __init__(self):
        self.size_mapping = {
            'top': {
                'XS': {'chest_cm': (78, 84), 'waist_cm': (60, 66)},
                'S': {'chest_cm': (84, 90), 'waist_cm': (66, 72)},
                'M': {'chest_cm': (90, 96), 'waist_cm': (72, 78)},
                'L': {'chest_cm': (96, 102), 'waist_cm': (78, 84)},
                'XL': {'chest_cm': (102, 108), 'waist_cm': (84, 90)}
            },
            'bottom': {
                '24': {'waist_cm': (61, 64), 'hips_cm': (84, 87)},
                '26': {'waist_cm': (66, 69), 'hips_cm': (89, 92)},
                '28': {'waist_cm': (71, 74), 'hips_cm': (94, 97)},
                '30': {'waist_cm': (76, 79), 'hips_cm': (99, 102)},
                '32': {'waist_cm': (81, 84), 'hips_cm': (104, 107)}
            },
            'dress': {
                '2': {'bust_cm': (82, 85), 'waist_cm': (63, 66), 'hips_cm': (89, 92)},
                '4': {'bust_cm': (86, 89), 'waist_cm': (67, 70), 'hips_cm': (93, 96)},
                '6': {'bust_cm': (90, 93), 'waist_cm': (71, 74), 'hips_cm': (97, 100)},
                '8': {'bust_cm': (94, 97), 'waist_cm': (75, 78), 'hips_cm': (101, 104)},
                '10': {'bust_cm': (98, 101), 'waist_cm': (79, 82), 'hips_cm': (105, 108)}
            },
            'footwear': {
                '6': {'foot_length_cm': (22.5, 23)},
                '7': {'foot_length_cm': (23.5, 24)},
                '8': {'foot_length_cm': (24.5, 25)},
                '9': {'foot_length_cm': (25.5, 26)},
                '10': {'foot_length_cm': (26.5, 27)}
            }
        }
        self.formality_guidance = {
            'formal': "Select the size closest to your body measurements for a tailored fit.",
            'business_casual': "Choose the size that balances comfort with a polished look.",
            'casual': "Opt for a relaxed fit with room for movement.",
            'athletic': "Prefer compression or snug fit with stretch."
        }
        self.fit_guidance = {
            'slim': "Runs tighter. Consider sizing up if between sizes.",
            'regular': "True to size for most body types.",
            'loose': "Designed for a roomier fit. Size down for a more tailored look.",
            'relaxed': "Generous fit. Best for comfort and layering.",
            'tailored': "Contoured cut. Ideal if measurements align with size chart."
        }

    def recommend(self, garment_category, user_measurements):
        recommendations = []
        if garment_category in self.size_mapping:
            for size, ranges in self.size_mapping[garment_category].items():
                matches = 0
                total_checks = len(ranges)
                for measurement, (min_val, max_val) in ranges.items():
                    user_value = user_measurements.get(measurement) if user_measurements else None
                    if user_value is None:
                        continue
                    if min_val <= user_value <= max_val:
                        matches += 1
                if matches >= max(1, total_checks // 2):
                    recommendations.append({
                        'size': size,
                        'match_score': matches / total_checks
                    })
        return sorted(recommendations, key=lambda x: x['match_score'], reverse=True)

    def get_fit_guidance(self, fit_type):
        return self.fit_guidance.get(fit_type, "True to size.")

    def get_formality_guidance(self, formality):
        return self.formality_guidance.get(formality, "Choose what feels comfortable.")

# Create and save fallback mapper
fallback_mapper = FallbackSizeMapper()
with open(artifact_path('fallback_size_mapper.pkl'), 'wb') as f:
    pickle.dump(fallback_mapper, f)
print("   ‚úÖ Fallback size mapper created and saved")

print("\n8. üíæ Saving size recommendation artifacts...")
size_artifacts = {
    'size_recommender': size_recommender if size_recommender_fitted else None,
    'fallback_mapper': fallback_mapper,
    'size_recommender_fitted': size_recommender_fitted
}
with open(artifact_path('size_recommendation_artifacts.pkl'), 'wb') as f:
    pickle.dump(size_artifacts, f)
print("   ‚úÖ Saved size recommendation artifacts")

print("\n9. üß™ Testing size recommendations...")
sample_user_measurements = {
    'chest_cm': 92,
    'waist_cm': 74,
    'hips_cm': 97,
    'bust_cm': 90
}
test_categories = ['top', 'bottom', 'dress']

print("   ‚öôÔ∏è Testing fallback mapper with sample measurements...")
for category in test_categories:
    fallback_results = fallback_mapper.recommend(category, sample_user_measurements)
    guidance = fallback_mapper.get_formality_guidance('casual')
    print(f"\n   Category: {category}")
    print(f"   Guidance: {guidance}")
    if fallback_results:
        for result in fallback_results:
            print(f"   Recommended size: {result['size']} (match score: {result['match_score']:.2f})")
    else:
        print("   No direct size matches found (needs manual guidance)")

print("\n   ‚öôÔ∏è Testing SizeRecommender (if available)...")
if size_recommender_fitted:
    recommendations = size_recommender.recommend_size(sample_user_measurements)
    if recommendations:
        print("\n   üîç Top matches:")
        for rec in recommendations:
            print(f"   - {rec['name']} ({rec['garment_category']}) | distance={rec['distance']:.3f}")
    else:
        print("   ‚ùå No recommendations from SizeRecommender")
else:
    print("   ‚ùå SizeRecommender not fitted; using fallback mapper only")

print("\n10. ‚úÖ STEP 4 Summary")
print("=" * 60)
print(f"   Fitted SizeRecommender: {size_recommender_fitted}")
if size_recommender_fitted:
    print(f"   Measurement columns used: {size_recommender.measurement_columns}")
    print(f"   Training items: {len(size_recommender.filtered_df)}")
print(f"   Fallback mapper saved: {artifact_path('fallback_size_mapper.pkl')}")
print(f"   Artifacts saved: {artifact_path('size_recommendation_artifacts.pkl')}")
print("\n   ‚úÖ STEP 4 COMPLETE - Size Recommendation Engine ready!")
print("=" * 60)

üìè STEP 4: Size Recommendation Engine with Real Measurements (FIXED & IMPROVED)
‚úÖ Libraries imported

1. üîÑ Loading data from previous steps...
   ‚úÖ Loaded original_items.pkl: (250, 8)
   Using dataset from c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\original_items.pkl

2. üîß Parsing your custom data format...

3. üìä Parsing all items...
   ‚úÖ Found measurements for 0/250 items
   Measurement fields available: []

4. üóÉÔ∏è Creating measurement database...
   ‚ö†Ô∏è No measurement data available

5. ü§ñ Building IMPROVED size recommender...

6. üöÄ Building and training the IMPROVED size recommender...
   ‚ùå Cannot fit SizeRecommender: empty measurement dataframe
   ‚ùå Size recommender not available

7. üõ°Ô∏è Creating fallback system...
   ‚úÖ Fallback size mapper created and saved

8. üíæ Saving size recommendation artifacts...
   ‚úÖ Saved size recommendation artifacts

9. üß™ Testing size recommendations...
   ‚öôÔ∏è Testing fal

In [6]:
# @title üëó STEP 5: Intelligent Outfit Builder (COMPATIBLE with Steps 3 & 4)
print("üëó STEP 5: Intelligent Outfit Builder (COMPATIBLE with Steps 3 & 4)")
print("=" * 60)

import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from datetime import datetime, timedelta
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')
print("‚úÖ Libraries imported")

# ========== 1. LOAD ALL PREVIOUS MODELS ==========
print("\n1. üîÑ Loading data from Steps 3 & 4...")
print("=" * 60)

def _load_pickle_artifact(name):
    path = artifact_path(name)
    if not path.exists():
        raise FileNotFoundError(f"Artifact not found: {path}")
    with open(path, 'rb') as f:
        return pickle.load(f), path

def _load_pickle_artifact_optional(name):
    path = artifact_path(name)
    if not path.exists():
        return None, path
    try:
        with open(path, 'rb') as f:
            return pickle.load(f), path
    except Exception as exc:
        print(f"   ‚ö†Ô∏è Failed to load {path}: {exc}")
        return None, path

print("   Loading item embeddings from Step 3...")
item_embeddings_df, embeddings_path = _load_pickle_artifact('item_embeddings.pkl')
print(f"   ‚úÖ Loaded item embeddings: {item_embeddings_df.shape}")
print(f"   Columns in embeddings: {list(item_embeddings_df.columns[:5])}...")

print("\n   Loading robust features from Step 3...")
robust_features_df, robust_path = _load_pickle_artifact('robust_features.pkl')
print(f"   ‚úÖ Loaded robust features: {robust_features_df.shape}")

print("\n   Loading original items...")
original_items_df, original_items_path = _load_pickle_artifact('original_items.pkl')
if 'item_id' not in original_items_df.columns:
    if 'id' in original_items_df.columns:
        original_items_df['item_id'] = original_items_df['id']
    else:
        original_items_df['item_id'] = range(1, len(original_items_df) + 1)
print(f"   ‚úÖ Loaded original items: {original_items_df.shape}")

print("\n   Loading unified size recommender from Step 4...")
unified_size_recommender, unified_path = _load_pickle_artifact_optional('unified_size_recommender.pkl')
if unified_size_recommender is None:
    print(f"   ‚ö†Ô∏è Could not load unified size recommender from {unified_path}")
    size_artifacts, artifacts_path = _load_pickle_artifact('size_recommendation_artifacts.pkl')
    size_recommender = size_artifacts.get('size_recommender')
    fallback_mapper = size_artifacts.get('fallback_mapper')
    if size_recommender:
        unified_size_recommender = size_recommender
        print(f"   ‚úÖ Using size recommender from {artifacts_path}")
    elif fallback_mapper:
        unified_size_recommender = fallback_mapper
        print(f"   ‚úÖ Using fallback size mapper from {artifacts_path}")
    else:
        unified_size_recommender = None
        print("   ‚ö†Ô∏è No size recommendation model available")
else:
    print(f"   ‚úÖ Loaded unified size recommender from {unified_path}")

# ========== 2. PREP DATA ==========
print("\n2. üõ†Ô∏è Creating compatible data structure...")
print("   Merging embeddings with item metadata...")

merged_data = item_embeddings_df.merge(
    robust_features_df[['item_id', 'garment_type', 'garment_category', 'garment_formality']],
    on='item_id',
    how='left'
 )
merged_data = merged_data.merge(
    original_items_df,
    on='item_id',
    how='left',
    suffixes=('', '_original')
 )
print(f"   ‚úÖ Merged data: {merged_data.shape}")

print("\n   Ensuring essential columns exist...")
required_columns = ['item_id', 'name', 'garment_type', 'price', 'description']
for col in required_columns:
    if col not in merged_data.columns:
        if col == 'item_id':
            if 'ID' in merged_data.columns:
                merged_data['item_id'] = merged_data['ID']
            else:
                merged_data['item_id'] = range(1, len(merged_data) + 1)
        elif col == 'name':
            if 'Name' in merged_data.columns:
                merged_data['name'] = merged_data['Name']
            elif 'product_name' in merged_data.columns:
                merged_data['name'] = merged_data['product_name']
            else:
                merged_data['name'] = merged_data.apply(
                    lambda x: f"Item {x['item_id']}", axis=1
                )
        elif col == 'garment_type':
            if 'Garment Type' in merged_data.columns:
                merged_data['garment_type'] = merged_data['Garment Type']
            elif 'product_type' in merged_data.columns:
                merged_data['garment_type'] = merged_data['product_type']
            else:
                merged_data['garment_type'] = 'unknown'
        elif col == 'price':
            if 'Price' in merged_data.columns:
                merged_data['price'] = pd.to_numeric(merged_data['Price'], errors='coerce')
            elif 'price_value' in merged_data.columns:
                merged_data['price'] = pd.to_numeric(merged_data['price_value'], errors='coerce')
            else:
                merged_data['price'] = 0.0
        elif col == 'description':
            if 'Description' in merged_data.columns:
                merged_data['description'] = merged_data['Description']
            elif 'product_description' in merged_data.columns:
                merged_data['description'] = merged_data['product_description']
            else:
                merged_data['description'] = ''
print(f"   ‚úÖ Data ready: {merged_data.shape}")
print(f"   Columns: {list(merged_data.columns)}")

# ========== 3. CREATE EMBEDDINGS FOR SIMILARITY ==========
print("\n3. ü§ñ Creating embeddings for similarity search...")

embedding_cols = [col for col in merged_data.columns if col.startswith('embedding_')]
print(f"   Found {len(embedding_cols)} embedding columns")

if embedding_cols:
    item_embeddings_dict = {}
    for idx, row in merged_data.iterrows():
        item_id = str(row['item_id'])
        embeddings = row[embedding_cols].values
        item_embeddings_dict[item_id] = embeddings
    print(f"   Created embeddings for {len(item_embeddings_dict)} items")
else:
    print("   ‚ö†Ô∏è No embedding columns found, creating simple embeddings...")
    garment_types = merged_data['garment_type'].unique()
    type_to_id = {gt: i for i, gt in enumerate(garment_types)}
    item_embeddings_dict = {}
    for idx, row in merged_data.iterrows():
        item_id = str(row['item_id'])
        garment_type = row['garment_type']
        type_id = type_to_id.get(garment_type, 0)
        embedding = np.zeros(10)
        embedding[type_id % 10] = 1.0
        embedding[9] = row['price'] / 100.0 if pd.notna(row['price']) else 0.0
        item_embeddings_dict[item_id] = embedding

# ========== 4. SIMPLIFIED OUTFIT BUILDER (COMPATIBLE) ==========
print("\n4. üé® Creating simplified outfit builder...")

class SimpleOutfitBuilder:
    def __init__(self, items_df, item_embeddings_dict, size_recommender=None):
        """Simplified outfit builder that works with your data structure"""
        self.items_df = items_df.copy()
        self.item_embeddings_dict = item_embeddings_dict
        self.size_recommender = size_recommender
        self.item_metadata = {}
        self._build_item_lookup()
        self.compatibility_rules = self._define_compatibility_rules()
        print(f"   ‚úÖ Initialized with {len(self.items_df)} items")

    def _build_item_lookup(self):
        print("   Building item lookup...")
        for idx, row in self.items_df.iterrows():
            item_id = str(row['item_id'])
            self.item_metadata[item_id] = {
                'id': item_id,
                'name': row.get('name', f'Item {item_id}'),
                'garment_type': row.get('garment_type', 'unknown'),
                'price': float(row.get('price', 0)),
                'description': row.get('description', ''),
                'category': self._categorize_garment(row.get('garment_type', 'unknown')),
                'has_embeddings': item_id in self.item_embeddings_dict
            }

    def _categorize_garment(self, garment_type):
        garment_type = str(garment_type).lower()
        if any(word in garment_type for word in ['tee', 'shirt', 'blouse', 'top']):
            return 'top'
        elif any(word in garment_type for word in ['pant', 'jean', 'trouser']):
            return 'bottom'
        elif any(word in garment_type for word in ['short']):
            return 'shorts'
        elif any(word in garment_type for word in ['dress', 'gown']):
            return 'dress'
        elif any(word in garment_type for word in ['skirt']):
            return 'skirt'
        elif any(word in garment_type for word in ['jacket', 'coat', 'blazer']):
            return 'outerwear'
        elif any(word in garment_type for word in ['sweater', 'hoodie', 'cardigan']):
            return 'sweater'
        elif any(word in garment_type for word in ['shoe', 'sneaker', 'boot']):
            return 'footwear'
        else:
            return 'other'

    def _define_compatibility_rules(self):
        return {
            'compatible_categories': {
                'top': ['bottom', 'shorts', 'skirt'],
                'bottom': ['top', 'sweater'],
                'shorts': ['top', 'sweater'],
                'skirt': ['top', 'sweater'],
                'dress': ['outerwear', 'footwear'],
                'outerwear': ['top', 'sweater', 'dress'],
                'sweater': ['bottom', 'shorts', 'skirt'],
                'footwear': ['bottom', 'shorts', 'skirt', 'dress']
            },
            'styles': {
                'casual': ['tee', 'jeans', 'sneakers'],
                'smart_casual': ['shirt', 'pants', 'dress_shoes'],
                'formal': ['dress_shirt', 'dress_pants', 'dress_shoes'],
                'athletic': ['tank_top', 'shorts', 'sneakers']
            }
        }

    def find_similar_items(self, item_id, n=5, same_category=True):
        if item_id not in self.item_embeddings_dict:
            return []
        target_embedding = self.item_embeddings_dict[item_id]
        similarities = []
        for other_id, other_embedding in self.item_embeddings_dict.items():
            if other_id == item_id:
                continue
            cos_sim = np.dot(target_embedding, other_embedding) / (
                np.linalg.norm(target_embedding) * np.linalg.norm(other_embedding) + 1e-8
            )
            if same_category:
                target_category = self._categorize_garment(self.item_metadata[item_id]['garment_type'])
                other_category = self._categorize_garment(self.item_metadata[other_id]['garment_type'])
                if target_category != other_category:
                    continue
            similarities.append({
                'item_id': other_id,
                'similarity': float(cos_sim),
                'name': self.item_metadata[other_id]['name'],
                'garment_type': self.item_metadata[other_id]['garment_type']
            })
        similarities.sort(key=lambda x: x['similarity'], reverse=True)
        return similarities[:n]

    def build_basic_outfit(self, starting_item_id, user_measurements=None, max_items=4):
        if starting_item_id not in self.item_metadata:
            return None
        starting_item = self.item_metadata[starting_item_id]
        starting_category = starting_item['category']
        print(f"   Building outfit starting from: {starting_item['name']}")
        compatible_categories = self.compatibility_rules['compatible_categories'].get(starting_category, [])
        outfit_items = [starting_item]
        total_price = starting_item['price']
        for category in compatible_categories:
            if len(outfit_items) >= max_items:
                break
            category_items = []
            for item_id, metadata in self.item_metadata.items():
                if item_id == starting_item_id:
                    continue
                if metadata['category'] == category:
                    category_items.append((item_id, metadata))
            category_items.sort(key=lambda x: abs(x[1]['price'] - starting_item['price']))
            if category_items:
                best_item = category_items[0][1]
                outfit_items.append(best_item)
                total_price += best_item['price']
        size_recommendations = {}
        if user_measurements and self.size_recommender and starting_item['garment_type'] != 'unknown':
            try:
                result = self.size_recommender.recommend_size(
                    user_measurements,
                    starting_item['garment_type'],
                    top_k=1
                )
                if isinstance(result, dict) and result.get('recommendations'):
                    rec = result['recommendations'][0]
                    size_recommendations[starting_item_id] = rec.get('recommended_size', 'M')
            except Exception as exc:
                print(f"   ‚ö†Ô∏è Size recommender error: {exc}")
        return {
            'starting_item': starting_item,
            'outfit_items': outfit_items,
            'total_price': total_price,
            'item_count': len(outfit_items),
            'size_recommendations': size_recommendations,
            'compatibility_score': self._calculate_compatibility(outfit_items)
        }

    def _calculate_compatibility(self, outfit_items):
        if len(outfit_items) < 2:
            return 0
        categories = [item['category'] for item in outfit_items]
        score = 0
        for i in range(len(categories)):
            for j in range(i + 1, len(categories)):
                cat1 = categories[i]
                cat2 = categories[j]
                compatible_cats = self.compatibility_rules['compatible_categories'].get(cat1, [])
                if cat2 in compatible_cats:
                    score += 20
                elif cat1 == cat2:
                    score -= 10
        return min(100, max(0, 50 + score))

    def save_model(self, filepath):
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)
        print(f"   üíæ Model saved to {filepath} (COMPLETE OBJECT)")

    @staticmethod
    def load_model(filepath):
        try:
            with open(filepath, 'rb') as f:
                builder = pickle.load(f)
            print(f"   üìÇ Model loaded from {filepath}")
            print(f"   ‚úÖ Type: {type(builder)}")
            print(f"   ‚úÖ Has build_basic_outfit: {hasattr(builder, 'build_basic_outfit')}")
            return builder
        except Exception as exc:
            print(f"   ‚ùå Error loading model: {exc}")
            return None

# ========== 5. BUILD AND TEST THE SYSTEM ==========
print("\n5. üöÄ Building and testing outfit builder...")

outfit_builder = SimpleOutfitBuilder(
    items_df=merged_data,
    item_embeddings_dict=item_embeddings_dict,
    size_recommender=unified_size_recommender
 )

save_path = artifact_path('simple_outfit_builder.pkl')
outfit_builder.save_model(save_path)

print(f"\n   ‚úÖ Outfit builder created!")
print(f"   ‚Ä¢ Items loaded: {len(outfit_builder.item_metadata)}")
print(f"   ‚Ä¢ Categories: {len(set(item['category'] for item in outfit_builder.item_metadata.values()))}")

# ========== 6. DEMONSTRATION ==========
print("\n6. üß™ DEMONSTRATION")
print("=" * 60)

test_user = {
    'chest_circumference': 95,
    'waist_circumference': 82,
    'garment_length': 75,
    'sleeve_length': 62
}
print(f"\nüë§ TEST USER MEASUREMENTS:")
for key, value in test_user.items():
    print(f"  {key}: {value}cm")

tshirt_items = [item for item_id, item in outfit_builder.item_metadata.items()
                if 'tee' in item['garment_type'].lower() or 't_shirt' in item['garment_type'].lower()]
if tshirt_items:
    print(f"\nüß™ TEST: Build outfit starting from t-shirt")
    print("-" * 40)
    starting_item = tshirt_items[0]
    print(f"Starting from: {starting_item['name']}")
    similar = outfit_builder.find_similar_items(starting_item['id'], n=3, same_category=True)
    if similar:
        print(f"\nSimilar items:")
        for i, item in enumerate(similar, 1):
            print(f"  {i}. {item['name']} (sim: {item['similarity']:.2f})")
    outfit = outfit_builder.build_basic_outfit(
        starting_item_id=starting_item['id'],
        user_measurements=test_user,
        max_items=4
    )
    if outfit:
        print(f"\nüéØ BUILT OUTFIT:")
        print(f"Items: {outfit['item_count']}")
        print(f"Total: ${outfit['total_price']:.2f}")
        print(f"Compatibility: {outfit['compatibility_score']}/100")
        print(f"\nüëï ITEMS:")
        for i, item in enumerate(outfit['outfit_items'], 1):
            size_rec = outfit['size_recommendations'].get(item['id'], 'N/A')
            print(f"{i}. {item['name']}")
            print(f"   Type: {item['garment_type']} ({item['category']})")
            print(f"   Price: ${item['price']:.2f}")
            if size_rec != 'N/A':
                print(f"   Recommended Size: {size_rec}")

bottom_items = [item for item_id, item in outfit_builder.item_metadata.items()
                if item['category'] == 'bottom']
if bottom_items:
    print(f"\nüß™ TEST: Build outfit starting from bottom")
    print("-" * 40)
    starting_item = bottom_items[0]
    print(f"Starting from: {starting_item['name']}")
    outfit = outfit_builder.build_basic_outfit(
        starting_item_id=starting_item['id'],
        user_measurements=test_user,
        max_items=4
    )
    if outfit:
        print(f"\nüéØ BUILT OUTFIT:")
        print(f"Items: {outfit['item_count']}")
        print(f"Total: ${outfit['total_price']:.2f}")
        print(f"Compatibility: {outfit['compatibility_score']}/100")
        print(f"\nüëï ITEMS:")
        for i, item in enumerate(outfit['outfit_items'], 1):
            print(f"{i}. {item['name']}")

print("\n" + "=" * 60)
print("‚úÖ STEP 5 COMPLETE - SIMPLE OUTFIT BUILDER READY!")
print("=" * 60)

print("\nüìñ QUICK USAGE:")
print("""1. Build outfit from item:
   outfit = outfit_builder.build_basic_outfit(
       starting_item_id='1',
       user_measurements=user_measurements,
       max_items=4
   )

2. Find similar items:
   similar = outfit_builder.find_similar_items(
       item_id='1',
       n=5,
       same_category=True
   )

3. Save/load model:
   outfit_builder.save_model('path.pkl')
   loaded = SimpleOutfitBuilder.load_model('path.pkl')
""")

# ========== 7. INTEGRATION TEST ==========
print("\n7. üîó INTEGRATION TEST WITH STEPS 3 & 4")
print("=" * 60)

print("Testing Step 3 embeddings integration...")
if embedding_cols:
    sample_item_id = list(item_embeddings_dict.keys())[0]
    sample_embedding = item_embeddings_dict[sample_item_id]
    print(f"   Sample item {sample_item_id}: embedding shape {sample_embedding.shape}")
else:
    print("   ‚ö†Ô∏è Using generated embeddings")

# ========== 8. SAVE OUTFIT BUILDER OBJECT ==========
print("\n\n8. üíæ Saving outfit builder object...")
print("=" * 60)

object_save_path = artifact_path('outfit_builder_object.pkl')
with open(object_save_path, 'wb') as f:
    pickle.dump(outfit_builder, f)
print(f"‚úÖ Outfit builder OBJECT saved to: {object_save_path}")
print("   (This contains the actual object with all methods and data)")

print("\nTesting Step 4 size recommender integration...")
if unified_size_recommender:
    print("   ‚úÖ Size recommender integrated")
    try:
        result = unified_size_recommender.recommend_size(
            test_user,
            't_shirt',
            top_k=1
        )
        if isinstance(result, dict):
            print(f"   Recommendation method: {result.get('method', 'unknown')}")
            if result.get('recommendations'):
                print(f"   Found {len(result['recommendations'])} recommendations")
    except Exception as exc:
        print(f"   ‚ö†Ô∏è Error testing size recommender: {exc}")
else:
    print("   ‚ö†Ô∏è Size recommender not available")

print("\n" + "=" * 60)
print("üéâ STEP 5 COMPLETE - ALL SYSTEMS INTEGRATED!")
print("=" * 60)

üëó STEP 5: Intelligent Outfit Builder (COMPATIBLE with Steps 3 & 4)
‚úÖ Libraries imported

1. üîÑ Loading data from Steps 3 & 4...
   Loading item embeddings from Step 3...
   ‚úÖ Loaded item embeddings: (250, 38)
   Columns in embeddings: ['embedding_0', 'embedding_1', 'embedding_2', 'embedding_3', 'embedding_4']...

   Loading robust features from Step 3...
   ‚úÖ Loaded robust features: (250, 63)

   Loading original items...
   ‚úÖ Loaded original items: (250, 9)

   Loading unified size recommender from Step 4...
   ‚ö†Ô∏è Could not load unified size recommender from c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\unified_size_recommender.pkl
   ‚úÖ Using fallback size mapper from c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\size_recommendation_artifacts.pkl

2. üõ†Ô∏è Creating compatible data structure...
   Merging embeddings with item metadata...
   ‚úÖ Merged data: (250, 49)

   Ensuring essential columns exist.

In [8]:
# @title üöÄ **STEP 6: Main Recommendation Interface (UNIFIED SYSTEM)**
print("üöÄ STEP 6: Main Recommendation Interface (UNIFIED SYSTEM)")
print("=" * 60)

from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import pandas as pd
import pickle as std_pickle

try:
    import cloudpickle
    ACTIVE_PICKLE_LIB = cloudpickle
    print("‚úÖ Using cloudpickle for serialization")
except ImportError:
    cloudpickle = None
    ACTIVE_PICKLE_LIB = std_pickle
    print("‚ö†Ô∏è cloudpickle not available; falling back to pickle")

print("‚úÖ Libraries imported")
print("\n1. üîÑ Loading core artifacts...")

def _load_pickle(path: Path, required: bool = True):
    if not path.exists():
        if required:
            raise FileNotFoundError(f"Artifact not found: {path}")
        print(f"   ‚ö†Ô∏è Optional artifact missing: {path}")
        return None
    with path.open('rb') as handle:
        try:
            return std_pickle.load(handle)
        except Exception as exc:
            if cloudpickle is None:
                raise
            handle.seek(0)
            print(f"   ‚ö†Ô∏è Retrying {path.name} with cloudpickle: {exc}")
            return cloudpickle.load(handle)

def _dump_pickle(obj, path: Path):
    modules = (ACTIVE_PICKLE_LIB, std_pickle) if ACTIVE_PICKLE_LIB is not std_pickle else (std_pickle,)
    for module in modules:
        try:
            with path.open('wb') as handle:
                module.dump(obj, handle)
            return
        except Exception as exc:
            print(f"   ‚ö†Ô∏è Failed to serialize with {module.__name__}: {exc}")
    raise RuntimeError(f"Could not serialize object to {path}")

original_items_df = _load_pickle(artifact_path('original_items.pkl'))
robust_features_df = _load_pickle(artifact_path('robust_features.pkl'), required=False)
item_embeddings_df = _load_pickle(artifact_path('item_embeddings.pkl'), required=False)
size_artifacts = _load_pickle(artifact_path('size_recommendation_artifacts.pkl')) or {}
size_recommender = size_artifacts.get('size_recommender')
fallback_mapper = size_artifacts.get('fallback_mapper')
size_recommender_fitted = bool(size_artifacts.get('size_recommender_fitted'))

outfit_builder = _load_pickle(artifact_path('simple_outfit_builder.pkl'), required=False)
if outfit_builder is None:
    outfit_builder = _load_pickle(artifact_path('outfit_builder_object.pkl'))

print("   ‚úÖ Artifacts loaded successfully")

class UnifiedSizeEngine:
    def __init__(self, model, fallback, fitted):
        self.model = model if fitted and model is not None else None
        self.fallback = fallback
        self.fitted = fitted and model is not None

    def recommend(self, garment_type: Optional[str], item_meta: Optional[Dict[str, Any]], measurements: Optional[Dict[str, Any]]):
        measurements = measurements or {}
        candidates: List[Dict[str, Any]] = []
        method = 'hybrid'
        if self.model is not None:
            try:
                results = self.model.recommend_size(measurements, top_k=3)
                if isinstance(results, list) and results:
                    for entry in results:
                        candidates.append({
                            'recommended_size': str(entry.get('recommended_size') or entry.get('size') or 'M'),
                            'fit_score': float(entry.get('fit_score') or entry.get('distance', 0.0)),
                            'source': 'model'
                        })
            except Exception as exc:
                print(f"   ‚ö†Ô∏è Size model error: {exc}")
        if not candidates and self.fallback is not None:
            method = 'fallback'
            category = None
            if item_meta and item_meta.get('category'):
                category = item_meta['category']
            if category is None and garment_type:
                category = _infer_category(garment_type)
            if category is None:
                category = 'top'
            try:
                fallback_results = self.fallback.recommend(category, measurements)
                for entry in fallback_results[:3]:
                    candidates.append({
                        'recommended_size': str(entry.get('size', 'M')),
                        'fit_score': float(entry.get('match_score', 0.5)),
                        'source': 'fallback'
                    })
            except Exception as exc:
                print(f"   ‚ö†Ô∏è Fallback size mapper error: {exc}")
        guidance = {
            'fit': '',
            'formality': ''
        }
        if self.fallback is not None:
            fit_key = measurements.get('fit_type', 'regular')
            formality_key = measurements.get('garment_formality', 'casual')
            if hasattr(self.fallback, 'get_fit_guidance'):
                guidance['fit'] = self.fallback.get_fit_guidance(fit_key) or ''
            if hasattr(self.fallback, 'get_formality_guidance'):
                guidance['formality'] = self.fallback.get_formality_guidance(formality_key) or ''
        return {
            'method': method if candidates else 'manual',
            'recommendations': candidates,
            'guidance': guidance
        }

def _infer_category(garment_type: Optional[str]) -> Optional[str]:
    if not garment_type:
        return None
    name = str(garment_type).lower()
    if 'dress' in name:
        return 'dress'
    if any(token in name for token in ['pant', 'jean', 'trouser', 'bottom']):
        return 'bottom'
    if any(token in name for token in ['skirt']):
        return 'skirt'
    if any(token in name for token in ['short']):
        return 'shorts'
    if any(token in name for token in ['sweater', 'hoodie', 'cardigan']):
        return 'sweater'
    if any(token in name for token in ['jacket', 'coat', 'blazer', 'outer']):
        return 'outerwear'
    if any(token in name for token in ['shoe', 'sneaker', 'boot']):
        return 'footwear'
    return 'top'

size_engine = UnifiedSizeEngine(size_recommender, fallback_mapper, size_recommender_fitted)

def _coerce_measurements(data: Any) -> Dict[str, float]:
    result: Dict[str, float] = {}
    if isinstance(data, dict):
        source = data
    elif isinstance(data, list):
        source = {f'value_{i}': value for i, value in enumerate(data)}
    else:
        source = {}
    for key, value in source.items():
        try:
            if value is None or value == '':
                continue
            result[str(key)] = float(value)
        except (TypeError, ValueError):
            continue
    return result

class FashionUser:
    def __init__(self, user_id: str, name: str = '', email: str = ''):
        self.user_id = str(user_id)
        self.name = name or ''
        self.email = email or ''
        self.created_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        self.measurements: Dict[str, Any] = {}
        self.preferences: Dict[str, Any] = {}
        self.purchase_history: List[Dict[str, Any]] = []
        self.wishlist: List[Dict[str, Any]] = []
        self.view_history: List[Dict[str, Any]] = []

    def update(self, payload: Dict[str, Any]):
        self.name = payload.get('name', self.name)
        self.email = payload.get('email', self.email)
        if payload.get('measurements'):
            self.measurements = _coerce_measurements(payload['measurements'])
        if payload.get('preferences'):
            self.preferences = dict(payload['preferences'])
        if payload.get('purchase_history'):
            for record in payload['purchase_history']:
                self.add_purchase(record)
        if payload.get('wishlist'):
            self.wishlist = [dict(entry) for entry in payload['wishlist']]
        if payload.get('view_history'):
            self.view_history = [dict(entry) for entry in payload['view_history']]
        self.updated_at = datetime.utcnow()

    def add_purchase(self, purchase: Dict[str, Any]):
        record = {
            'item_id': str(purchase.get('item_id')) if purchase.get('item_id') is not None else None,
            'item_name': purchase.get('item_name', ''),
            'price': float(purchase.get('price', 0.0)) if purchase.get('price') is not None else 0.0,
            'rating': float(purchase.get('rating', 0.0)) if purchase.get('rating') is not None else None,
            'purchased_at': purchase.get('purchased_at') or datetime.utcnow().isoformat()
        }
        self.purchase_history.append(record)
        self.updated_at = datetime.utcnow()

    def summary(self) -> Dict[str, Any]:
        return {
            'user_id': self.user_id,
            'name': self.name,
            'email': self.email,
            'measurements': self.measurements,
            'preferences': self.preferences,
            'purchase_count': len(self.purchase_history),
            'wishlist_count': len(self.wishlist),
            'view_count': len(self.view_history),
            'created_at': self.created_at.isoformat(),
            'updated_at': self.updated_at.isoformat()
        }

class FashionRecommendationEngine:
    def __init__(self, items_df: pd.DataFrame, outfit_builder=None, size_engine: Optional[UnifiedSizeEngine] = None, robust_df: Optional[pd.DataFrame] = None):
        self.items_df = items_df.copy()
        self.outfit_builder = outfit_builder
        self.size_engine = size_engine
        self.robust_df = robust_df.copy() if robust_df is not None else None
        self.users: Dict[str, FashionUser] = {}
        self.items_lookup = self._build_item_lookup(self.items_df)
        self.default_items = self._build_default_items()
        print(f"   ‚úÖ Engine initialized with {len(self.items_lookup)} catalog items")

    def _build_item_lookup(self, df: pd.DataFrame) -> Dict[str, Dict[str, Any]]:
        lookup: Dict[str, Dict[str, Any]] = {}
        for _, row in df.iterrows():
            item_id = str(row.get('item_id') or row.get('ID') or row.get('id'))
            if not item_id:
                continue
            lookup[item_id] = self._format_item(row)
        return lookup

    def _build_default_items(self) -> List[Dict[str, Any]]:
        df = self.items_df.copy()
        if 'total_stock' in df.columns:
            df = df.sort_values('total_stock', ascending=False)
        elif 'price' in df.columns:
            df = df.sort_values('price', ascending=False)
        return [self._format_item(row) for _, row in df.head(50).iterrows()]

    def _format_item(self, row: pd.Series) -> Dict[str, Any]:
        return {
            'item_id': str(row.get('item_id') or row.get('ID') or row.get('id')),
            'name': row.get('name') or row.get('Name') or f"Item {row.get('item_id')}",
            'description': row.get('description') or row.get('Description') or '',
            'price': float(row.get('price')) if pd.notna(row.get('price')) else float(row.get('Price', 0) or 0),
            'garment_type': row.get('garment_type') or row.get('Garment Type') or '',
            'garment_category': row.get('garment_category') or row.get('category') or _infer_category(row.get('garment_type')),
            'garment_formality': row.get('garment_formality') or '',
            'store': row.get('store') or row.get('Store') or '',
            'image_url': row.get('image_url') or row.get('ImageUrl') or ''
        }

    def _get_user(self, user_id: str) -> FashionUser:
        if user_id not in self.users:
            self.users[user_id] = FashionUser(user_id)
        return self.users[user_id]

    def register(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        user_id = str(payload.get('user_id') or payload.get('id'))
        if not user_id:
            raise ValueError('user_id is required')
        user = self._get_user(user_id)
        user.update(payload)
        return user.summary()

    def get_size(self, user_id: str, garment_type: str, item_id: Optional[str] = None) -> Dict[str, Any]:
        user = self._get_user(user_id)
        item_meta = self.items_lookup.get(str(item_id)) if item_id else None
        if self.size_engine is None:
            return {
                'method': 'manual',
                'recommendations': [],
                'guidance': {'fit': 'Provide measurements for better results.', 'formality': ''}
            }
        return self.size_engine.recommend(garment_type, item_meta, user.measurements)

    def build_outfit(self, user_id: str, starting_item_id: Optional[str] = None, style: Optional[str] = None, max_items: int = 4) -> Dict[str, Any]:
        user = self._get_user(user_id)
        if self.outfit_builder is None:
            raise RuntimeError('Outfit builder is not available')
        if starting_item_id is None:
            starting_item_id = self.default_items[0]['item_id'] if self.default_items else next(iter(self.items_lookup))
        starting_item_id = str(starting_item_id)
        outfit = self.outfit_builder.build_basic_outfit(
            starting_item_id=starting_item_id,
            user_measurements=user.measurements,
            max_items=max(2, min(6, int(max_items)))
        )
        if outfit is None:
            raise ValueError('Unable to build outfit with the provided item id')
        formatted_items = []
        for item in outfit['outfit_items']:
            formatted_items.append({
                'item_id': item.get('id'),
                'name': item.get('name'),
                'garment_type': item.get('garment_type'),
                'category': item.get('category'),
                'price': float(item.get('price', 0))
            })
        starting = outfit.get('starting_item', {})
        return {
            'starting_item': {
                'item_id': starting.get('id'),
                'name': starting.get('name'),
                'garment_type': starting.get('garment_type'),
                'category': starting.get('category'),
                'price': float(starting.get('price', 0))
            },
            'items': formatted_items,
            'total_price': float(outfit.get('total_price', 0.0)),
            'item_count': int(outfit.get('item_count', len(formatted_items))),
            'compatibility_score': float(outfit.get('compatibility_score', 0.0)),
            'size_recommendations': outfit.get('size_recommendations', {})
        }

    def recommend(self, user_id: str, n: int = 6) -> List[Dict[str, Any]]:
        user = self._get_user(user_id)
        recommendations: List[Dict[str, Any]] = []
        seen = set()
        if user.purchase_history:
            last_purchase = user.purchase_history[-1]
            item_id = str(last_purchase.get('item_id'))
            if self.outfit_builder and item_id:
                similar = self.outfit_builder.find_similar_items(item_id, n=n, same_category=False) or []
                for entry in similar:
                    candidate_id = str(entry.get('item_id'))
                    if candidate_id and candidate_id in self.items_lookup and candidate_id not in seen:
                        recommendations.append(self.items_lookup[candidate_id])
                        seen.add(candidate_id)
        for item in self.default_items:
            if len(recommendations) >= n:
                break
            item_id = item['item_id']
            if item_id not in seen:
                recommendations.append(item)
                seen.add(item_id)
        return recommendations[:n]

    def get_insights(self, user_id: str) -> Dict[str, Any]:
        user = self._get_user(user_id)
        categories: Dict[str, int] = {}
        for purchase in user.purchase_history:
            item = self.items_lookup.get(str(purchase.get('item_id')))
            if item:
                cat = item.get('garment_category', 'unknown')
                categories[cat] = categories.get(cat, 0) + 1
        return {
            'profile': user.summary(),
            'favorite_categories': categories,
            'recent_purchases': user.purchase_history[-5:]
        }

    def add_purchase(self, user_id: str, item_id: str, item_name: str = '', price: float = 0.0) -> Dict[str, Any]:
        user = self._get_user(user_id)
        record = {
            'item_id': str(item_id),
            'item_name': item_name or self.items_lookup.get(str(item_id), {}).get('name', ''),
            'price': float(price),
            'purchased_at': datetime.utcnow().isoformat()
        }
        user.add_purchase(record)
        return {'status': 'logged', 'purchase': record}

fashion_engine = FashionRecommendationEngine(
    items_df=original_items_df,
    outfit_builder=outfit_builder,
    size_engine=size_engine,
    robust_df=robust_features_df
)

engine_path = artifact_path('fashion_recommendation_engine.pkl')
api_path = artifact_path('fashion_api.pkl')
_dump_pickle(fashion_engine, engine_path)
_dump_pickle(fashion_engine, api_path)

print("\nüéâ Export complete!")
print(f"   ‚Ä¢ Engine saved to: {engine_path}")
print(f"   ‚Ä¢ API proxy saved to: {api_path}")
print("Use check_pickle.py to validate the artifact if needed.")

üöÄ STEP 6: Main Recommendation Interface (UNIFIED SYSTEM)
‚úÖ Using cloudpickle for serialization
‚úÖ Libraries imported

1. üîÑ Loading core artifacts...
   ‚úÖ Artifacts loaded successfully
   ‚úÖ Engine initialized with 250 catalog items

üéâ Export complete!
   ‚Ä¢ Engine saved to: c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\fashion_recommendation_engine.pkl
   ‚Ä¢ API proxy saved to: c:\Users\Rana\OneDrive\Desktop\FitFast FYP\fitfast\frontend\src\ai\artifacts\fashion_api.pkl
Use check_pickle.py to validate the artifact if needed.
