In [1]:
import gradio as gr
import os
import pickle
import faiss
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from PIL import Image # Sử dụng Pillow để xử lý ảnh từ Gradio
import time # Để tạo tên file tạm thời duy nhất (một cách khác)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Single project root so the notebook can be relocated without editing every
# path. Set the FASHION_RECSYS_ROOT environment variable to override it; the
# default is the original author's local checkout.
PROJECT_ROOT = os.environ.get(
    "FASHION_RECSYS_ROOT",
    r"C:\Users\tanki\Desktop\recommend sys image",
)
DATA_DIR = os.path.join(PROJECT_ROOT, "data")

# Pickled metadata that accompanies the FAISS index.
FAISS_DATA_PATH = os.path.join(DATA_DIR, "faiss_fashion.pkl")
# Serialized FAISS index file.
INDEX_FILE_PATH = os.path.join(DATA_DIR, "faiss_fashion_index.index")
# Saved Keras model used to compute image embeddings.
MODEL_PATH = os.path.join(PROJECT_ROOT, "models", "fashion_resnet50.h5")
# Dataset folder; other cells read `styles.csv` and the `images` folder from it.
DATASET_PATH = os.path.join(DATA_DIR, "fashion-dataset")

# Name of the metadata column that stores the image file name.
IMAGE_COLUMN_NAME = 'image'
# Size every image is resized to before it is fed to the model.
IMG_WIDTH, IMG_HEIGHT = 224, 224

In [3]:
print("Loading resources...")

# Pre-bind every resource so later cells can test for None instead of hitting
# a NameError: the loaded objects stay None if loading fails part-way, and
# load_error_message stays None when everything loads.
model = None
index_loaded = None
df_loaded = None
df_image_paths = None
load_error_message = None

try:
    # Load the Keras model used to compute embeddings.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
    model = keras.models.load_model(MODEL_PATH)
    print("Keras model loaded successfully.")
    # model.summary()  # Uncomment to inspect the model architecture.

    # Load the FAISS index.
    if not os.path.exists(INDEX_FILE_PATH):
        raise FileNotFoundError(f"FAISS index file not found: {INDEX_FILE_PATH}")
    index_loaded = faiss.read_index(INDEX_FILE_PATH)
    print(f"FAISS index loaded successfully with {index_loaded.ntotal} vectors.")

    # Load the pickled metadata.
    # SECURITY: pickle.load can execute arbitrary code; only open files you
    # produced yourself or otherwise trust.
    if not os.path.exists(FAISS_DATA_PATH):
        raise FileNotFoundError(f"FAISS data file (pickle) not found: {FAISS_DATA_PATH}")
    with open(FAISS_DATA_PATH, 'rb') as f:
        df_loaded = pickle.load(f)
    print(f"DataFrame metadata loaded successfully from {FAISS_DATA_PATH}.")
    # NOTE(review): the pickle payload is not validated here because its type
    # and columns cannot be confirmed from this notebook.

    # Metadata table that the recommender indexes positionally with the
    # row numbers returned by FAISS.
    styles_csv_path = os.path.join(DATASET_PATH, "styles.csv")
    df_image_paths = pd.read_csv(styles_csv_path, on_bad_lines='skip', nrows=10000)
    # Image file names are "<id>.jpg"; built column-wise instead of row by row.
    df_image_paths['image'] = df_image_paths['id'].astype(str) + ".jpg"
    df_image_paths = df_image_paths.reset_index(drop=True)

    # --- Crucial validation of the table that is actually used for lookups ---
    if IMAGE_COLUMN_NAME not in df_image_paths.columns:
        raise ValueError(
            f"Metadata from {styles_csv_path} must contain column named "
            f"'{IMAGE_COLUMN_NAME}' storing image paths."
        )
    if index_loaded.ntotal != len(df_image_paths):
        # Lookups are positional, so a size mismatch may mean FAISS row
        # numbers do not line up with metadata rows. Warn rather than fail.
        print(
            f"WARNING: FAISS index holds {index_loaded.ntotal} vectors but the "
            f"metadata table has {len(df_image_paths)} rows; positional lookups "
            "may be misaligned."
        )
    print(f"Verified DataFrame structure. Using column '{IMAGE_COLUMN_NAME}' for image paths.")
except Exception as e:
    # Deliberately broad: any loading problem is recorded in
    # load_error_message so downstream code can check it before running.
    load_error_message = f"FATAL ERROR loading resources: {e}. Please check paths and file integrity."
    print(load_error_message)


Loading resources...




Keras model loaded successfully.
FAISS index loaded successfully with 9999 vectors.
DataFrame metadata loaded successfully from C:\Users\tanki\Desktop\recommend sys image\data\faiss_fashion.pkl.
Verified DataFrame structure. Using column 'image' for image paths.


In [4]:
# --- Image Processing Function (Adapted for PIL Image input from Gradio) ---
def process_image_pil(pil_image, target_model, img_width=IMG_WIDTH, img_height=IMG_HEIGHT):
    """Turn a PIL image into an embedding produced by ``target_model``.

    Args:
        pil_image: PIL image to embed.
        target_model: Model whose ``predict`` output is used as the embedding.
        img_width: Width the image is resized to before prediction.
        img_height: Height the image is resized to before prediction.

    Returns:
        The model's prediction as a C-contiguous ``float32`` NumPy array.

    Raises:
        ValueError: If ``target_model`` or ``pil_image`` is ``None``.
    """
    if target_model is None:
        raise ValueError("Model is not loaded.")
    if pil_image is None:
        raise ValueError("Input image is None.")

    # Force three RGB channels and the requested input size.
    rgb_resized = pil_image.convert('RGB').resize((img_width, img_height))

    # Pixel array with a leading batch axis of length one.
    batch = keras.preprocessing.image.img_to_array(rgb_resized)[np.newaxis, ...]

    # Apply the ResNet50-specific input preprocessing.
    batch = keras.applications.resnet50.preprocess_input(batch)

    features = target_model.predict(batch)

    # FAISS needs float32 data in C-contiguous memory.
    return np.ascontiguousarray(features.astype(np.float32))

In [5]:
# --- Recommendation Function for Gradio ---
def recommend_fashion_images(uploaded_image_pil, k):
    """Find the metadata rows of the ``k`` items most similar to an image.

    The image is embedded with ``process_image_pil`` using the module-level
    ``model``, the embedding is searched in ``index_loaded``, and the hits are
    looked up by position in ``df_image_paths``.

    Args:
        uploaded_image_pil: PIL image to query with, or ``None``.
        k: Number of neighbours requested; converted with ``int`` and capped
            at the number of vectors in the index.

    Returns:
        The rows of ``df_image_paths`` selected by the valid FAISS hits, or
        an empty list when there is no image, nothing to search for, no valid
        hit, or an error occurred (the error is printed, not raised).
    """
    if uploaded_image_pil is None:
        # Nothing uploaded: return an empty result.
        return []

    print(f"Received image, requesting k={k} recommendations.")

    try:
        # 1. Embed the uploaded image.
        start_time = time.perf_counter()
        query_embedding = process_image_pil(uploaded_image_pil, model)
        proc_time = time.perf_counter() - start_time
        print(f"Image processed in {proc_time:.4f} seconds. Embedding shape: {query_embedding.shape}")

        # 2. Search the FAISS index, never asking for more neighbours than
        #    the index contains.
        start_time = time.perf_counter()
        k_actual = min(int(k), index_loaded.ntotal)
        if k_actual <= 0:
            print("Warning: k is zero or index is empty.")
            return []

        distances, faiss_indices = index_loaded.search(query_embedding, k_actual)
        search_time = time.perf_counter() - start_time
        print(f"FAISS search completed in {search_time:.4f} seconds.")

        # 3. Drop the -1 placeholders that mark missing hits.
        result_indices = faiss_indices[0]
        valid_indices = result_indices[result_indices != -1]

        if len(valid_indices) == 0:
            print("No valid similar items found in the index.")
            return []

        # 4. FAISS hits are integer positions, so use .iloc for the lookup.
        return df_image_paths.iloc[valid_indices]

    except Exception as e:
        # Log the failure and return the same empty result as the other
        # failure paths instead of falling through and returning None.
        print(f"ERROR during recommendation: {e}")
        return []


In [6]:
# Full styles metadata (no row limit), used by the evaluation cell below.
# The image file name is "<id>.jpg"; it is built column-wise rather than with
# a row-by-row apply, and the CSV path is joined with os.path.join instead of
# a hand-written backslash.
df = (
    pd.read_csv(os.path.join(DATASET_PATH, "styles.csv"), on_bad_lines='skip')
    .assign(image=lambda frame: frame['id'].astype(str) + ".jpg")
    .reset_index(drop=True)
)
df.head(5)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt,53759.jpg


In [7]:
def accuracy_rcm_sys(sample = 100, k = 3):
    """Print how often recommendations share the query item's subCategory.

    Draws ``sample`` rows from the module-level ``df`` with a fixed seed, runs
    ``recommend_fashion_images`` on each row's image and counts how many of
    the returned rows have the same ``subCategory`` as the query row. The
    per-query count and the overall average (total hits / sample / k) are
    printed.

    Args:
        sample: Number of rows to evaluate; must be positive.
        k: Number of recommendations requested per query; must be positive.

    Raises:
        ValueError: If ``sample`` or ``k`` is not positive.
    """
    if sample <= 0 or k <= 0:
        # Fail early instead of dividing by zero after the whole loop has run.
        raise ValueError("sample and k must both be positive.")

    # NOTE(review): queries are drawn from the same styles.csv the catalogue
    # comes from, so a query may retrieve itself — confirm this is intended.
    tb = 0
    df_test = df.sample(n=sample, random_state=42)
    for _, query_row in df_test.iterrows():
        img_path = os.path.join(DATASET_PATH, 'images', str(query_row['image']))
        # Context manager so the image file handle is released every iteration.
        with Image.open(img_path) as pil_image:
            recommend_row = recommend_fashion_images(pil_image, k)

        if isinstance(recommend_row, pd.DataFrame):
            same_category = recommend_row['subCategory'] == query_row['subCategory']
            count = int(same_category.sum())
        else:
            # The recommender returns a non-DataFrame value when it has no
            # result; score that query as zero hits rather than crashing.
            count = 0

        print('map {} = {}'.format(k, count))
        tb += count

    print('map trung bình: ', (tb/sample)/k)

In [8]:
accuracy_rcm_sys(sample=1000, k = 3)

Received image, requesting k=3 recommendations.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Image processed in 2.4575 seconds. Embedding shape: (1, 2048)
FAISS search completed in 0.0320 seconds.
map 3 = 2
Received image, requesting k=3 recommendations.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
Image processed in 0.2400 seconds. Embedding shape: (1, 2048)
FAISS search completed in 0.0071 seconds.
map 3 = 0
Received image, requesting k=3 recommendations.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
Image processed in 0.1819 seconds. Embedding shape: (1, 2048)
FAISS search completed in 0.0051 seconds.
map 3 = 3
Received image, requesting k=3 recommendations.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
Image processed in 0.1835 seconds. Embedding shape: (1, 2048)
FAISS search completed in 0.0055 seconds.
map 3 = 2
Received image, requesting k=3 recommendations.
[1m1/1[0m [3