In [148]:
import pandas as pd
data = pd.read_csv(r"C:\Users\vcsma\Downloads\AI PROJECT\fashion_products.csv")

content_df = data[['Product ID', 'Product Name', 'Brand', 'Category', 'Color', 'Size']].copy()
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)
print(content_df.head(20))

    Product ID Product Name   Brand         Category   Color Size  \
0            1        Dress  Adidas    Men's Fashion   Black   XL   
1            2        Shoes     H&M  Women's Fashion   Black    L   
2            3        Dress  Adidas  Women's Fashion  Yellow   XL   
3            4        Shoes    Zara    Men's Fashion   White    S   
4            5      T-shirt  Adidas    Men's Fashion   Black    M   
5            6        Dress  Adidas    Men's Fashion  Yellow    L   
6            7        Jeans   Gucci    Men's Fashion   White   XL   
7            8      Sweater    Zara    Kids' Fashion    Blue   XL   
8            9      Sweater     H&M    Men's Fashion   Green   XL   
9           10      T-shirt    Zara    Kids' Fashion   White   XL   
10          11      T-shirt  Adidas    Men's Fashion     Red    S   
11          12      Sweater   Gucci    Kids' Fashion  Yellow    M   
12          13        Jeans    Nike    Kids' Fashion     Red    M   
13          14        Dress    Zar

In [149]:
from surprise import Dataset, Reader, SVD 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

Content Based Filtering

In [150]:

content_df = data[['Product ID', 'Product Name', 'Brand', 'Category', 'Color', 'Size']].copy()
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['User ID', 
                                  'Product ID', 
                                  'Rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    return recommendations

Collaborative Filtering

In [151]:
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

In [152]:
def precision_at_k(predictions, k=10, threshold=4):
    top_k_pred = sorted(predictions, key=lambda x: x.est, reverse=True)[:k]
    num_relevant = sum((pred.r_ui >= threshold) for pred in top_k_pred)
    return num_relevant / min(k, len(predictions))

def recall_at_k(predictions, k=10, threshold=4):
    top_k_pred = sorted(predictions, key=lambda x: x.est, reverse=True)[:k]
    num_relevant = sum((pred.r_ui >= threshold) for pred in top_k_pred)
    num_total_relevant = sum((pred.r_ui >= threshold) for pred in predictions)
    
    if num_total_relevant == 0:
        return 0
    else:
        return num_relevant / num_total_relevant


Evaluating Our Recommender

In [153]:
def evaluate_recommender(model, trainset, testset, k=10):
    model.fit(trainset)
    predictions = model.test(testset)
    
    precisions, recalls, map_scores = [], [], []
    for uid in set([pred.uid for pred in predictions]):
        user_predictions = [pred for pred in predictions if pred.uid == uid]
        user_predictions.sort(key=lambda x: x.est, reverse=True)
        
        
        top_k_recommended = [pred.iid for pred in user_predictions[:k]]
        

        true_items = [pred.iid for pred in user_predictions if pred.r_ui >= 4]
        

        precisions.append(precision_at_k(user_predictions, k))
        recalls.append(recall_at_k(user_predictions, k))
        
        map_score = sum([precision_at_k(user_predictions, i+1) for i, pred in enumerate(user_predictions) if pred.iid in true_items]) / max(1, len(true_items), k)

        map_scores.append(map_score)
    
    
    mean_precision = sum(precisions) / len(precisions)
    mean_recall = sum(recalls) / len(recalls)
    mean_map = sum(map_scores) / len(map_scores)
    
    return mean_precision, mean_recall, mean_map

Hybrid Filtering (Combination of Both)

In [154]:
def get_hybrid_recommendations(user_id, product_id, top_n=15):
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))
    return hybrid_recommendations[:top_n]

def get_hybrid_recommendations_for_new_user(user_id, top_n=10):
    popular_items = data['Product ID'].value_counts().index[:top_n].tolist()
    return popular_items

In [155]:
user_id = 10
product_id = 5
top_n = 15

if product_id is None:
    recommendations = get_hybrid_recommendations_for_new_user(user_id, top_n)
    print(f"Hybrid Recommendations for New User {user_id}:")
else:
    recommendations = get_hybrid_recommendations(user_id, product_id, top_n)
    print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}:")

for i, recommendation in enumerate(recommendations):
    print(f"{i + 1}. Product ID: {recommendation}")


trainset, testset = train_test_split(data_surprise, test_size=0.2, random_state=42)


precision, recall, map_score = evaluate_recommender(algo, trainset, testset)

print("Evaluation Metrics:")
print(f"Precision@10: {precision}")
print(f"Recall@10: {recall}")
print(f"Mean Average Precision: {map_score}")


Hybrid Recommendations for User 10 based on Product 5:
1. Product ID: 866
2. Product ID: 899
3. Product ID: 482
4. Product ID: 582
5. Product ID: 902
6. Product ID: 1896
7. Product ID: 1198
8. Product ID: 722
9. Product ID: 1523
10. Product ID: 1718
11. Product ID: 921
12. Product ID: 1115
13. Product ID: 765
14. Product ID: 1055
Evaluation Metrics:
Precision@10: 0.35205761316872425
Recall@10: 0.6049382716049383
Mean Average Precision: 0.05473251028806585
