In [22]:
import pandas as pd

from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


I have take the data from

https://statso.io/hybrid-recommendations-case-study/

In [23]:
data = pd.read_csv("/content/drive/MyDrive/Github Projects/Hybrid Filtering (content based and collaborative filtering)/fashion_products.csv")
print(data.head())

   User ID  Product ID Product Name   Brand         Category  Price    Rating  \
0       19           1        Dress  Adidas    Men's Fashion     40  1.043159   
1       97           2        Shoes     H&M  Women's Fashion     82  4.026416   
2       25           3        Dress  Adidas  Women's Fashion     44  3.337938   
3       57           4        Shoes    Zara    Men's Fashion     23  1.049523   
4       79           5      T-shirt  Adidas    Men's Fashion     79  4.302773   

    Color Size  
0   Black   XL  
1   Black    L  
2  Yellow   XL  
3   White    S  
4   Black    M  


Content Based Recommendations

In [25]:
content_df = data[['Product ID', 'Product Name', 'Brand', 'Category', 'Color', 'Size']].copy()
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

In [26]:
# Use TF-IDF vectorizer to convert content into a matrix of TF-IDF features
tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['User ID',
                                  'Product ID',
                                  'Rating']], reader)


In [27]:
def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    return recommendations

Collaborative Filtering based recommendations

In [28]:
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7c09962f3010>

In [29]:
def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

Hybrid Approach (content based and collaborative)

In [42]:
def get_hybrid_recommendations(user_id, product_id, top_n):
    # Get content-based recommendations and their product names
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)

    # Get collaborative filtering recommendations and their product names
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)

    # Combine all recommendations
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))

    # Get product names for all recommendations
    hybrid_product_names = []
    for recommendation in hybrid_recommendations:
        try:
            product_name = content_df.loc[content_df['Product ID'] == recommendation, 'Product Name'].iloc[0]
        except IndexError:
            # Handle the case where the recommendation is not found in the content_df
            product_name = "Unknown Product"
        hybrid_product_names.append(product_name)

    return hybrid_recommendations, hybrid_product_names

user_id = 19
product_id = 1
top_n = 5
recommendations, product_names = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}:")
for i, (recommendation, product_name) in enumerate(zip(recommendations, product_names)):
    print(f"{i + 1}. Product ID: {recommendation}, Product Name: {product_name}")


Hybrid Recommendations for User 19 based on Product 1:
1. Product ID: 608, Product Name: T-shirt
2. Product ID: 625, Product Name: Shoes
3. Product ID: 1170, Product Name: Unknown Product
4. Product ID: 569, Product Name: Dress
5. Product ID: 831, Product Name: Jeans
