# Case Study

Give some Hybrid Recommendations per user, having in consideration Product Names, Brands, Prices and Raing

In [46]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [47]:
# Read the Data

data = pd.read_csv("fashion_products.csv")
print(data.head())

   User ID  Product ID Product Name   Brand         Category  Price    Rating  \
0       19           1        Dress  Adidas    Men's Fashion     40  1.043159   
1       97           2        Shoes     H&M  Women's Fashion     82  4.026416   
2       25           3        Dress  Adidas  Women's Fashion     44  3.337938   
3       57           4        Shoes    Zara    Men's Fashion     23  1.049523   
4       79           5      T-shirt  Adidas    Men's Fashion     79  4.302773   

    Color Size  
0   Black   XL  
1   Black    L  
2  Yellow   XL  
3   White    S  
4   Black    M  


In [48]:
# Insights about it
# 1000 rows

data.describe()

Unnamed: 0,User ID,Product ID,Price,Rating
count,1000.0,1000.0,1000.0,1000.0
mean,50.419,500.5,55.785,2.993135
std,28.78131,288.819436,26.291748,1.153185
min,1.0,1.0,10.0,1.000967
25%,26.0,250.75,33.0,1.992786
50%,50.0,500.5,57.0,2.984003
75%,75.0,750.25,78.25,3.985084
max,100.0,1000.0,100.0,4.987964


In [49]:
# Checking if there's any null values

null_sum_by_column = data.isnull().sum()
print(null_sum_by_column)

User ID         0
Product ID      0
Product Name    0
Brand           0
Category        0
Price           0
Rating          0
Color           0
Size            0
dtype: int64


## Recommendation System

1) First Approach: Content Based Filtering

Basically we filter which columns we want and then choosing which text is the most used / popular among our columns

In [50]:
# 1) checking the columns we want to use
# 2) Use TF-IDF vectorizer to convert content into a matrix of TF-IDF features  (Term Frequency-Inverse Document Frequency)

#1
content_df = data[['Product ID', 'Product Name', 'Brand', 
                   'Category', 'Color', 'Size']]
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

#2
tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['User ID', 
                                  'Product ID', 
                                  'Rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    # Obtener el índice del producto en el dataframe
    index = content_df[content_df['Product ID'] == product_id].index[0]
    
    # Obtener la similitud de contenido para el producto específico
    similarity_scores = content_similarity[index]
    
    # Obtener los índices de productos similares ordenados por similitud (excluyendo el producto actual)
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    
    # Obtener los IDs de los productos similares recomendados
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    
    return recommendations

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


2) Second Approach: Collaborative Filtering

In [51]:
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)


def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)

    # Ordenar las predicciones por estimación de calificación y precio
    predictions.sort(key=lambda x: (x.est, x.iid), reverse=True)

    # Obtener los IDs de los productos recomendados
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    
    return recommendations

3) The Hybrid Approach

In [52]:
def get_hybrid_recommendations(user_id, product_id, top_n):
    # Obtener recomendaciones basadas en contenido para el producto específico
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    
    # Obtener recomendaciones basadas en filtrado colaborativo para el usuario específico
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    
    # Combinar las recomendaciones de ambos enfoques en una lista única
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))
    
    # Retornar las primeras top_n recomendaciones combinadas
    return hybrid_recommendations[:top_n]


In [None]:
#The Hybrid method uses the last two methods, it makes more personalized recommendations.

In [53]:
user_id = 25
product_id = 3
top_n = 3

recommendations = get_hybrid_recommendations(user_id, product_id, top_n)

print("Hybrid Recommendations:")
for i, recommendation in enumerate(recommendations):
    print(f"{i + 1}. Product ID: {recommendation}")


Hybrid Recommendations:
1. Product ID: 652
2. Product ID: 701
3. Product ID: 30


In [45]:
user_id = 3
product_id = 11
top_n = 5
recommendations = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}:")
for i, recommendation in enumerate(recommendations):
    print(f"{i + 1}. Product ID: {recommendation}")

Hybrid Recommendations for User 3 based on Product 11:
1. Product ID: 944
2. Product ID: 1746
3. Product ID: 149
4. Product ID: 1014
5. Product ID: 1591
