# Recommending Items Based on User Profiles

## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

## Loading Datasets

In [None]:
item_embeddings_df = pd.read_csv('../data/item_embeddings.csv')
user_profiles_df = pd.read_csv('../data/user_profiles.csv')

In [3]:
item_embeddings_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
0,-0.074143,0.06405,-0.009264,0.046263,-0.027863,-0.048036,0.06423,-0.072758,-0.083279,-0.023699,...,-0.032186,-0.009203,-0.015632,0.013018,0.038265,0.057547,0.033841,-0.040459,-0.005354,-0.016443
1,-0.053667,0.086153,0.008184,-0.027359,0.064028,-0.062513,0.064731,0.053492,-0.086663,0.04204,...,-0.039931,-0.055881,0.070485,0.079482,-0.065805,0.01045,0.083477,-0.020643,0.026802,0.023278
2,-0.103944,0.160174,0.006674,0.032914,0.023432,0.015533,0.124695,-0.012912,-0.071057,0.007715,...,-0.04839,-0.047802,-0.056081,0.034009,0.005964,0.015365,0.026713,-0.099851,-0.039444,0.01975
3,-0.068707,0.033529,0.007046,0.08301,0.072068,0.020052,0.061295,0.038851,0.006501,0.060401,...,-0.073263,-0.021329,-0.063976,0.042687,0.028866,0.015143,-0.02514,-0.103199,-0.039904,0.047723
4,-0.069275,0.071025,0.019378,0.039093,0.075728,-0.033218,0.122828,-0.019662,-0.06492,-0.04029,...,-0.048714,-0.067688,-0.049945,-0.00556,-0.028457,0.019639,-0.037464,-0.115767,-0.017989,0.000391


In [4]:
user_profiles_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,373,374,375,376,377,378,379,380,381,382
0,0.060143,0.036061,0.048108,0.008844,-0.007575,0.06545,-0.048654,-0.067773,0.014274,0.024337,...,-0.05487,-0.012844,-0.018181,0.041335,0.007615,0.011697,-0.027718,-0.063241,0.007207,-0.003882
1,0.038324,0.021305,-0.009668,0.062717,-0.027837,0.082639,0.013449,-0.052762,-0.003109,0.028908,...,0.002024,-0.060947,-0.035074,0.033858,0.003707,-0.009584,0.002526,-0.081802,-0.057961,-0.013604
2,0.063626,-0.022125,0.018907,0.037874,-0.007774,0.043303,0.007892,-0.044934,0.031136,0.045607,...,-0.054719,-0.027572,0.020879,0.054398,-0.066077,0.013572,0.070344,-0.072806,-0.012058,0.03805
3,0.078631,0.007365,0.026449,0.023867,0.000867,0.078875,0.040846,-0.059256,0.005375,-0.013123,...,-0.02719,-0.045285,-0.011149,0.047736,0.016887,0.021995,0.036113,-0.101998,-0.036493,0.005175
4,0.116301,0.017553,0.029933,0.044769,-0.029575,0.086199,-0.01863,-0.05432,0.015293,-0.012321,...,-0.020133,-0.030688,-0.024696,0.026634,0.004515,0.004056,-0.016762,-0.056608,-0.036605,-0.030182


In [10]:
interactions = pd.read_csv('../data/fashion_interactions.csv')
interactions.head()

Unnamed: 0,user_id,item_id,liked
0,0,0,1
1,0,8,1
2,0,15,1
3,0,18,1
4,0,36,1


In [16]:
users = pd.read_csv('../data/fashion_users.csv')
users.head()

Unnamed: 0,user_id,interests
0,0,"['boho', 'summer', 'feminine']"
1,1,"['streetwear', 'sneakers', 'urban']"
2,2,"['minimalist', 'neutrals', 'sustainable']"
3,3,"['vintage', 'fall', 'denim']"
4,4,"['formal', 'classic', 'blazer']"


In [20]:
items = pd.read_csv('../data/fashion_items.csv')
items.head()

Unnamed: 0,item_id,title,tags,category
0,0,Boho Summer Maxi Dress,"['boho', 'dress', 'summer']",dresses
1,1,Minimalist Linen Blouse,"['minimalist', 'blouse', 'neutrals']",tops
2,2,Vintage Denim Jacket,"['vintage', 'jacket', 'denim']",outerwear
3,3,Cozy Knit Sweater,"['cozy', 'sweater', 'fall']",tops
4,4,Streetwear Graphic Hoodie,"['streetwear', 'hoodie', 'urban']",outerwear


## Recommendation by Profile Similarity

In [26]:
def recommend_items(user_id, top_n=5):
    """
    Recommend items based on user profile similarity.
    
    Parameters:
    - user_id: ID of the user for whom to recommend items.
    - top_n: Number of top recommendations to return.
    
    Returns:
    - DataFrame of recommended items.
    """
    user_profile = user_profiles_df.loc[user_id]
    item_embeddings = item_embeddings_df.iloc[:, 1:].values
    
    # Calculate cosine similarity between user profile and item embeddings
    user_profile = user_profile.values.reshape(1, -1)  # Reshape to 2D array for cosine similarity
    similarities = cosine_similarity(user_profile, item_embeddings).flatten()

    # Remove the user's saved items from recommendations
    # Get saved items based on interactions data
    # This gives a list of indices of saved items
    saved_items = interactions[interactions['user_id'] == user_id]['item_id'].values
    # Remove saved items from the similarity scores
#    similarities[saved_items] = -np.inf  # Set saved items' similarity to -inf to exclude them

    # Get indices of top N similar items
    top_indices = np.argsort(similarities)[-top_n:][::-1]
    
    # Return recommended items from the actual fashion_items dataframe
    return items.iloc[top_indices]

Let's test this function on a sample user!

In [27]:
user = 0  # Example user ID
# Print user profile for reference
print("User Profile for User ID:", user)
print(users.loc[user])
# print the users saved items
print("Saved Items for User ID:", user)
print(items[interactions['user_id'] == user])
# And print the recommendations
recommended_items = recommend_items(user)
print("Recommended Items for User ID:", user)
print(recommended_items)

User Profile for User ID: 0
user_id                                   0
interests    ['boho', 'summer', 'feminine']
Name: 0, dtype: object
Saved Items for User ID: 0
   item_id                      title                                   tags  \
0        0     Boho Summer Maxi Dress            ['boho', 'dress', 'summer']   
1        1    Minimalist Linen Blouse   ['minimalist', 'blouse', 'neutrals']   
2        2       Vintage Denim Jacket         ['vintage', 'jacket', 'denim']   
3        3          Cozy Knit Sweater            ['cozy', 'sweater', 'fall']   
4        4  Streetwear Graphic Hoodie      ['streetwear', 'hoodie', 'urban']   
5        5       Sustainable Yoga Set  ['sustainable', 'athleisure', 'yoga']   

     category  
0     dresses  
1        tops  
2   outerwear  
3        tops  
4   outerwear  
5  activewear  
Recommended Items for User ID: 0
    item_id                    title  \
36       36  Floral Print Wrap Dress   
83       83  Elegant Silk Wrap Dress   
15      

  print(items[interactions['user_id'] == user])


Great, we recovered one of the original items the user saved! (Uncomment the line in the function above if recommending unseen items).

In [29]:
import random
user = random.choice(users['user_id'].values)  # Randomly select a user ID
# Print user profile for reference
print("User Profile for User ID:", user)
print(users.loc[user])
# print the users saved items
print("Saved Items for User ID:", user)
print(items[interactions['user_id'] == user])
# And print the recommendations
recommended_items = recommend_items(user)
print("Recommended Items for User ID:", user)
print(recommended_items)

User Profile for User ID: 10
user_id                                        10
interests    ['high-waisted', 'chic', 'trousers']
Name: 10, dtype: object
Saved Items for User ID: 10
    item_id                     title                                tags  \
50       50    Elegant Pearl Earrings    ['elegant', 'pearl', 'earrings']   
51       51  Sporty Athletic Tank Top  ['sporty', 'athletic', 'tank top']   
52       52   Casual Hoodie with Logo        ['casual', 'hoodie', 'logo']   

       category  
50  accessories  
51         tops  
52    outerwear  
Recommended Items for User ID: 10
    item_id                                   title  \
13       13          High-Waisted Wide Leg Trousers   
59       59                  Tailored Wool Trousers   
99       99  Tailored Wool Trousers with Cuffed Hem   
70       70                     Tailored Suit Pants   
17       17                    Chic Leather Handbag   

                                           tags     category  
13        

  print(items[interactions['user_id'] == user])


While we didn't recover any of the original pins in this example, I would argue that the recommendations actually fit the user's profile better than there saved items. It might be interesting in future to explore creating recommendations with the profile and current pins in case tastes changed after creating a profile. Let's try one more!

In [None]:
user = random.choice(users['user_id'].values)  # Randomly select a user ID
# Print user profile for reference
print("User Profile for User ID:", user)
print(users.loc[user])
# print the users saved items
print("Saved Items for User ID:", user)
print(items[interactions['user_id'] == user])
# And print the recommendations
recommended_items = recommend_items(user)
print("Recommended Items for User ID:", user)
print(recommended_items)

User Profile for User ID: 15
user_id                               15
interests    ['woven', 'tote', 'summer']
Name: 15, dtype: object
Saved Items for User ID: 15
    item_id                                           title  \
76       76              Casual Denim Overalls with Pockets   
77       77                Tailored Wool Blazer with Lining   
78       78      Sporty Athletic Skirt with Built-in Shorts   
79       79  Vintage-Inspired Leather Backpack with Buckles   
80       80              Casual Graphic T-Shirt with Slogan   
81       81           Retro Glasses with Pink Tinted Lenses   

                                               tags     category  
76          ['casual', 'denim overalls', 'pockets']      bottoms  
77            ['tailored', 'wool blazer', 'lining']    outerwear  
78  ['sporty', 'athletic skirt', 'built-in shorts']      bottoms  
79       ['vintage', 'leather backpack', 'buckles']  accessories  
80          ['casual', 'graphic t-shirt', 'slogan']         

  print(items[interactions['user_id'] == user])


In this case, the recommendations really leaned into the summer aspect of the user's interests. This might make sense in context of the tag breakdown though, because neither woven nor tote were in the top 20 tags from our EDA.

## Collaborative Filtering