In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer

# Interaction log: the list at position i holds the items user i interacted with.
data = [
    ["a", "b", "c"],  # User 0
    ["a", "b"],       # User 1
    ["d", "e"],       # User 2
]

# One-hot encode the interactions into a user x item table:
# one row per user (positional index), one column per item label found by the
# binarizer, and a 1 wherever that user interacted with that item.
mlb = MultiLabelBinarizer()
interaction_flags = mlb.fit_transform(data)
user_item_matrix = pd.DataFrame(interaction_flags, columns=mlb.classes_)

# Heading and table are emitted on consecutive lines.
print("User-Item Matrix:", user_item_matrix, sep="\n")

# Item-item cosine similarity.
# Transposing the user-item table turns every item into a row vector over
# users; cosine_similarity then compares each pair of those rows. The result
# is labelled with the item names on both axes.
item_labels = user_item_matrix.columns
item_vectors = user_item_matrix.T
item_similarity = pd.DataFrame(
    cosine_similarity(item_vectors), index=item_labels, columns=item_labels
)

# Leading "\n" in the heading keeps a blank line before this section's output.
print("\nItem-Item Similarity:", item_similarity, sep="\n")

# Score candidate items for one user (here: User 1).
user_id = 1

# Row of the user-item table for this user, selected by position.
user_vector = user_item_matrix.iloc[user_id]

# Items the user has not interacted with yet (their flag is 0).
not_seen = user_vector.eq(0)

# Each item's score is the sum of its similarities to the items the user
# already has; only the not-yet-seen items are kept as candidates.
scores = (item_similarity @ user_vector).loc[not_seen]

# Highest score first.
recommended_items = scores.sort_values(ascending=False)
print(f"\nRecommended items for User {user_id}:", recommended_items, sep="\n")

User-Item Matrix:
   a  b  c  d  e
0  1  1  1  0  0
1  1  1  0  0  0
2  0  0  0  1  1

Item-Item Similarity:
          a         b         c    d    e
a  1.000000  1.000000  0.707107  0.0  0.0
b  1.000000  1.000000  0.707107  0.0  0.0
c  0.707107  0.707107  1.000000  0.0  0.0
d  0.000000  0.000000  0.000000  1.0  1.0
e  0.000000  0.000000  0.000000  1.0  1.0

Recommended items for User 1:
c    1.414214
d    0.000000
e    0.000000
dtype: float64
