In [107]:
import pandas as pd
interaction_df = pd.read_csv("../Data/merged_user_interaction_data.csv")
interaction_df.head(2)


Unnamed: 0,user_id,product_id,interaction_type,score,age,skin_type
0,user_001,prod_671,viewed,1,37,Combination
1,user_001,prod_855,viewed,1,37,Combination


In [108]:
user_item_matrix = interaction_df.pivot_table(
    index='user_id',
    columns='product_id',
    values='score',
    aggfunc='sum'
).fillna(0)

user_item_matrix.head()


product_id,prod_0,prod_1,prod_10,prod_100,prod_1000,prod_1001,prod_1002,prod_1003,prod_1004,prod_1005,...,prod_990,prod_991,prod_992,prod_993,prod_994,prod_995,prod_996,prod_997,prod_998,prod_999
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
user_001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user_002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user_003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user_004,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user_005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [109]:
from sklearn.metrics.pairwise import cosine_similarity
#for item-item
# Transpose 
item_similarity = cosine_similarity(user_item_matrix.T)

item_sim_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)


In [110]:
# User-User Cosine Similarity
user_similarity = cosine_similarity(user_item_matrix)
user_sim_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)


In [111]:
def recommend_user_user(user_id, user_sim_df, user_item_matrix, top_n=15):
    similar_users = user_sim_df[user_id].drop(user_id)
    top_users = similar_users.sort_values(ascending=False).head(5).index
    mean_scores = user_item_matrix.loc[top_users].mean()
    already_interacted = user_item_matrix.loc[user_id] > 0
    mean_scores[already_interacted] = 0
    return mean_scores.sort_values(ascending=False).head(top_n).index


In [112]:
def recommend_item_item(user_id, user_item_matrix, item_sim_df, top_n=15):
    user_vector = user_item_matrix.loc[user_id].values
    scores = user_vector @ item_sim_df
    interacted = user_item_matrix.loc[user_id] > 0
    scores[interacted] = 0
    top_items = scores.argsort()[-top_n:][::-1]
    return user_item_matrix.columns[top_items]



In [113]:
def hybrid_recommendation(user_id, user_item_matrix, user_sim_df, item_sim_df, top_n=5):
    user_based = recommend_user_user(user_id, user_sim_df, user_item_matrix, top_n * 2)
    item_based = recommend_item_item(user_id, user_item_matrix, item_sim_df, top_n * 2)
    
    combined = pd.Series(user_based.tolist() + item_based.tolist()).value_counts()

    return combined.head(top_n).index.tolist()


In [114]:
test = interaction_df.groupby('user_id').apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)
train = pd.concat([interaction_df, test]).drop_duplicates(keep=False)


  test = interaction_df.groupby('user_id').apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)


In [115]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# --- 1. Build Interaction Matrix ---
def build_interaction_matrix(df, user_col='user_id', item_col='product_id', score_col='score'):
    # Create mapping from IDs to matrix indices
    user_map = {u: i for i, u in enumerate(df[user_col].unique())}
    item_map = {p: i for i, p in enumerate(df[item_col].unique())}

    df = df.copy()
    df['user_idx'] = df[user_col].map(user_map)
    df['item_idx'] = df[item_col].map(item_map)

    matrix = pd.pivot_table(
        df,
        index='user_idx',
        columns='item_idx',
        values=score_col,
        aggfunc='sum'
    ).fillna(0)

    return matrix, user_map, item_map

# --- 2. Precision@K and Recall@K ---
def precision_at_k(recommended, actual, k):
    recommended = recommended[:k]
    return len(set(recommended) & set(actual)) / k if k > 0 else 0

def recall_at_k(recommended, actual, k):
    recommended = recommended[:k]
    return len(set(recommended) & set(actual)) / len(actual) if actual else 0

# --- 3. User-Based Scores ---
def user_based_scores(user_idx, interaction_matrix, user_sim_df):
    sim_vector = user_sim_df.loc[user_idx].values
    scores = sim_vector @ interaction_matrix.values
    interacted = interaction_matrix.loc[user_idx] > 0
    scores[interacted] = 0
    return scores

# --- 4. Item-Based Scores ---
def item_based_scores(user_idx, interaction_matrix, item_sim_df):
    user_vector = interaction_matrix.loc[user_idx].values
    scores = user_vector @ item_sim_df
    interacted = interaction_matrix.loc[user_idx] > 0
    scores[interacted] = 0
    return scores

# --- 5. Hybrid Recommendation ---
def hybrid_recommendations(user_idx, interaction_matrix, user_sim_df, item_sim_df, alpha=0.5, top_n=5):
    user_scores = user_based_scores(user_idx, interaction_matrix, user_sim_df)
    item_scores = item_based_scores(user_idx, interaction_matrix, item_sim_df)

    # Normalize scores using numpy.ptp for compatibility with NumPy 2.0+
    user_scores_norm = (user_scores - user_scores.min()) / (np.ptp(user_scores) + 1e-8)
    item_scores_norm = (item_scores - item_scores.min()) / (np.ptp(item_scores) + 1e-8)

    hybrid_scores = alpha * user_scores_norm + (1 - alpha) * item_scores_norm

    recommended_indices = np.argsort(hybrid_scores)[-top_n:][::-1]
    return interaction_matrix.columns[recommended_indices]

# ---------- Main Evaluation Workflow ----------

# Assume you have done train/test split on your original DataFrame `df`
# Variables `train` and `test` are your split DataFrames

# Build interaction matrices for training and test
train_interaction_matrix, train_user_map, train_item_map = build_interaction_matrix(train)
test_interaction_matrix, _, _ = build_interaction_matrix(test)

# Compute similarity matrices from training data
user_sim = cosine_similarity(train_interaction_matrix)
user_sim_df = pd.DataFrame(user_sim, index=train_interaction_matrix.index, columns=train_interaction_matrix.index)

item_sim = cosine_similarity(train_interaction_matrix.T)
item_sim_df = pd.DataFrame(item_sim, index=train_interaction_matrix.columns, columns=train_interaction_matrix.columns)

# Reverse maps: index to user/product IDs
inverse_train_user_map = {v: k for k, v in train_user_map.items()}
inverse_train_item_map = {v: k for k, v in train_item_map.items()}

precisions, recalls = [], []
K = 5  # Top-K for evaluation

for user_id in test['user_id'].unique():
    if user_id not in train_user_map:
        # Skip cold-start users; or handle them with a different logic if you want
        continue
    user_idx = train_user_map[user_id]

    # Generate recommendations (top 2*K for better recall coverage)
    recommended_item_indices = hybrid_recommendations(
        user_idx,
        train_interaction_matrix,
        user_sim_df,
        item_sim_df,
        alpha=0.5,
        top_n=K*2
    )

    # Map indices back to actual product IDs
    recommended_products = [inverse_train_item_map[idx] for idx in recommended_item_indices]

    # Actual products interacted in test set by user
    actual_products = test[test['user_id'] == user_id]['product_id'].tolist()

    precisions.append(precision_at_k(recommended_products, actual_products, K))
    recalls.append(recall_at_k(recommended_products, actual_products, K))

print(f"Average Precision@{K}: {np.mean(precisions):.4f}")
print(f"Average Recall@{K}: {np.mean(recalls):.4f}")


Average Precision@5: 0.0004
Average Recall@5: 0.0020
