In [61]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score
from sklearn.metrics.pairwise import cosine_similarity

In [62]:
# Read the three JSON exports (users, campaigns, interactions) used by the recommender.
def _read_json(path):
    """Parse the UTF-8 encoded JSON file at `path` and return the decoded object."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


user_data = _read_json('../data/users.json')
items_data = _read_json('../data/campaigns.json')
interaction_data = _read_json('../data/interactions.json')

# DataFrame views of the same records; the decoded objects above are kept as well
# because later cells index `items_data` by position.
user_df, items_df, interactions_df = (
    pd.DataFrame(records) for records in (user_data, items_data, interaction_data)
)


In [63]:
# Like history: one (user_id, item_id) row per like whose item exists in the catalogue.
# The inner merge drops likes that point at unknown items; drop_duplicates collapses
# repeated like rows for the same pair so each pair counts once.
liked_interactions = interactions_df[interactions_df['like'] == 1]
like_history = (
    pd.merge(liked_interactions, items_df, left_on='item_id', right_on='_id', how='inner')
    .loc[:, ['user_id', 'item_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# User x item matrix. Because like_history holds unique pairs, 'size' yields a truly
# binary matrix: 1 where the user liked the item, 0 (fill_value) otherwise.
user_item_matrix = like_history.pivot_table(index='user_id', columns='item_id', aggfunc='size', fill_value=0)

# Pairwise cosine similarity between the users' like vectors (rows of the matrix).
user_similarity = cosine_similarity(user_item_matrix)

# Map each actual user ID to its row position in user_item_matrix / user_similarity.
user_id_to_index = {user_id: index for index, user_id in enumerate(user_item_matrix.index)}

def get_similar_users(user_id, threshold=0.2):
    """Find the users most similar to `user_id`, best match first.

    Args:
        user_id: ID to look up in `user_id_to_index`.
        threshold: Minimum similarity score (inclusive) a user needs to be returned.

    Returns:
        A list of `(row_index, score)` tuples sorted by descending score, where
        `row_index` is the other user's position in `user_similarity`. The user
        itself is never included. Returns an empty list when `user_id` is unknown.
    """
    user_index = user_id_to_index.get(user_id)
    if user_index is None:
        return []

    candidates = []
    for other_index, score in enumerate(user_similarity[user_index]):
        if other_index != user_index and score >= threshold:
            candidates.append((other_index, score))

    # sorted() is stable, so users with equal scores keep their row order.
    return sorted(candidates, key=lambda pair: pair[1], reverse=True)


In [64]:
def recommend_projects_by_user(user_id, num_recommendations=10, threshold=0.2):
    """Recommend items liked by similar users that `user_id` has not liked yet.

    Similar users are visited from most to least similar. Each one contributes
    the items they liked (in `like_history` row order) that the target user has
    not liked and that have not been recommended already.

    Args:
        user_id: ID of the user to recommend for.
        num_recommendations: Maximum number of item IDs to return.
        threshold: Minimum similarity passed on to `get_similar_users`.

    Returns:
        A list of at most `num_recommendations` distinct item IDs. Empty when
        the user has no sufficiently similar users.
    """
    similar_users = get_similar_users(user_id, threshold)

    user_likes = set(like_history.loc[like_history['user_id'] == user_id, 'item_id'])

    recommendations = []
    # Seeded with the user's own likes so one membership test covers both
    # "already liked" and "already recommended by a more similar user".
    already_seen = set(user_likes)
    for similar_user_index, _similarity_score in similar_users:
        if len(recommendations) >= num_recommendations:
            break

        similar_user_id = user_item_matrix.index[similar_user_index]
        neighbour_items = like_history.loc[like_history['user_id'] == similar_user_id, 'item_id']

        # Iterate the Series (not a set) so the result order is deterministic.
        for item_id in neighbour_items:
            if item_id not in already_seen:
                already_seen.add(item_id)
                recommendations.append(item_id)

    return recommendations[:num_recommendations]

In [65]:
# Represent every campaign as a TF-IDF vector built from its genre tags:
# each item's genre list is joined into one space-separated "document".
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
genre_documents = items_df['genres'].map(' '.join)
item_tfidf_matrix = tfidf_vectorizer.fit_transform(genre_documents)

# Item-to-item similarity as the dot product of the TF-IDF rows.
# NOTE(review): this equals cosine similarity only while the vectorizer keeps its
# default L2 row normalisation - confirm if the vectorizer settings ever change.
cosine_sim = linear_kernel(item_tfidf_matrix, item_tfidf_matrix)

In [66]:
def get_user_profile(user_id):
    """Vectorise a user's interests with the already-fitted TF-IDF vectorizer.

    Args:
        user_id: Value matched against the `_id` column of `user_df`.

    Returns:
        The result of `tfidf_vectorizer.transform` for a single document made
        of the user's interests joined by spaces.

    Raises:
        IndexError: If no row of `user_df` has this `_id`.
    """
    is_target = user_df['_id'] == user_id
    # First matching row wins; an empty match raises IndexError here.
    interests = user_df.loc[is_target, 'interests'].values[0]
    interests_document = ' '.join(interests)
    return tfidf_vectorizer.transform([interests_document])

# Number of campaigns available to the neighbour search.
n_samples = items_df.shape[0]

# Ask for 10 neighbours, but never more than there are campaigns.
n_neighbors = min(n_samples, 10)

# Brute-force nearest-neighbour index over the campaign TF-IDF vectors,
# ranked by cosine distance.
nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute').fit(item_tfidf_matrix)

In [67]:
def get_recommend_projects_by_interests(user_id):
    """Recommend items whose genres are closest to the user's declared interests.

    The user's interest profile is matched against the item TF-IDF vectors with
    `nn_model`; items the user has already liked are then removed.

    Args:
        user_id: ID of the user to recommend for (must exist in `user_df`,
            otherwise `get_user_profile` raises).

    Returns:
        A list of item IDs in nearest-first order, excluding items with an
        empty ID and items the user already liked. May contain fewer than
        `n_neighbors` entries after filtering.
    """
    user_profile = get_user_profile(user_id)

    # kneighbors returns (distances, indices); keep the index row of the single query.
    item_indices = nn_model.kneighbors(user_profile, n_neighbors=n_neighbors)[1][0]

    # Items this user has already liked (same criterion as the hybrid recommender).
    liked_mask = (interactions_df['user_id'] == user_id) & (interactions_df['like'] == 1)
    liked_item_ids = set(interactions_df.loc[liked_mask, 'item_id'])

    recommended_items = []
    for idx in item_indices:
        item_id = items_data[idx]['_id']
        # Skip items without an ID and items the user already liked.
        if item_id and item_id not in liked_item_ids:
            recommended_items.append(item_id)

    return recommended_items

In [68]:
def get_hybrid_recommendations(user_id, max=10):
    """Combine collaborative and interest-based recommendations for a user.

    Args:
        user_id: ID of the user to recommend for.
        max: Maximum number of items to return; `None` disables the cap.
            (The name shadows the builtin inside this function; it is kept
            for backward compatibility with existing callers.)

    Returns:
        A list of item records (entries of `items_data`) that were suggested by
        either recommender and that the user has not liked, in `items_data`
        order, truncated to `max` entries.
    """
    collaborative_ids = recommend_projects_by_user(user_id)
    content_ids = get_recommend_projects_by_interests(user_id)

    # Set union de-duplicates and also works when either list is empty.
    recommend_ids = set(collaborative_ids) | set(content_ids)

    # Get the liked items of the user
    liked_mask = (interactions_df['user_id'] == user_id) & (interactions_df['like'] == 1)
    liked_items = set(interactions_df.loc[liked_mask, 'item_id'])

    # Fetch the full records of the recommended items after removing liked ones.
    recommended_items = [
        item for item in items_data
        if item['_id'] in recommend_ids and item['_id'] not in liked_items
    ]

    if max is None:
        return recommended_items
    return recommended_items[:max] if max > 0 else []

In [69]:
def _print_recommendations(items):
    """Print one summary line (ID and genres) per recommended item."""
    for item in items:
        print(f"Item ID: {item['_id']}, Genres: {item['genres']}")


# Demo: hybrid recommendations for one sample user.
target_user_id = '650a5c3a0057dfad70f707a9'
recommendations = get_hybrid_recommendations(target_user_id)
_print_recommendations(recommendations)

Item ID: 6598d9ddd9792d88b95cba98, Genres: ['education']
Item ID: 6598e133c4ba9e96ba6fc5d6, Genres: ['community']
Item ID: 6598e5ce45b53ae37c9607c4, Genres: ['community']
Item ID: 659960c5a047c1353271e6eb, Genres: ['education']
Item ID: 65996448a047c1353271e6fd, Genres: ['medical', 'child']
Item ID: 6599f81451541653eb5d4cba, Genres: ['education']
Item ID: 6599f92551541653eb5d4cc1, Genres: ['technology', 'community']
