In [17]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors

In [18]:
# Load user interests from JSON (replace 'your_interests.json' with your JSON file)
with open('../data/users.json', 'r', encoding='utf-8') as json_file:
    user_data = json.load(json_file)

# Load item data from JSON (replace 'your_items.json' with your JSON file)
with open('../data/campaigns.json', 'r', encoding='utf-8') as json_file:
    items_data = json.load(json_file)

# Create a DataFrame for user interests
user_df = pd.DataFrame(user_data)

# Create a DataFrame for item data
items_df = pd.DataFrame(items_data)

print(user_df)
print(items_df)


   user_id               interests       liked
0        1  [hành động, adventure]  [102, 103]
1        2       [comedy, romance]  [102, 105]
2        3       [science fiction]       [104]
   item_id                  genres
0      101  [hành động, adventure]
1      102       [comedy, romance]
2      103  [hành động, adventure]
3      104       [science fiction]
4      105             [education]
5      106               [romance]
6      107             [adventure]
7      108       [comedy, romance]
8      109  [hành động, adventure]


In [19]:
# Filter data for the target user (replace with the target user's ID)
# target_user_id = 1
# target_user_interests = user_interests_df[user_interests_df['user_id'] == target_user_id]['interests'].values[0]

# print(target_user_interests)

In [20]:
# Create a TF-IDF vectorizer to convert item genres into numerical vectors
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
item_tfidf_matrix = tfidf_vectorizer.fit_transform(items_df['genres'].apply(lambda x: ' '.join(x)))

print(item_tfidf_matrix)

  (0, 0)	0.5298570392092922
  (0, 7)	0.5996880514068805
  (0, 4)	0.5996880514068805
  (1, 5)	0.656138284657041
  (1, 1)	0.7546406770160988
  (2, 0)	0.5298570392092922
  (2, 7)	0.5996880514068805
  (2, 4)	0.5996880514068805
  (3, 3)	0.7071067811865476
  (3, 6)	0.7071067811865476
  (4, 2)	1.0
  (5, 5)	1.0
  (6, 0)	1.0
  (7, 5)	0.656138284657041
  (7, 1)	0.7546406770160988
  (8, 0)	0.5298570392092922
  (8, 7)	0.5996880514068805
  (8, 4)	0.5996880514068805


In [21]:
# Compute the cosine similarity between item genres and the user's interests
# cosine_sim = linear_kernel(item_tfidf_matrix, tfidf_vectorizer.transform([target_user_interests]))

# Compute the cosine similarity between item genres
cosine_sim = linear_kernel(item_tfidf_matrix, item_tfidf_matrix)
print(cosine_sim)



[[1.         0.         1.         0.         0.         0.
  0.52985704 0.         1.        ]
 [0.         1.         0.         0.         0.         0.65613828
  0.         1.         0.        ]
 [1.         0.         1.         0.         0.         0.
  0.52985704 0.         1.        ]
 [0.         0.         0.         1.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         1.         0.
  0.         0.         0.        ]
 [0.         0.65613828 0.         0.         0.         1.
  0.         0.65613828 0.        ]
 [0.52985704 0.         0.52985704 0.         0.         0.
  1.         0.         0.52985704]
 [0.         1.         0.         0.         0.         0.65613828
  0.         1.         0.        ]
 [1.         0.         1.         0.         0.         0.
  0.52985704 0.         1.        ]]


In [22]:
# Get the indices of items sorted by their similarity scores
# item_scores = list(enumerate(cosine_sim.flatten()))
# item_scores = sorted(item_scores, key=lambda x: x[1], reverse=True)

# print(item_scores)

# Create a user profile based on interests
def get_user_profile(user_id):
    user_interests = user_df[user_df['user_id'] == user_id]['interests'].values[0]
    return tfidf_vectorizer.transform([' '.join(user_interests)])

# Calculate the number of samples (items)
n_samples = len(items_df)

# Determine the appropriate value for n_neighbors
n_neighbors = min(10, n_samples)

# Create a Nearest Neighbors model based on cosine similarity
nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute')
nn_model.fit(item_tfidf_matrix)

In [23]:
# Extract the top N recommended items (you can change N as needed)
# N = 10  # Change N to the number of recommendations you want
# top_n_recommendations = item_scores[:N]

# Function to get item recommendations for a user
def get_item_recommendations(user_id):
    user_profile = get_user_profile(user_id)
    
    # Find similar items based on user's interests
    item_indices = nn_model.kneighbors(user_profile, n_neighbors=n_neighbors)[1][0]
    
    # Get the liked items of the user
    liked_items = user_df[user_df['user_id'] == user_id]['liked'].values[0]
    
    # Filter out items that the user has already liked
    # recommended_items = [items_df.iloc[idx] for idx in item_indices if items_df.iloc[idx]['item_id'] not in liked_items]
    recommended_items = [items_data[idx] for idx in item_indices if items_data[idx]['item_id'] not in liked_items]

    
    return recommended_items

In [24]:
# Get the item IDs of the top recommended items
# recommended_item_ids = [items_df['item_id'].iloc[i[0]] for i in top_n_recommendations]

# Example: Get recommendations for a user (replace with the target user ID)
target_user_id = 1
recommendations = get_item_recommendations(target_user_id)

print(recommendations)


[{'item_id': 101, 'genres': ['hành động', 'adventure']}, {'item_id': 109, 'genres': ['hành động', 'adventure']}, {'item_id': 107, 'genres': ['adventure']}, {'item_id': 104, 'genres': ['science fiction']}, {'item_id': 105, 'genres': ['education']}, {'item_id': 106, 'genres': ['romance']}, {'item_id': 108, 'genres': ['comedy', 'romance']}]


In [25]:
# Print recommended item IDs
# print("Recommended Item IDs:", recommended_item_ids)

# Print recommended items
print("Recommended Items:")
for item in recommendations:
    print(f"Item ID: {item['item_id']}, Genres: {item['genres']}")


Recommended Items:
Item ID: 101, Genres: ['hành động', 'adventure']
Item ID: 109, Genres: ['hành động', 'adventure']
Item ID: 107, Genres: ['adventure']
Item ID: 104, Genres: ['science fiction']
Item ID: 105, Genres: ['education']
Item ID: 106, Genres: ['romance']
Item ID: 108, Genres: ['comedy', 'romance']
