In [100]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score

In [90]:
# Load user interests from JSON (replace 'your_interests.json' with your JSON file)
with open('../data/users.json', 'r', encoding='utf-8') as json_file:
    user_data = json.load(json_file)

# Load item data from JSON (replace 'your_items.json' with your JSON file)
with open('../data/campaigns.json', 'r', encoding='utf-8') as json_file:
    items_data = json.load(json_file)

# Create a DataFrame for user interests
user_df = pd.DataFrame(user_data)
print(user_df)

# Create a DataFrame for item data
items_df = pd.DataFrame(items_data)


                        _id        orgName  \
0  64e807313dae5c7f6bf58b76   GlobalGiving   
1  650a5c3a0057dfad70f707a9  Đại Nam Group   

                                               about  \
0  Quỹ Vì Tầm Vóc Việt (VSF) là tổ chức phi lợi n...   
1                                                NaN   

                                               image  \
0  https://files.globalgiving.org/pfil/organ/189/...   
1  https://dainambpo.vn/wp-content/uploads/2022/0...   

                                            location  \
0                                   Ha Noi, Viet Nam   
1  292 Ung Van Khiem, P.25, Binh Thanh, Ho Chi Mi...   

                           website            fb       twitter  \
0   https:/​/​www.globalgiving.org  GlobalGiving  GlobalGiving   
1            https://dainambpo.vn/     DaiNamBPO     DaiNamBPO   

                    email password           noCampaign          slug  \
0  globalgiving@gmail.com   123456  {'$numberInt': '3'}  globalgiving   
1     dai

In [91]:
# Filter data for the target user (replace with the target user's ID)
# target_user_id = 1
# target_user_interests = user_interests_df[user_interests_df['user_id'] == target_user_id]['interests'].values[0]

# print(target_user_interests)

In [92]:
# Create a TF-IDF vectorizer to convert item genres into numerical vectors
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
item_tfidf_matrix = tfidf_vectorizer.fit_transform(items_df['genres'].apply(lambda x: ' '.join(x)))

print(item_tfidf_matrix)

  (0, 1)	1.0
  (1, 0)	1.0
  (2, 0)	1.0
  (3, 0)	0.6292275146695526
  (3, 1)	0.7772211620785797


In [93]:
# Compute the cosine similarity between item genres and the user's interests
# cosine_sim = linear_kernel(item_tfidf_matrix, tfidf_vectorizer.transform([target_user_interests]))

# Compute the cosine similarity between item genres
cosine_sim = linear_kernel(item_tfidf_matrix, item_tfidf_matrix)
print(cosine_sim)



[[1.         0.         0.         0.77722116]
 [0.         1.         1.         0.62922751]
 [0.         1.         1.         0.62922751]
 [0.77722116 0.62922751 0.62922751 1.        ]]


In [94]:
# Get the indices of items sorted by their similarity scores
# item_scores = list(enumerate(cosine_sim.flatten()))
# item_scores = sorted(item_scores, key=lambda x: x[1], reverse=True)

# print(item_scores)

# Create a user profile based on interests
def get_user_profile(user_id):
    user_interests = user_df[user_df['_id'] == user_id]['interests'].values[0]
    # user_visited   = user_interests_df[user_interests_df['_id'] == user_id]['visited'].values[0]
    # items_visited  = [item for item in items_df if item["id"] in user_visited]
    # items_genres   = [item["name"] for item in result_items]
    # consume_data = user_interests + user_visited

    return tfidf_vectorizer.transform([' '.join(user_interests)])

# Calculate the number of samples (items)
n_samples = len(items_df)

# Determine the appropriate value for n_neighbors
n_neighbors = min(10, n_samples)

# Create a Nearest Neighbors model based on cosine similarity
nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute')
nn_model.fit(item_tfidf_matrix)

In [95]:
# Extract the top N recommended items (you can change N as needed)
# N = 10  # Change N to the number of recommendations you want
# top_n_recommendations = item_scores[:N]

# Function to get item recommendations for a user
def get_item_recommendations(user_id):
    user_profile = get_user_profile(user_id)
    
    # Find similar items based on user's interests
    item_indices = nn_model.kneighbors(user_profile, n_neighbors=n_neighbors)[1][0]
    
    # Get the liked items of the user
    liked_items = user_df[user_df['_id'] == user_id]['liked'].values[0]
    
    # Filter out items that the user has already liked
    recommended_items = [items_data[idx] for idx in item_indices if items_data[idx]['_id'] not in liked_items]
    
    return recommended_items

In [98]:
# Get the item IDs of the top recommended items
# recommended_item_ids = [items_df['item_id'].iloc[i[0]] for i in top_n_recommendations]

# Example: Get recommendations for a user (replace with the target user ID)
target_user_id = '650a5c3a0057dfad70f707a9'
recommendations = get_item_recommendations(target_user_id)

print(recommendations)


[{'_id': '6598d9ddd9792d88b95cba98', 'ownerAddress': '0xa73B10dC969a376cF1F140e4E2C2ccea1b6d86eE', 'createdBy': {'$oid': '64e807313dae5c7f6bf58b76'}, 'title': 'Phẫu thuật khuyết tật vận động miễn phí cho bệnh nhi nghèo', 'description': 'Theo tài liệu hướng dẫn “Phát hiện sớm – can thiệp sớm khuyết tật trẻ em” do Bộ Y tế vừa ban hành năm 2023,: loại khuyết tật phổ biến nhất ở trẻ em trong điều tra tại cộng đồng là khuyết tật về vận động, chiếm 22,4% tổng số trẻ khuyết tật.', 'content': '<p>Khuyết tật vận động c&oacute; thể do nhiều nguy&ecirc;n nh&acirc;n g&acirc;y ra, trong đ&oacute; c&oacute; loại h&igrave;nh do bẩm sinh v&agrave; do tai nạn. Trong đ&oacute;, c&oacute; những loại h&igrave;nh khuyết tật vận động nhỏ, phổ biến c&oacute; thể kể đến bao gồm: hiện tượng co g&acirc;n g&oacute;t ch&acirc;n, d&iacute;nh ng&oacute;n ch&acirc;n, ng&oacute;n tay, hội chứng b&agrave;n ch&acirc;n bẹt, đứt cơ/g&acirc;n, v. v.</p>\n<p>Đối với c&aacute;c trường hợp n&agrave;y, phẫu thuật l&agrave; ph

In [99]:
# Print recommended item IDs
# print("Recommended Item IDs:", recommended_item_ids)

# Print recommended items
print("Recommended Items:")
for item in recommendations:
    print(f"Item ID: {item['_id']}, Genres: {item['genres']}")


Recommended Items:
Item ID: 6598d9ddd9792d88b95cba98, Genres: ['education']
Item ID: 6599425fe3e75cddc1a4884b, Genres: ['community', 'education']
Item ID: 6598e133c4ba9e96ba6fc5d6, Genres: ['community']
Item ID: 6598e5ce45b53ae37c9607c4, Genres: ['community']


In [103]:
#Danh gia do chinh xac cua mo hinh
# Trích xuất nhãn thực tế và nhãn dự đoán từ dữ liệu
items_id = [item['_id'] for item in items_data]
recommended_id = [item['_id'] for item in recommendations]
print(recommended_id)

# actual_labels = data['actual_labels']
# predicted_labels = data['predicted_labels']

# Tính precision
# precision = precision_score(actual_labels, predicted_labels)

# print(f'Precision: {precision}')

['6598d9ddd9792d88b95cba98', '6599425fe3e75cddc1a4884b', '6598e133c4ba9e96ba6fc5d6', '6598e5ce45b53ae37c9607c4']
