In [1]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from bson import ObjectId
import random
import json

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017')
db = client['collaborativefilteringtest']  # Database name
users_collection = db['users']
posts_collection = db['posts']
likes_collection = db['likes']


In [2]:
# Fetch data from MongoDB
users = list(users_collection.find())
posts = list(posts_collection.find())
likes = list(likes_collection.find())


# Create mappings for user and post IDs (using string IDs)
user_ids = {str(user['_id']): idx for idx, user in enumerate(users)}
post_ids = {str(post['_id']): idx for idx, post in enumerate(posts)}

print('post_id: ', post_ids , 'user_ids: ', user_ids)

# Create rating matrix
num_users = len(users)
num_posts = len(posts)
ratings_matrix = np.zeros((num_users, num_posts))

for like in likes:
    user_id_str = str(like['user_id'])
    post_id_str = str(like['post_id'])
    
    # Debug output: Check if the ID exists in the mappings
    if user_id_str not in user_ids:
        print(f"User ID {user_id_str} not found in user_ids mapping")
    if post_id_str not in post_ids:
        print(f"Post ID {post_id_str} not found in post_ids mapping")

    user_idx = user_ids[user_id_str]
    post_idx = post_ids[post_id_str]
    
    ratings_matrix[user_idx, post_idx] = 1
    print(ratings_matrix)


post_id:  {'669c0b941bfaacf011f85e5a': 0, '669c0b941bfaacf011f85e5b': 1, '669c0b941bfaacf011f85e5c': 2, '669c0b941bfaacf011f85e5d': 3, '669c0b941bfaacf011f85e5e': 4, '669c0b941bfaacf011f85e5f': 5, '669c0b941bfaacf011f85e60': 6, '669c0b941bfaacf011f85e61': 7, '669c0b941bfaacf011f85e62': 8, '669c0b941bfaacf011f85e63': 9, '669c0b941bfaacf011f85e64': 10, '669c0b941bfaacf011f85e65': 11, '669c0b941bfaacf011f85e66': 12, '669c0b941bfaacf011f85e67': 13, '669c0b941bfaacf011f85e68': 14, '669c0b941bfaacf011f85e69': 15, '669c0b941bfaacf011f85e6a': 16, '669c0b941bfaacf011f85e6b': 17, '669c0b941bfaacf011f85e6c': 18, '669c0b941bfaacf011f85e6d': 19, '669c8dd90ffb4db2ccf6ee31': 20, '669cf99b797b3370e3f5e5d8': 21, '669db8f27358a92546edc210': 22, '669de4b3fd44c25b32e1427d': 23, '669dee9c0181b9ae41b7ae7f': 24, '669deeae0181b9ae41b7ae81': 25} user_ids:  {'669c0b8e2ea43ac3b0c83bd2': 0, '669c0b8e2ea43ac3b0c83bd3': 1, '669c0b8e2ea43ac3b0c83bd4': 2, '669c0b8e2ea43ac3b0c83bd5': 3, '669c0b8e2ea43ac3b0c83bd6': 4, 

In [3]:

# Calculate cosine similarity between users
user_similarity = cosine_similarity(ratings_matrix)
print('------------------------------------------------------')
print('user similarity: ', user_similarity)
print('------------------------------------------------------')
# Function to recommend posts for a given user based on similar users' likes
def recommend_posts(user_idx, num_recommendations=5):
    sim_scores = user_similarity[user_idx]
    print('sim_scores: ', sim_scores)
    print('------------------------------------------------------')
    
    similar_users = np.argsort(sim_scores)[::-1][1:]  # Exclude the user itself
    print('similar users: ', similar_users)
    print('------------------------------------------------------')
    
    post_scores = np.zeros(num_posts)
   
    for similar_user in similar_users:
        post_scores += sim_scores[similar_user] * ratings_matrix[similar_user]

    print('post score: ', post_scores)
    post_scores[ratings_matrix[user_idx] > 0] = 0
    
    print('------------------------------------------------------')
    recommended_post_indices = np.argsort(post_scores)[::-1][:num_recommendations]
    return recommended_post_indices

recommend_posts(user_idx, num_recommendations=10)

------------------------------------------------------
user similarity:  [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         1.         0.40824829 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.5
  0.         0.         0.         0.39223227]
 [0.         0.         0.40824829 1.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.57735027 0.         0.         0.         0.40824829
  0.         0.         0.         0.48038446]
 [0.      

array([14, 25, 23, 22, 21, 20, 19, 18, 17, 24])

In [4]:

# Generate recommendations for all users and save to a dictionary
recommendations = {}
for user_id_str, user_idx in user_ids.items():
    recommended_posts = recommend_posts(user_idx, num_recommendations=10)
    print('------------------------------------------------------')
    print('recommended posts: ', recommended_posts)
    print('------------------------------------------------------')
    recommended_post_ids = [list(post_ids.keys())[list(post_ids.values()).index(post_idx)] for post_idx in recommended_posts]
    recommendations[user_id_str] = recommended_post_ids

# Save recommendations to a JSON file
with open('recommendations.json', 'w') as f:
    json.dump(recommendations, f)

sim_scores:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
------------------------------------------------------
similar users:  [20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0]
------------------------------------------------------
post score:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0.]
------------------------------------------------------
------------------------------------------------------
recommended posts:  [25 24 23 22 21 20 19 18 17 16]
------------------------------------------------------
sim_scores:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
------------------------------------------------------
similar users:  [20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0]
------------------------------------------------------
post score:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0.]
---------------------------------------------------