In [15]:
from faker import Faker
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random

# Step 1: Generate random user data

fake = Faker()

# List of predefined skills
skills_list = ['UI/UX', 'C++ Programming', 'Algorithms', 'Data structures']


# Generate random user data
users = []
for _ in range(10):
    email = fake.email()
    skills = random.sample(skills_list, 4)  # Random skills from the predefined list
    users.append((email, skills))

# Step 2: Generate random project data

# Generate random project data
projects = []
for _ in range(25):
    project_id = fake.uuid4()
    skills = random.sample(skills_list, 4)  # Random skills from the predefined list
    projects.append((project_id, skills))




In [16]:
users

[('umunoz@example.org',
  ['Data structures', 'C++ Programming', 'UI/UX', 'Algorithms']),
 ('scott17@example.org',
  ['C++ Programming', 'Algorithms', 'UI/UX', 'Data structures']),
 ('natalie19@example.com',
  ['Algorithms', 'UI/UX', 'Data structures', 'C++ Programming']),
 ('steven79@example.org',
  ['UI/UX', 'Data structures', 'Algorithms', 'C++ Programming']),
 ('fgeorge@example.org',
  ['Data structures', 'C++ Programming', 'Algorithms', 'UI/UX']),
 ('powelllaura@example.net',
  ['Algorithms', 'UI/UX', 'Data structures', 'C++ Programming']),
 ('herrerajohn@example.net',
  ['C++ Programming', 'UI/UX', 'Algorithms', 'Data structures']),
 ('fhall@example.org',
  ['UI/UX', 'C++ Programming', 'Data structures', 'Algorithms']),
 ('marc02@example.org',
  ['Data structures', 'Algorithms', 'UI/UX', 'C++ Programming']),
 ('nicole20@example.net',
  ['Data structures', 'UI/UX', 'C++ Programming', 'Algorithms'])]

In [17]:
projects

[('b5e96689-c891-49f7-a95e-040f80fb2db2',
  ['UI/UX', 'C++ Programming', 'Algorithms', 'Data structures']),
 ('53dffbd2-8555-4298-8cbe-707f86b235db',
  ['C++ Programming', 'Data structures', 'UI/UX', 'Algorithms']),
 ('948a03c1-f827-40c3-9a68-54eb6c7d3a52',
  ['C++ Programming', 'UI/UX', 'Algorithms', 'Data structures']),
 ('84a1baa9-fecd-4073-8708-14394c1e52f3',
  ['Algorithms', 'UI/UX', 'C++ Programming', 'Data structures']),
 ('ea6d9f9f-92f3-4682-8182-ddd5555ae4a8',
  ['Algorithms', 'C++ Programming', 'UI/UX', 'Data structures']),
 ('9cf78974-cada-4a9d-ae4f-7f4961deeadd',
  ['Algorithms', 'UI/UX', 'Data structures', 'C++ Programming']),
 ('b1a3b242-dfbf-4bb2-be59-0b8ca8e1d9c1',
  ['C++ Programming', 'Data structures', 'UI/UX', 'Algorithms']),
 ('9af6f0a8-e4ba-49f9-a6a2-195fb6d5d83e',
  ['UI/UX', 'Data structures', 'Algorithms', 'C++ Programming']),
 ('fbc14302-f25c-4f74-a34d-f1f685db1dc6',
  ['Data structures', 'C++ Programming', 'UI/UX', 'Algorithms']),
 ('d02f3545-b4c3-4cc1-94bb-7

In [18]:
# Step 3: Preprocess data and compute cosine similarity matrix

# Preprocess skills for CountVectorizer
all_skills = [' '.join(user[1]) for user in users] + [' '.join(project[1]) for project in projects]



In [19]:
all_skills

['Data structures C++ Programming UI/UX Algorithms',
 'C++ Programming Algorithms UI/UX Data structures',
 'Algorithms UI/UX Data structures C++ Programming',
 'UI/UX Data structures Algorithms C++ Programming',
 'Data structures C++ Programming Algorithms UI/UX',
 'Algorithms UI/UX Data structures C++ Programming',
 'C++ Programming UI/UX Algorithms Data structures',
 'UI/UX C++ Programming Data structures Algorithms',
 'Data structures Algorithms UI/UX C++ Programming',
 'Data structures UI/UX C++ Programming Algorithms',
 'UI/UX C++ Programming Algorithms Data structures',
 'C++ Programming Data structures UI/UX Algorithms',
 'C++ Programming UI/UX Algorithms Data structures',
 'Algorithms UI/UX C++ Programming Data structures',
 'Algorithms C++ Programming UI/UX Data structures',
 'Algorithms UI/UX Data structures C++ Programming',
 'C++ Programming Data structures UI/UX Algorithms',
 'UI/UX Data structures Algorithms C++ Programming',
 'Data structures C++ Programming UI/UX Algori

In [20]:
# Create CountVectorizer and fit-transform the skills
vectorizer = CountVectorizer()
skills_matrix = vectorizer.fit_transform(all_skills)

# Compute cosine similarity matrix
user_skills = skills_matrix[:len(users)]
project_skills = skills_matrix[len(users):]
cosine_sim_matrix = cosine_similarity(user_skills, project_skills)

# Step 4: Generate recommendations

for user_index, user in enumerate(users):
    email = user[0]

    # Get cosine similarity scores for the user
    user_sim_scores = cosine_sim_matrix[user_index]

    # Sort projects based on similarity scores
    sorted_indices = user_sim_scores.argsort()[::-1]

    # Get top 15 recommended projects
    num_projects = min(15, len(projects))
    recommended_projects = [projects[i][0] for i in sorted_indices[:num_projects]]

    print(f"Recommended projects for {email}: {recommended_projects}")

Recommended projects for umunoz@example.org: ['de71a8ce-b6fb-45c8-a846-9e92d0b91fdb', '2b5110c2-f84c-4edf-a042-166060adb64e', '53dffbd2-8555-4298-8cbe-707f86b235db', '948a03c1-f827-40c3-9a68-54eb6c7d3a52', '84a1baa9-fecd-4073-8708-14394c1e52f3', 'ea6d9f9f-92f3-4682-8182-ddd5555ae4a8', '9cf78974-cada-4a9d-ae4f-7f4961deeadd', 'b1a3b242-dfbf-4bb2-be59-0b8ca8e1d9c1', '9af6f0a8-e4ba-49f9-a6a2-195fb6d5d83e', 'fbc14302-f25c-4f74-a34d-f1f685db1dc6', 'd02f3545-b4c3-4cc1-94bb-7bb7cf3cb304', '6e9c8029-0425-477c-a035-f19893c905fa', 'c0001b14-263d-45dc-b912-a7b92a31ceb3', 'af692fe9-a275-4957-be4d-341a40392da7', '659584e4-0da9-47e4-91bc-d9d70d7267ae']
Recommended projects for scott17@example.org: ['de71a8ce-b6fb-45c8-a846-9e92d0b91fdb', '2b5110c2-f84c-4edf-a042-166060adb64e', '53dffbd2-8555-4298-8cbe-707f86b235db', '948a03c1-f827-40c3-9a68-54eb6c7d3a52', '84a1baa9-fecd-4073-8708-14394c1e52f3', 'ea6d9f9f-92f3-4682-8182-ddd5555ae4a8', '9cf78974-cada-4a9d-ae4f-7f4961deeadd', 'b1a3b242-dfbf-4bb2-be59-0b