In [4]:
#Imports

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
#Loading the Data
users_df = pd.read_csv ("users.csv")

projects_df = pd.read_csv ("projects.csv")

In [6]:
users_df.head(5)

Unnamed: 0,first_name,last_name,email,interests
0,Bianca,Hill,bianca.hill@example.com,"['UI/UX Design', 'Android Mobile App Programmi..."
1,Mindy,Williams,mindy.williams@example.com,"['Machine Learning', 'Data Analysis', 'Web Dev..."
2,Jacob,Stanley,jacob.stanley@example.com,"['Java Programming', 'UI/UX Design', 'Data Ana..."
3,Alicia,Willis,alicia.willis@example.com,"['C++ Programming', 'Data Analysis', 'Java Pro..."
4,Cassandra,Stephens,cassandra.stephens@example.com,"['Android Mobile App Programming', 'Machine Le..."


In [7]:
#Preprocessing the data we are interested in
user_interests = users_df['interests'].apply(eval).tolist()
project_skills = projects_df['project_skills'].apply(eval).tolist()

In [8]:
user_interests[0:10]

[['UI/UX Design', 'Android Mobile App Programming', 'C++ Programming'],
 ['Machine Learning',
  'Data Analysis',
  'Web Development',
  'C++ Programming',
  'UI/UX Design'],
 ['Java Programming',
  'UI/UX Design',
  'Data Analysis',
  'C++ Programming',
  'Android Mobile App Programming'],
 ['C++ Programming',
  'Data Analysis',
  'Java Programming',
  'UI/UX Design',
  'Machine Learning',
  'Android Mobile App Programming',
  'Web Development'],
 ['Android Mobile App Programming',
  'Machine Learning',
  'Web Development',
  'Data Analysis',
  'UI/UX Design',
  'C++ Programming'],
 ['Web Development',
  'Machine Learning',
  'Java Programming',
  'Data Analysis',
  'Android Mobile App Programming',
  'C++ Programming'],
 ['Machine Learning',
  'Android Mobile App Programming',
  'Java Programming',
  'UI/UX Design',
  'Data Analysis'],
 ['Web Development',
  'Machine Learning',
  'Java Programming',
  'Android Mobile App Programming',
  'C++ Programming',
  'Data Analysis'],
 ['UI/UX 

In [9]:
#Create a list of unique interests and skills
all_interests = list(set([interest for interests in user_interests for interest in interests]))
all_skills = list(set([skill for skills in project_skills for skill in skills]))

In [10]:
all_interests

['UI/UX Design',
 'Web Development',
 'Data Analysis',
 'Java Programming',
 'Machine Learning',
 'Android Mobile App Programming',
 'C++ Programming']

In [11]:
#Create numerical vectors for user interests and project skills required
user_vectors = np.zeros((len(user_interests), len(all_interests)))
for i, interests in enumerate(user_interests):
    for interest in interests:
        user_vectors[i, all_interests.index(interest)] = 1

In [12]:
user_vectors

array([[1., 0., 0., ..., 0., 1., 1.],
       [1., 1., 1., ..., 1., 0., 1.],
       [1., 0., 1., ..., 0., 1., 1.],
       ...,
       [1., 1., 0., ..., 0., 1., 1.],
       [1., 1., 1., ..., 1., 0., 1.],
       [1., 1., 0., ..., 0., 1., 0.]])

In [13]:
user_vectors.size

7000

In [14]:
project_vectors = np.zeros((len(project_skills), len(all_skills)))
for i, skills in enumerate(project_skills):
    for skill in skills:
        project_vectors[i, all_skills.index(skill)] = 1

In [15]:
project_vectors

array([[1., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 1., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [16]:
#Calculate cosine similarity
similarity_matrix = cosine_similarity(user_vectors, project_vectors)

In [17]:
# Step 4: Sort projects based on cosine similarity scores
sorted_indices = np.argsort(similarity_matrix, axis=1)[:, ::-1]  # Descending order


In [18]:
# Step 5: Return top 10 project recommendations for each user
num_recommendations = 10
rec_df = pd.DataFrame(columns=['email', 'recommended_projects'])

for i, user in enumerate(users_df['email']):
    user_recommendations = []
    for j in range(num_recommendations):
        project_index = sorted_indices[i, j]
        project_id = projects_df['project_id'].iloc[project_index]
        user_recommendations.append(project_id)

        #print("Recommendations for user", user, ":", user_recommendations)
    rec_df.loc[i] = [user, user_recommendations]
        
rec_df.to_csv('rec_cosSim.csv', index=False)