In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer

# 1. Load the data
transformed_courses = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/transformed/transformed_courses.csv")
transformed_users = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/transformed/transformed_users.csv")
transformed_progress = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/transformed/transformed_progress.csv")
user_skills = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/raw/user_skills.csv")
designations_skills = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/raw/designations_skill.csv")
course_skills = pd.read_csv("C:/Users/AnanyaSarkar/Documents/project/datascienceandengg/staging/raw/course_skills.csv")

# 2. Prepare the dataset
# Merge user skills with designation skills
designation_skills_merged = designations_skills.merge(transformed_users[['designation_id']], on='designation_id')

# Create a mapping of userId to their skills
user_skills_grouped = user_skills.groupby('userId')['skillId'].apply(list).reset_index()

# Create a mapping of courseId to its skills
course_skills_grouped = course_skills.groupby('courseId')['skillId'].apply(list).reset_index()

# 3. Feature Engineering
mlb_user_skills = MultiLabelBinarizer()
user_skill_matrix = mlb_user_skills.fit_transform(user_skills_grouped['skillId'])
user_skill_df = pd.DataFrame(user_skill_matrix, columns=mlb_user_skills.classes_, index=user_skills_grouped['userId'])

mlb_course_skills = MultiLabelBinarizer()
course_skill_matrix = mlb_course_skills.fit_transform(course_skills_grouped['skillId'])
course_skill_df = pd.DataFrame(course_skill_matrix, columns=mlb_course_skills.classes_, index=course_skills_grouped['courseId'])

# 4. Calculate similarity
course_similarity = cosine_similarity(course_skill_df)
course_similarity_df = pd.DataFrame(course_similarity, index=course_skill_df.index, columns=course_skill_df.index)

In [66]:
def recommend_courses(user_id, top_n=5):
    # Get user's skills
    if user_id not in user_skill_df.index:
        return f"User ID {user_id} not found."

    # Extract user's skills as a DataFrame and convert to a vector
    user_skills_vector = user_skill_df.loc[user_id].values.reshape(1, -1)

    # Get common skills
    common_skills = user_skill_df.columns.intersection(course_skill_df.columns)

    # Align user skills vector with common skills
    user_skills_vector = pd.DataFrame(user_skills_vector, columns=user_skill_df.columns)[common_skills].values

    # Debugging print statements
    print("User skills vector shape:", user_skills_vector.shape)  # Check shape of user skills vector
    print("Course skills DataFrame shape:", course_skill_df[common_skills].shape)  # Check shape of course skills DataFrame

    # Calculate similarity score with each course
    scores = cosine_similarity(user_skills_vector, course_skill_df[common_skills])

    # Create a DataFrame for scores
    scores_df = pd.DataFrame(scores.flatten(), index=course_skill_df.index, columns=['score']).sort_values(by='score', ascending=False)

    # Get top N recommended courses
    recommended_courses = scores_df.head(top_n).index.tolist()

    # Return course titles
    return transformed_courses[transformed_courses['courseId'].isin(recommended_courses)][['courseId', 'title']]

# Example usage
user_id_example = 145  # Replace with actual user ID
recommended_courses = recommend_courses(user_id_example)
print(recommended_courses)


User skills vector shape: (1, 31)
Course skills DataFrame shape: (30, 31)
    courseId                                     title
2          3                   SQL for Data Management
11        12  Next.js: Server-Side Rendering Made Easy
19        20                  Azure Cloud Fundamentals
20        21           Power BI for Data Visualization
27        28             Effective Communication in HR


: 

In [65]:
#Recommended Courses for User 1: [np.int64(17), np.int64(16), np.int64(25), np.int64(8), np.int64(12)]
#Recommended Courses for User 3: [np.int64(25), np.int64(17), np.int64(15), np.int64(18), np.int64(12)]
#Recommended Courses for User 35: [np.int64(6), np.int64(1), np.int64(17), np.int64(10), np.int64(8)]
#Recommended Courses for User 201: [np.int64(12), np.int64(8), np.int64(5), np.int64(11), np.int64(14)]