## Generate course recommendations based on user profile and course genre vectors

In [None]:
import pandas as pd
import numpy as np

# Presumably a random seed; it is not referenced by the code visible here.
rs = 123

# Locations of the three CSV inputs: the course genre table, the user profile
# table, and the test-user records.
course_genre_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-ML321EN-SkillsNetwork/labs/datasets/course_genre.csv"
profile_genre_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-ML321EN-SkillsNetwork/labs/datasets/user_profile.csv"
test_users_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-ML321EN-SkillsNetwork/labs/datasets/rs_content_test.csv"

course_genres_df = pd.read_csv(course_genre_url)
profile_df = pd.read_csv(profile_genre_url)
test_users_df = pd.read_csv(test_users_url)

# Collapse the test records to one row per user (groupby sorts the ids) and
# keep the distinct user ids as a plain list.
test_users = test_users_df.groupby('user').max().reset_index()
test_user_ids = test_users['user'].tolist()
total_users = len(test_user_ids)
print(f"Total numbers of test users {total_users}")

# Profile rows restricted to the test users, as a DataFrame and as nested lists.
is_test_user = profile_df['user'].isin(test_user_ids)
test_user_profiles = profile_df[is_test_user]
test_user_profiles_list = test_user_profiles.values.tolist()

# A course is recommended only when its score is strictly above this value.
score_threshold = 0.35

# Running count of recommendations across all test users.
total_recommendations = 0

# Calculate recommendations for each user.
#
# A course's score for a user is the dot product of the user's profile vector
# and the course's genre vector. Courses the user is not enrolled in whose
# score is strictly greater than `score_threshold` are recommended.

# Align the two tables on shared column NAMES rather than by position.
# NOTE(review): the previous code sliced both rows with [1:]. If the course
# table carries any non-genre column after COURSE_ID (the public
# course_genre.csv appears to include TITLE -- confirm), the two vectors differ
# in length and every course was silently skipped, so every user ended up with
# zero recommendations.
genre_columns = [
    column
    for column in test_user_profiles.columns
    if column != 'user' and column in course_genres_df.columns
]
if not genre_columns:
    raise ValueError(
        "profile_df and course_genres_df share no genre columns; cannot score courses"
    )

# Loop-invariant data, built once instead of once per user/course.
# keep='first' mirrors the previous `.values[0]` choice for duplicated ids.
unique_courses = course_genres_df.drop_duplicates(subset='COURSE_ID', keep='first')
course_ids = unique_courses['COURSE_ID']
course_id_values = course_ids.to_numpy()
course_matrix = unique_courses[genre_columns].to_numpy(dtype=float)

profiles_by_user = (
    test_user_profiles.drop_duplicates(subset='user', keep='first')
    .set_index('user')[genre_columns]
)
enrolled_by_user = {
    user: set(items)
    for user, items in test_users_df.groupby('user')['item']
}

for user_id in test_user_ids:
    if user_id not in profiles_by_user.index:
        # Previously this case raised IndexError on `.values[0]`. The user
        # still counts towards the average, with zero recommendations.
        print(f"No profile found for User {user_id}; skipping\n")
        continue

    user_profile = profiles_by_user.loc[user_id].to_numpy(dtype=float)
    enrolled_courses = enrolled_by_user.get(user_id, set())

    # One matrix-vector product scores every course for this user at once.
    scores = course_matrix @ user_profile
    not_enrolled = ~course_ids.isin(enrolled_courses).to_numpy()
    is_recommended = (scores > score_threshold) & not_enrolled

    # Rows follow the course table's order, so the output is deterministic
    # (iterating a set, as before, gave an arbitrary order).
    recommendations_df = pd.DataFrame({
        'COURSE_ID': course_id_values[is_recommended],
        'Recommendation_Score': scores[is_recommended],
    })
    total_recommendations += len(recommendations_df)

    # Check if there are any recommendations
    if not recommendations_df.empty:
        print(f"Recommendations for User {user_id}:\n{recommendations_df}\n")
    else:
        print(f"No recommendations found for User {user_id}\n")

# Guard against an empty test set so the summary never divides by zero.
average_recommendations = total_recommendations / total_users if total_users else 0.0
print(f"Average number of courses recommended per user: {average_recommendations:.2f}")
