## 🟩 Step 1 — Mount Google Drive & Setup Paths

We will first mount Google Drive and set up the project directory.
This ensures our notebook can access the datasets and folders inside
the Course Recommendation System structure.

In [1]:
# Step 1 → Mount Google Drive and setup project paths

from google.colab import drive
import os

# Mount drive
drive.mount('/content/drive')

# Define project path
project_path = "/content/drive/MyDrive/Projects/Machine_Learning/Course_Recomendatioan_System"

# Define data folder paths
data_path = os.path.join(project_path, "data", "processed")
file_path = os.path.join(data_path, "clean_courses.csv")

# Confirm everything
print("✅ Google Drive mounted successfully!")
print("📂 Project Path:", project_path)
print("📂 Data Folder:", data_path)
print("📄 Clean Dataset File:", file_path)

Mounted at /content/drive
✅ Google Drive mounted successfully!
📂 Project Path: /content/drive/MyDrive/Projects/Machine_Learning/Course_Recomendatioan_System
📂 Data Folder: /content/drive/MyDrive/Projects/Machine_Learning/Course_Recomendatioan_System/data/processed
📄 Clean Dataset File: /content/drive/MyDrive/Projects/Machine_Learning/Course_Recomendatioan_System/data/processed/clean_courses.csv


In [2]:
# Step 2 → Load the cleaned dataset

import pandas as pd

# Load dataset
df = pd.read_csv(file_path)

# Display basic info
print("✅ Dataset loaded successfully!")
print("📊 Shape:", df.shape)
print("📄 Columns:", df.columns.tolist())

# Show first few rows
df.head()

✅ Dataset loaded successfully!
📊 Shape: (3683, 20)
📄 Columns: ['course_id', 'title', 'url', 'is_paid', 'price', 'num_subscribers', 'num_reviews', 'num_lectures', 'level', 'content_duration', 'published_timestamp', 'subject', 'profit', 'published_date', 'published_time', 'year', 'month', 'day', 'combined_features', 'cluster']


Unnamed: 0,course_id,title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,profit,published_date,published_time,year,month,day,combined_features,cluster
0,1070968,ultimate investment banking course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5 hours,2017-01-18T20:58:58Z,business finance,429400,2017-01-18,20:58:58Z,2017,1,18,ultimate investment banking course business fi...,3
1,1113822,complete gst course certification grow your ca...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39 hours,2017-03-09T16:34:20Z,business finance,209400,2017-03-09,16:34:20Z,2017,3,9,complete gst course certification grow your ca...,2
2,1006314,financial modeling for business analysts consu...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5 hours,2016-12-19T19:26:30Z,business finance,97830,2016-12-19,19:26:30Z,2016,12,19,financial modeling for business analysts consu...,1
3,1210588,beginner to pro financial analysis in excel,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3 hours,2017-05-30T20:07:24Z,business finance,232845,2017-05-30,20:07:24Z,2017,5,30,beginner to pro financial analysis in excel bu...,2
4,1011058,how to maximize your profits trading options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2 hours,2016-12-13T14:57:18Z,business finance,255200,2016-12-13,14:57:18Z,2016,12,13,how to maximize your profits trading options b...,1


In [3]:
# Step 3: Importing required libraries
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import numpy as np
import matplotlib.pyplot as plt

print("✅ Libraries imported successfully!")

✅ Libraries imported successfully!


In [4]:
# Step 4: Create user-course interaction matrix

# For this dataset, let's simulate 'user_id' (since Udemy data is aggregated)
# We'll randomly assign user_ids for simulation
np.random.seed(42)
df['user_id'] = np.random.randint(1, 501, df.shape[0])  # 500 simulated users

# Pivot: users × courses with interaction as num_subscribers or reviews
interaction_matrix = df.pivot_table(
    index='user_id',
    columns='title',
    values='num_reviews',
    fill_value=0
)

print("✅ User-Course Matrix Created!")
interaction_matrix.head(5)

✅ User-Course Matrix Created!


title,a beginners guide to creating vector graphics with photoshop,a beginners guide to fingerpicking strumming guitar,a beginners guide to quantopian futures api,a beginners guide to technical analysis of stock charts,a beginners guide to the meteor javascript framework,a complete guide to becoming a trader on the forex market,a complete guide to successful freelance logo design career,a complete laravel basics from beginner to expert,a guide to analog synths for the modern edm enthusiast,a guide to trade binary options like an expert,...,you can play jazz saxophone,your business by the numbers,your first course on piano,your first guitar lessons learn how to play guitar,your first successful forex trades with case studies,your own site in min the complete wordpress course,your second course on piano two handed playing,zend framework learn the php framework zf from scratch,zoho books gestion financire dentreprise pas pas,zombie apocalypse photoshop actions
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Step 5: Convert interaction matrix to sparse format
interaction_sparse = csr_matrix(interaction_matrix.values)
print("✅ Sparse Matrix created with shape:", interaction_sparse.shape)

✅ Sparse Matrix created with shape: (500, 3575)


In [9]:
# Step 6 (Fixed): Train KNN model for course–course similarity

from sklearn.neighbors import NearestNeighbors

# Transpose the matrix so each row = course, each column = user
course_matrix = interaction_matrix.T

# Initialize model
knn_model = NearestNeighbors(metric="cosine", algorithm="brute")

# Fit on transposed matrix
knn_model.fit(course_matrix)

print("✅ KNN model trained on course–course similarity matrix!")
print(f"📊 Shape used for training: {course_matrix.shape}")

✅ KNN model trained on course–course similarity matrix!
📊 Shape used for training: (3575, 500)


In [10]:
# Step 7 : Test KNN recommendations for one course

# Convert matrix to course-based version for similarity between courses
course_matrix = interaction_matrix.T

# Randomly select a test course
test_course = np.random.choice(course_matrix.index)
print(f"🎓 Finding courses similar to: '{test_course}'")

# Find index of the selected course
course_idx = course_matrix.index.get_loc(test_course)

# Run KNN
distances, indices = knn_model.kneighbors(course_matrix.iloc[course_idx, :].values.reshape(1, -1), n_neighbors=6)

# Display top 5 similar courses
print("\n🧩 Top 5 similar courses:")
for i, idx in enumerate(indices.flatten()):
    if i == 0:
        continue
    print(f"{i}. {course_matrix.index[idx]}  (Similarity Score: {1 - distances.flatten()[i]:.4f})")


🎓 Finding courses similar to: 'forex trading comprehensive concise forex trading course'

🧩 Top 5 similar courses:
1. fun creative web engineering with python webpy  (Similarity Score: 1.0000)
2. forex trading comprehensive concise forex trading course  (Similarity Score: 1.0000)
3. canva making design simple  (Similarity Score: 1.0000)
4. your first course on piano  (Similarity Score: 0.0000)
5. your business by the numbers  (Similarity Score: 0.0000)
