In [1]:
!python -m pip install pandas openai



In [2]:
import pandas as pd

# Load the CSV
df = pd.read_csv("dataset.csv")
print(df.head())

  course_id                                     title  \
0      C001           Foundations of Machine Learning   
1      C002   Deep Learning with TensorFlow and Keras   
2      C003  Natural Language Processing Fundamentals   
3      C004      Computer Vision and Image Processing   
4      C005             Reinforcement Learning Basics   

                                         description  
0  Understand foundational machine learning algor...  
1  Explore neural network architectures using Ten...  
2  Dive into NLP techniques for processing and un...  
3  Learn the principles of computer vision and im...  
4  Get introduced to reinforcement learning parad...  


In [None]:
from langchain_openai import AzureOpenAIEmbeddings
import os

# Create embeddings for each description
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-small",
)



In [6]:
import chromadb
chroma_client = chromadb.Client()

In [7]:
collection = chroma_client.create_collection(name="course_recommendation")

In [None]:
df['course_id'] = df.index.astype(str)

df['text'] = df['title'] + ". " + df['description']


In [None]:
course_ids = df['course_id'].tolist()
course_texts = df['text'].tolist()

course_embeddings = [embeddings.embed_query(text) for text in course_texts]

collection.add(
    ids=course_ids,
    documents=course_texts,
    embeddings=course_embeddings
)


In [None]:
from typing import List, Tuple

def recommend_courses(profile: str, completed_ids: List[str]) -> List[Tuple[str, float]]:
    """
    Returns top-5 course recommendations based on user profile and completed courses
    """
    profile_embedding = embeddings.embed_query(profile)
    
    results = collection.query(
        query_embeddings=[profile_embedding],
        n_results=10 
    )
    
    recommended = []
    for course_id, score in zip(results['ids'][0], results['distances'][0]):
        if course_id not in completed_ids:
            recommended.append((course_id, score))
        if len(recommended) >= 5:
            break
            
    return recommended


In [None]:
profile_text = "I’ve completed the ‘Python Programming for Data Science’ course and enjoy data visualization."
completed = ['0'] 
recommendations = recommend_courses(profile_text, completed)
print("Top-5 recommendations:", recommendations)

for course_id, score in recommendations:
    title = df[df['course_id'] == course_id]['title'].values[0]
    print(f"{title} - Similarity: {score:.4f}")


Top-5 recommendations: [('15', 0.9122487306594849), ('13', 1.105047583580017), ('10', 1.152431607246399), ('16', 1.171453833580017), ('3', 1.2131596803665161)]
Python Programming for Data Science - Similarity: 0.9122
Data Visualization with Tableau - Similarity: 1.1050
Big Data Analytics with Spark - Similarity: 1.1524
R Programming and Statistical Analysis - Similarity: 1.1715
Computer Vision and Image Processing - Similarity: 1.2132
