In [16]:
!pip install langchain_openai langchain_chroma  pandas dotenv --quiet

In [79]:
import os
from dotenv import load_dotenv

# Must run before any client is constructed so that settings in .env are visible.
load_dotenv()  # Load variables from .env

# Embedding model name; passed as `model=` to AzureOpenAIEmbeddings in a later cell.
embedding_model_name = "text-embedding-ada-002"

In [8]:
import pandas as pd
# Course catalogue; later cells read the "course_id", "title" and "description" columns.
df = pd.read_csv("./courses_dataset.csv")

In [10]:
from langchain_openai import AzureOpenAIEmbeddings
# Embedding client used both to index the courses and to embed user queries.
# No endpoint or key is passed here — presumably they are read from the
# environment populated by load_dotenv(); confirm against the .env file.
embeddings = AzureOpenAIEmbeddings(
    model=embedding_model_name,
    api_version="2024-12-01-preview",
)

In [None]:
from langchain_core.documents import Document
# One Document per catalogue row:
#   - text to embed is "<title>: <description>"
#   - metadata keeps the course id (stringified) and title for filtering/display
documents = [
    Document(
        page_content=course["title"] + ": " + course["description"],
        metadata={
            "course_id": str(course["course_id"]),
            "title": course["title"],
        },
    )
    for _row_index, course in df.iterrows()
]

In [17]:
from langchain_chroma import Chroma
# Index all course documents in a Chroma store using the embedding client above.
# No persist_directory is passed, so nothing is explicitly written to disk here.
# NOTE(review): re-running this cell in the same kernel may add the same
# documents to the collection again — confirm, or restart the kernel before re-running.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)


In [73]:
def recommend_courses(
    profile: str,
    completed_ids: list[str],
    k: int = 5,
    store=None,
) -> list[tuple[str, str, float]]:
    """Return the courses whose text is closest to a learner profile.

    Args:
        profile: Free-text description of the learner's background and goals;
            used verbatim as the similarity-search query.
        completed_ids: Course ids to exclude from the results. ``None`` or an
            empty list means "exclude nothing". Ids are converted to ``str``
            because the indexed metadata stores ``course_id`` as a string.
        k: Maximum number of courses to return (default 5).
        store: Object exposing
            ``similarity_search_with_score(query=..., k=..., filter=...)``.
            Defaults to the notebook-level ``vectorstore``.

    Returns:
        A list of ``(course_id, title, score)`` tuples in the order the store
        returns them. The score is passed through unchanged; in the recorded
        runs of this notebook the best match carries the lowest value.
    """
    if store is None:
        # Fall back to the notebook-level index built in an earlier cell.
        store = vectorstore

    # Normalise ids to strings so they compare equal to the stored metadata.
    excluded = [str(course_id) for course_id in (completed_ids or [])]

    # Only build a filter when there is something to exclude; an empty "$nin"
    # list is avoided by passing no filter at all.
    filter_ = {"course_id": {"$nin": excluded}} if excluded else None

    results = store.similarity_search_with_score(query=profile, k=k, filter=filter_)

    return [(doc.metadata["course_id"], doc.metadata["title"], score) for doc, score in results]

In [74]:
# Example: learner who has finished a Python course; course id 'C014' is excluded.
# NOTE(review): the recorded output for this cell lists 'Python Programming for
# Data Science' as C016 and ranks it first, so excluding 'C014' does not remove
# the course named in the profile — confirm which id was intended and re-run.
recommend_courses(
    "I’ve completed the ‘Python Programming for Data Science’ course and enjoy data visualization. What should I take next?",
    ['C014']
    )

[('C016', 'Python Programming for Data Science', 0.3308434784412384),
 ('C011', 'Big Data Analytics with Spark', 0.3907507658004761),
 ('C004', 'Computer Vision and Image Processing', 0.41815078258514404),
 ('C017', 'R Programming and Statistical Analysis', 0.42748209834098816),
 ('C012', 'SQL for Data Analysis', 0.4406697750091553)]

In [75]:
# Example: cloud / CI-CD profile with no completed courses (empty list, so no
# exclusion filter is built).
recommend_courses(
    "I know Azure basics and want to manage containers and build CI/CD pipelines. Recommend courses.",
    []
)

[('C007', 'Cloud Computing with Azure', 0.311148077249527),
 ('C009', 'Containerization with Docker and Kubernetes', 0.33455008268356323),
 ('C008', 'DevOps Practices and CI/CD', 0.3488312363624573),
 ('C010', 'APIs and Microservices Architecture', 0.402306467294693),
 ('C025', 'MLOps: Productionizing Machine Learning', 0.4050137400627136)]

In [76]:
# Example: learner with ML fundamentals aiming at neural networks and
# production workflows; nothing is excluded.
recommend_courses(
    "My background is in ML fundamentals; I’d like to specialize in neural networks and production workflows.",
    []
)

[('C025', 'MLOps: Productionizing Machine Learning', 0.3238612115383148),
 ('C002', 'Deep Learning with TensorFlow and Keras', 0.3684665858745575),
 ('C004', 'Computer Vision and Image Processing', 0.378982812166214),
 ('C003', 'Natural Language Processing Fundamentals', 0.3809294104576111),
 ('C001', 'Foundations of Machine Learning', 0.38297075033187866)]

In [77]:
# Example: microservices / Kubernetes profile; nothing is excluded.
recommend_courses(
    "I want to learn to build and deploy microservices with Kubernetes—what courses fit best?",
    []
)

[('C009', 'Containerization with Docker and Kubernetes', 0.2381223887205124),
 ('C010', 'APIs and Microservices Architecture', 0.32079559564590454),
 ('C008', 'DevOps Practices and CI/CD', 0.3699899911880493),
 ('C007', 'Cloud Computing with Azure', 0.3801954388618469),
 ('C025', 'MLOps: Productionizing Machine Learning', 0.38485318422317505)]

In [78]:
# Example: beginner interested in blockchain and smart contracts; nothing is
# excluded.
recommend_courses(
    "I’m interested in blockchain and smart contracts but have no prior experience. Which courses do you suggest?",
    []
)

[('C023', 'Blockchain Technology and Smart Contracts', 0.28837913274765015),
 ('C010', 'APIs and Microservices Architecture', 0.48249563574790955),
 ('C022', 'Internet of Things (IoT) Development', 0.48679864406585693),
 ('C013', 'NoSQL Databases and MongoDB', 0.4970172643661499),
 ('C009', 'Containerization with Docker and Kubernetes', 0.5001373887062073)]