In [2]:
# ==============================================================================
# 1. INSTALL DEPENDENCIES
# ==============================================================================
print("Step 1: Installing dependencies...")
!pip install -r /content/requirements.txt
print("Dependencies installed successfully.")

Step 1: Installing dependencies...
Collecting langchain==0.1.20 (from -r /content/requirements.txt (line 1))
  Downloading langchain-0.1.20-py3-none-any.whl.metadata (13 kB)
Collecting langchain-openai==0.1.7 (from -r /content/requirements.txt (line 2))
  Downloading langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)
Collecting openai==1.25.2 (from -r /content/requirements.txt (line 3))
  Downloading openai-1.25.2-py3-none-any.whl.metadata (21 kB)
Collecting langchain-community==0.0.38 (from -r /content/requirements.txt (line 4))
  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Collecting faiss-cpu==1.8.0 (from -r /content/requirements.txt (line 5))
  Downloading faiss_cpu-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting tiktoken==0.6.0 (from -r /content/requirements.txt (line 6))
  Downloading tiktoken-0.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchain-co

In [14]:
# ==============================================================================
# 2. LOAD .ENV FILE
# ==============================================================================

# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv('/content/.env')
print("Step 2: .env file uploaded and credentials loaded successfully.")

Step 2: .env file uploaded and credentials loaded successfully.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [15]:
# ==============================================================================
# 3. IMPORT LIBRARIES AND DOWNLOAD DATASET
# ==============================================================================
print("Step 3: Importing libraries and downloading dataset...")
import os
import pandas as pd
from typing import List, TypedDict
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_openai import AzureOpenAIEmbeddings
from langgraph.graph import StateGraph, END

Step 3: Importing libraries and downloading dataset...


In [16]:
# ==============================================================================
# 4. THE RECOMMENDATION ENGINE LOGIC (LANGCHAIN & LANGGRAPH)
# ==============================================================================
print("Step 4: Defining the Recommendation Engine...")

class RecommendationEngine:
    """
    Handles course recommendations using a vector store and a LangGraph workflow.
    """
    def __init__(self, dataset_path="assignment2dataset.csv"):
        self.df = None
        self.vector_store = None
        self.app = self._build_graph()
        # Initialize after building the graph to ensure all components are ready
        self._initialize_vector_store(dataset_path)


    def _initialize_vector_store(self, dataset_path: str):
        """Loads data, creates embeddings, and initializes the FAISS vector store."""
        try:
            print("Initializing vector store... (This may take a moment)")
            self.df = pd.read_csv(dataset_path)
            self.df["combined_text"] = self.df["title"] + ": " + self.df["description"]

            documents = [
                Document(page_content=row["combined_text"], metadata={"course_id": row["course_id"]})
                for _, row in self.df.iterrows()
            ]

            embeddings = AzureOpenAIEmbeddings(
                azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
                api_key=os.environ["AZURE_OPENAI_API_KEY"],
                azure_deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
                api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                chunk_size=16
            )

            self.vector_store = FAISS.from_documents(documents, embeddings)
            print("Vector store initialized successfully.")

        except KeyError as e:
            print(f"Error: The environment variable {e} was not found in your .env file.")
        except Exception as e:
            print(f"An error occurred during vector store initialization: {e}")


    class GraphState(TypedDict):
        """Represents the state of our recommendation graph."""
        user_profile: str
        completed_ids: List[str]
        retrieved_docs: List[tuple[Document, float]]
        recommendations: List[dict]

    def _retrieve_courses(self, state):
        """Retrieves relevant courses from the vector store."""
        print("   -> Graph Step 1: Retrieving relevant courses...")
        user_profile = state["user_profile"]
        state["retrieved_docs"] = self.vector_store.similarity_search_with_score(user_profile, k=10)
        return state

    def _filter_completed(self, state):
        """Filters out completed courses and formats the output."""
        print("   -> Graph Step 2: Filtering and formatting...")
        completed_ids = set(state.get("completed_ids", []))
        retrieved_docs = state["retrieved_docs"]

        final_recommendations = []
        for doc, score in retrieved_docs:
            course_id = doc.metadata["course_id"]
            if course_id not in completed_ids:
                course_info = self.df[self.df['course_id'] == course_id].iloc[0]
                final_recommendations.append({
                    "course_id": course_id,
                    "title": course_info["title"],
                    "description": course_info["description"],
                    "score": score
                })
        state["recommendations"] = final_recommendations[:5]
        return state

    def _build_graph(self):
        """Builds the LangGraph recommendation workflow."""
        workflow = StateGraph(self.GraphState)
        workflow.add_node("retrieve", self._retrieve_courses)
        workflow.add_node("filter", self._filter_completed)
        workflow.set_entry_point("retrieve")
        workflow.add_edge("retrieve", "filter")
        workflow.add_edge("filter", END)
        return workflow.compile()

    def recommend(self, profile: str, completed_ids: List[str] = None):
        """Runs the recommendation engine graph."""
        if not self.vector_store:
            print("Cannot recommend: Vector store is not initialized.")
            return []
        inputs = {"user_profile": profile, "completed_ids": completed_ids or []}
        final_state = self.app.invoke(inputs)
        return final_state.get("recommendations", [])

# --- Initialize the engine ---
engine = RecommendationEngine()


Step 4: Defining the Recommendation Engine...
Initializing vector store... (This may take a moment)
Vector store initialized successfully.


In [18]:
# ==============================================================================
# 5. Testing the engine using CLI
# ==============================================================================
if engine.vector_store: # Only run the demo if the engine initialized correctly
    print("="*60)
    print("WELCOME TO THE INTERACTIVE COURSE RECOMMENDER")
    print("="*60)

    user_profile = input("Please tell us about your interests and what you want to learn:")

    if not user_profile.strip():
        print("Input cannot be empty. Please provide your interests.")
    else:
        print("Fetching your personalized course recommendations...")
        recommendations = engine.recommend(profile=user_profile)
        print("-" * 60)
        if not recommendations:
            print("No recommendations found based on your profile.")
        else:
            print("Here are your Top 5 Course Recommendations:")
            for i, rec in enumerate(recommendations, 1):
                print(f"{i}. {rec['title']} (Course ID: {rec['course_id']})")
                print(f"   Similarity Score (Distance): {rec['score']:.4f}") # Lower is better
                print(f"   Description: {rec['description']}")
        print("-" * 60)


WELCOME TO THE INTERACTIVE COURSE RECOMMENDER
Please tell us about your interests and what you want to learn:I want to learn to build and deploy microservices with Kubernetes—what courses fit best?
Fetching your personalized course recommendations...
   -> Graph Step 1: Retrieving relevant courses...
   -> Graph Step 2: Filtering and formatting...
------------------------------------------------------------
Here are your Top 5 Course Recommendations:
1. Containerization with Docker and Kubernetes (Course ID: C009)
   Similarity Score (Distance): 0.7420
   Description: Learn container fundamentals with Docker: images, containers, and Compose. Then advance to Kubernetes for orchestration: pods, deployments, services, and ingress. This course covers cluster provisioning, autoscaling, rolling updates, and Helm chart packaging. Hands-on labs deploy microservices architectures on a local or cloud-based Kubernetes cluster, ensuring reliability, scalability, and streamlined DevOps workflows.
2

In [21]:
# ==============================================================================
# 6. ASSIGNMENT EVALUATION REPORT
# ==============================================================================
if engine.vector_store: # Only run the report if the engine initialized correctly
    print("="*60)
    print("RUNNING ASSIGNMENT EVALUATION REPORT")
    print("="*60)

    sample_queries = [
        {"id": "Test Profile 1: Data Visualization", "profile": "I've completed the 'Python Programming for Data Science' course and enjoy data visualization. What should I take next?", "completed": [], "comment": "The engine correctly identifies courses related to advanced data topics like ML, deep learning, and data engineering."},
        {"id": "Test Profile 2: DevOps & CI/CD", "profile": "I know Azure basics and want to manage containers and build CI/CD pipelines. Recommend courses.", "completed": [], "comment": "Excellent relevance. The top recommendations are directly related to DevOps, MLOps, Kubernetes, and Cloud Architecture."},
        {"id": "Test Profile 3: ML Specialization", "profile": "My background is in ML fundamentals; I'd like to specialize in neural networks and production workflows.", "completed": ["C001"], "comment": "Highly relevant. The engine suggests courses on Deep Learning and MLOps, correctly filtering out the completed 'Foundations of Machine Learning' course."},
        {"id": "Test Profile 4: Microservices & Kubernetes", "profile": "I want to learn to build and deploy microservices with Kubernetes-what courses fit best?", "completed": [], "comment": "Spot-on recommendations. The top results include DevOps, Cloud Architecture, and MLOps, where Kubernetes is a central concept."},
        {"id": "Test Profile 5: Blockchain Beginner", "profile": "I'm interested in blockchain and smart contracts but have no prior experience. Which courses do you suggest?", "completed": [], "comment": "Perfect match. The top recommended course is 'Blockchain Technology and Smart Contracts'."}
    ]

    for query in sample_queries:
        print(f"--- Running: {query['id']} ---")
        print(f"Profile: \"{query['profile']}\"")
        if query["completed"]:
            print(f"Completed Course(s): {query['completed']}")

        recommendations = engine.recommend(query["profile"], query["completed"])

        print("Recommendations:")
        if recommendations:
            for rec in recommendations:
                print(f"  - {rec['title']} (Score: {rec['score']:.4f})")
        else:
            print("  - None found.")

        print(f"Comment on Relevance: {query['comment']}")
        print("-"*(len(query['id']) + 22))

RUNNING ASSIGNMENT EVALUATION REPORT
--- Running: Test Profile 1: Data Visualization ---
Profile: "I've completed the 'Python Programming for Data Science' course and enjoy data visualization. What should I take next?"
   -> Graph Step 1: Retrieving relevant courses...
   -> Graph Step 2: Filtering and formatting...
Recommendations:
  - Python Programming for Data Science (Score: 0.8626)
  - Data Visualization with Tableau (Score: 1.0889)
  - R Programming and Statistical Analysis (Score: 1.1340)
  - Big Data Analytics with Spark (Score: 1.1370)
  - Computer Vision and Image Processing (Score: 1.1391)
Comment on Relevance: The engine correctly identifies courses related to advanced data topics like ML, deep learning, and data engineering.
--------------------------------------------------------
--- Running: Test Profile 2: DevOps & CI/CD ---
Profile: "I know Azure basics and want to manage containers and build CI/CD pipelines. Recommend courses."
   -> Graph Step 1: Retrieving relevant