From 5ba4f9edcaa2933af288799a0baf93208d367765 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:16:55 -0400 Subject: [PATCH 1/8] add new methods --- server/python/src/models/models.py | 14 +- server/python/src/routers/movies.py | 431 +++++++++++++++++++++++++++- 2 files changed, 441 insertions(+), 4 deletions(-) diff --git a/server/python/src/models/models.py b/server/python/src/models/models.py index 9e66a82..e027116 100644 --- a/server/python/src/models/models.py +++ b/server/python/src/models/models.py @@ -92,9 +92,18 @@ class CreateMovieRequest(BaseModel): rated: Optional[str] = None runtime: Optional[int] = None poster: Optional[str] = None + +class Comment(BaseModel): + id: Optional[str] = Field(alias="_id") + name: str + email: str + movie_id: str + text: str + date: datetime - - + model_config = { + "populate_by_name": True + } class SuccessResponse(BaseModel, Generic[T]): success: bool = True message: Optional[str] @@ -113,4 +122,3 @@ class ErrorResponse(BaseModel): message: str error: ErrorDetails timestamp: str - \ No newline at end of file diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index efdf6dd..fab2e73 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -1,10 +1,12 @@ -from fastapi import APIRouter, Query +from fastapi import APIRouter, Query, Body, HTTPException from src.database.mongo_client import db, get_collection from src.models.models import CreateMovieRequest, Movie, SuccessResponse from typing import List from datetime import datetime from src.utils.errorHandler import create_success_response, create_error_response import re +from bson import ObjectId +from bson.errors import InvalidId ''' This file contains all the business logic for movie operations. @@ -22,7 +24,26 @@ # Place get_movie_by_id endpoint here #------------------------------------ +""" + GET /api/movies/{id} + Retrieve a single movie by its ID. + Path Parameters: + id (str): The ObjectId of the movie to retrieve. + Returns: + SuccessResponse[Movie]: A response object containing the movie data. +""" + +@router.get("/{id}", response_model=SuccessResponse[Movie]) +async def get_movie_by_id(id: str): + # Validate ObjectId format + object_id = ObjectId(id) + + # Use findOne() to get a single document by _id + movie = await db.movies.find_one({"_id": object_id}) + movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + + return create_success_response(movie, "Movie retrieved successfully") """ GET /api/movies/ @@ -101,6 +122,33 @@ async def get_all_movies( # Place create_movie endpoint here #------------------------------------ +""" + POST /api/movies/ + Create a new movie. + Request Body: + title (str): The title of the movie. + genre (str): The genre of the movie. + year (int): The year the movie was released. + min_rating (float): The minimum IMDB rating. + max_rating (float): The maximum IMDB rating. + Returns: + SuccessResponse[Movie]: A response object containing the created movie data. 
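+    Example (illustrative request only; the field values below are hypothetical and the
+    accepted fields are defined by the CreateMovieRequest model):
+        POST /api/movies/
+        {
+            "title": "Sample Movie",
+            "rated": "PG-13",
+            "runtime": 120
+        }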
+""" + +@router.post("/", response_model=SuccessResponse[CreateMovieRequest], status_code=201) +async def create_movie(movie: CreateMovieRequest): + # Pydantic will automatically validate the structure + movie_data = movie.model_dump(by_alias=True, exclude_none=True) + + result = await db.movies.insert_one(movie_data) + + # Retrieve the created document to return complete data + created_movie = await db.movies.find_one({"_id": result.inserted_id}) + + created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string + + return create_success_response(created_movie, f"Movie '{movie_data['title']}' created successfully") + #------------------------------------ # Place create_movies_batch endpoint here #------------------------------------ @@ -159,6 +207,31 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): # Place delete_movie endpoint here #------------------------------------ + +""" + DELETE /api/movies/{id} + Delete a single movie by its ID. + Path Parameters: + id (str): The ObjectId of the movie to delete. + Returns: + SuccessResponse[dict]: A response object containing deletion details. +""" + +@router.delete("/{id}", response_model=SuccessResponse[dict]) +async def delete_movie_by_id(id: str): + object_id = ObjectId(id) + + # Use deleteOne() to remove a single document + result = await db.movies.delete_one({"_id": object_id}) + + if result.deleted_count == 0: + raise HTTPException(status_code=404, detail="Movie not found") + + return create_success_response( + {"deletedCount": result.deleted_count}, + "Movie deleted successfully" + ) + #------------------------------------ # Place delete_movies_by_batch endpoint here #------------------------------------ @@ -167,6 +240,362 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): # Place find_and_delete_movie endpoint here #------------------------------------ +""" + DELETE /api/movies/{id}/find-and-delete + Finds and deletes a movie in a single atomic operation. + Demonstrates the findOneAndDelete() operation. + Path Parameters: + id (str): The ObjectId of the movie to find and delete. + Returns: + SuccessResponse[Movie]: A response object containing the deleted movie data. 
+""" + +@router.delete("/{id}/find-and-delete", response_model=SuccessResponse[Movie]) +async def find_and_delete_movie(id: str): + object_id = ObjectId(id) + + # Use find_one_and_delete() to find and delete in a single atomic operation + # This is useful when you need to return the deleted document + # or ensure the document exists before deletion + deleted_movie = await db.movies.find_one_and_delete({"_id": object_id}) + + if deleted_movie is None: + raise HTTPException(status_code=404, detail="Movie not found") + + deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string + + return create_success_response(deleted_movie, "Movie found and deleted successfully") + +async def execute_aggregation(pipeline: list) -> list: + """Helper function to execute aggregation pipeline and return results""" + print(f"Executing pipeline: {pipeline}") # Debug logging + print(f"Database name: {db.name if hasattr(db, 'name') else 'unknown'}") + print(f"Collection name: movies") + + # For motor (async MongoDB driver), we need to await the aggregate call + cursor = await db.movies.aggregate(pipeline) + results = await cursor.to_list(length=None) # Convert cursor to list + + print(f"Aggregation returned {len(results)} results") # Debug logging + if len(results) <= 3: # Log first few results for debugging + for i, doc in enumerate(results): + print(f"Result {i+1}: {doc}") + + return results + + +""" + GET /api/movies/aggregate/by-genre + Aggregate movies by genre with statistics using MongoDB aggregation pipeline. + Demonstrates grouping values from multiple documents and performing operations on grouped data. + Returns: + SuccessResponse[List[dict]]: A response object containing aggregated genre statistics. +""" + +@router.get("/aggregate/by-genre", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_by_genre(): + # Define an aggregation pipeline with match, unwind, group, and sort stages + pipeline = [ + # Clean data: ensure year is an integer + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + {"$unwind": "$genres"}, + { + "$group": { + "_id": "$genres", + "count": {"$sum": 1}, + "avgRating": {"$avg": "$imdb.rating"}, + "minYear": {"$min": "$year"}, + "maxYear": {"$max": "$year"}, + "totalVotes": {"$sum": "$imdb.votes"} + } + }, + {"$sort": {"count": -1}}, + { + "$project": { + "genre": "$_id", + "movieCount": "$count", + "averageRating": {"$round": ["$avgRating", 2]}, + "yearRange": { + "min": "$minYear", + "max": "$maxYear" + }, + "totalVotes": "$totalVotes", + "_id": 0 + } + } + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Aggregated statistics for {len(results)} genres" + ) + + +""" + GET /api/movies/aggregate/recent-commented + Aggregate movies with their most recent comments using MongoDB $lookup aggregation. + Joins movies with comments collection to show recent comment activity. + Query Parameters: + limit (int, optional): Number of results to return (default: 10, max: 50). + movie_id (str, optional): Filter by specific movie ObjectId. + Returns: + SuccessResponse[List[dict]]: A response object containing movies with their most recent comments. 
+""" + +@router.get("/aggregate/recent-commented", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_recent_commented( + limit: int = Query(default=10, ge=1, le=50), + movie_id: str = Query(default=None) +): + # Define aggregation pipeline to join movies with their most recent comments + pipeline = [ + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + } + ] + + # Add movie_id filter if provided + if movie_id: + try: + object_id = ObjectId(movie_id) + pipeline[0]["$match"]["_id"] = object_id + except Exception: + raise HTTPException(status_code=400, detail="Invalid movie_id format") + + # Add lookup and additional pipeline stages + pipeline.extend([ + { + "$lookup": { + "from": "comments", + "localField": "_id", + "foreignField": "movie_id", + "as": "comments" + } + }, + { + "$match": { + "comments": {"$ne": []} + } + }, + { + "$addFields": { + "recentComments": { + "$slice": [ + { + "$sortArray": { + "input": "$comments", + "sortBy": {"date": -1} + } + }, + limit + ] + }, + "mostRecentCommentDate": { + "$max": "$comments.date" + } + } + }, + { + "$sort": {"mostRecentCommentDate": -1} + }, + { + "$limit": 50 if movie_id else 20 + }, + { + "$project": { + "title": 1, + "year": 1, + "genres": 1, + "imdbRating": "$imdb.rating", + "recentComments": { + "$map": { + "input": "$recentComments", + "as": "comment", + "in": { + "userName": "$$comment.name", + "userEmail": "$$comment.email", + "text": "$$comment.text", + "date": "$$comment.date" + } + } + }, + "totalComments": {"$size": "$comments"}, + "_id": 1 + } + } + ]) + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + # Convert ObjectId to string for response + for result in results: + if "_id" in result: + result["_id"] = str(result["_id"]) + + # Calculate total comments from all movies + total_comments = sum(result.get("totalComments", 0) for result in results) + + return create_success_response( + results, + f"Found {total_comments} comments from movie{'s' if len(results) != 1 else ''}" + ) + + +""" + GET /api/movies/aggregate/by-year + Aggregate movies by year with average rating and movie count. + Reports yearly statistics including average rating and total movies per year. + Returns: + SuccessResponse[List[dict]]: A response object containing yearly movie statistics. 
+""" + +@router.get("/aggregate/by-year", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_by_year(): + # Define aggregation pipeline to group movies by year + pipeline = [ + # Clean data: ensure year is an integer and within reasonable range + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + # Group by year and calculate statistics + { + "$group": { + "_id": "$year", + "movieCount": {"$sum": 1}, + "averageRating": { + "$avg": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "highestRating": { + "$max": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "lowestRating": { + "$min": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "totalVotes": {"$sum": "$imdb.votes"} + } + }, + { + "$project": { + "year": "$_id", + "movieCount": 1, + "averageRating": {"$round": ["$averageRating", 2]}, + "highestRating": 1, + "lowestRating": 1, + "totalVotes": 1, + "_id": 0 + } + }, + {"$sort": {"year": -1}} + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Aggregated statistics for {len(results)} years" + ) + + +""" + GET /api/movies/aggregate/directors + Aggregate directors with the most movies and their statistics. + Reports directors sorted by number of movies directed. + Query Parameters: + limit (int, optional): Number of results to return (default: 20, max: 100). + min_movies (int, optional): Minimum number of movies to include director (default: 1). + Returns: + SuccessResponse[List[dict]]: A response object containing director statistics. 
+""" + +@router.get("/aggregate/directors", response_model=SuccessResponse[List[dict]]) +async def aggregate_directors_most_movies( + limit: int = Query(default=20, ge=1, le=100) +): + # Define aggregation pipeline to find directors with most movies + pipeline = [ + { + "$match": { + "directors": {"$exists": True, "$ne": None, "$ne": []}, + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + { + "$unwind": "$directors" + }, + { + "$match": { + "directors": {"$ne": None, "$ne": ""} + } + }, + { + "$group": { + "_id": "$directors", + "movieCount": {"$sum": 1}, + "averageRating": {"$avg": "$imdb.rating"} + } + }, + {"$sort": {"movieCount": -1}}, + {"$limit": limit}, + { + "$project": { + "director": "$_id", + "movieCount": 1, + "averageRating": {"$round": ["$averageRating", 2]}, + "_id": 0 + } + } + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Found {len(results)} directors with most movies" + ) # ---- Old testing endpoint, will be removed later ---- ''' From 1731299c18f763342cf259d1aea1d5bb26667c11 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:20:57 -0400 Subject: [PATCH 2/8] remove comment line --- server/python/src/routers/movies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index fab2e73..7c60599 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -545,7 +545,6 @@ async def aggregate_movies_by_year(): Reports directors sorted by number of movies directed. Query Parameters: limit (int, optional): Number of results to return (default: 20, max: 100). - min_movies (int, optional): Minimum number of movies to include director (default: 1). Returns: SuccessResponse[List[dict]]: A response object containing director statistics. 
""" From 303fecc160007e75004cdd53df931cfc9619b724 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:32:57 -0400 Subject: [PATCH 3/8] add comment for vector search --- server/python/src/routers/movies.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 7c60599..237ef9d 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -609,4 +609,7 @@ async def test_error(): code="TEST_ERROR", details=str(e) ) -''' \ No newline at end of file +''' + +# ---- Place Vector Search Here ---- + From e6942b2b8296fb5c28542cceb47084418841acd5 Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 15:40:37 -0400 Subject: [PATCH 4/8] feedback --- server/python/src/routers/movies.py | 95 +++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 26 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 237ef9d..b4e5128 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -36,10 +36,19 @@ @router.get("/{id}", response_model=SuccessResponse[Movie]) async def get_movie_by_id(id: str): # Validate ObjectId format - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") + + movies_collection = get_collection("movies") + try: + movie = await movies_collection.find_one({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") - # Use findOne() to get a single document by _id - movie = await db.movies.find_one({"_id": object_id}) + if movie is None: + raise HTTPException(status_code=400, detail="Movie not found") movie["_id"] = str(movie["_id"]) # Convert ObjectId to string @@ -105,7 +114,10 @@ async def get_all_movies( cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) movies = [] async for movie in cursor: + if movie is None: + raise HTTPException(status_code=400, detail="Movie not found") movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + # Ensure that the year field contains int value. if "year" in movie and not isinstance(movie["year"], int): cleaned_year = re.sub(r"\D", "", str(movie["year"])) @@ -126,24 +138,35 @@ async def get_all_movies( POST /api/movies/ Create a new movie. Request Body: - title (str): The title of the movie. - genre (str): The genre of the movie. - year (int): The year the movie was released. - min_rating (float): The minimum IMDB rating. - max_rating (float): The maximum IMDB rating. + movie (CreateMovieRequest): A movie object containing the movie data. + See CreateMovieRequest model for available fields. Returns: SuccessResponse[Movie]: A response object containing the created movie data. 
""" -@router.post("/", response_model=SuccessResponse[CreateMovieRequest], status_code=201) +@router.post("/", response_model=SuccessResponse[Movie], status_code=201) async def create_movie(movie: CreateMovieRequest): # Pydantic will automatically validate the structure movie_data = movie.model_dump(by_alias=True, exclude_none=True) - result = await db.movies.insert_one(movie_data) - - # Retrieve the created document to return complete data - created_movie = await db.movies.find_one({"_id": result.inserted_id}) + movies_collection = get_collection("movies") + try: + result = await movies_collection.insert_one(movie_data) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + + # Verify that the document was created before querying it + if not result.acknowledged: + raise HTTPException(status_code=500, detail="Failed to create movie") + + try: + # Retrieve the created document to return complete data + created_movie = await movies_collection.find_one({"_id": result.inserted_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + + if created_movie is None: + raise HTTPException(status_code=500, detail="Movie was created but could not be retrieved") created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string @@ -219,13 +242,24 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): @router.delete("/{id}", response_model=SuccessResponse[dict]) async def delete_movie_by_id(id: str): - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") - # Use deleteOne() to remove a single document - result = await db.movies.delete_one({"_id": object_id}) + movies_collection = get_collection("movies") + try: + # Use deleteOne() to remove a single document + result = await movies_collection.delete_one({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") if result.deleted_count == 0: - raise HTTPException(status_code=404, detail="Movie not found") + return create_error_response( + message="Movie not found", + code="MOVIE_NOT_FOUND", + details=f"No movie found with ID: {id}" + ) return create_success_response( {"deletedCount": result.deleted_count}, @@ -252,16 +286,26 @@ async def delete_movie_by_id(id: str): @router.delete("/{id}/find-and-delete", response_model=SuccessResponse[Movie]) async def find_and_delete_movie(id: str): - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") + movies_collection = get_collection("movies") # Use find_one_and_delete() to find and delete in a single atomic operation # This is useful when you need to return the deleted document # or ensure the document exists before deletion - deleted_movie = await db.movies.find_one_and_delete({"_id": object_id}) + try: + deleted_movie = await movies_collection.find_one_and_delete({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") if deleted_movie is None: - raise HTTPException(status_code=404, detail="Movie not found") - + return create_error_response( + message="Movie not found", + code="MOVIE_NOT_FOUND", + details=f"No movie found with ID: {id}" + ) deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string return 
create_success_response(deleted_movie, "Movie found and deleted successfully") @@ -269,12 +313,11 @@ async def find_and_delete_movie(id: str): async def execute_aggregation(pipeline: list) -> list: """Helper function to execute aggregation pipeline and return results""" print(f"Executing pipeline: {pipeline}") # Debug logging - print(f"Database name: {db.name if hasattr(db, 'name') else 'unknown'}") - print(f"Collection name: movies") - # For motor (async MongoDB driver), we need to await the aggregate call - cursor = await db.movies.aggregate(pipeline) - results = await cursor.to_list(length=None) # Convert cursor to list + movies_collection = get_collection("movies") + # For the async Pymongo driver, we need to await the aggregate call + cursor = await movies_collection.aggregate(pipeline) + results = await cursor.to_list(length=None) # Convert cursor to list to collect all data at once rather than processing data per document print(f"Aggregation returned {len(results)} results") # Debug logging if len(results) <= 3: # Log first few results for debugging From aa1db244e7b5ee72024db786c00c12d25cf65dc1 Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 16:33:25 -0400 Subject: [PATCH 5/8] pr feedback --- server/python/src/routers/movies.py | 222 ++++++++++++++++------------ 1 file changed, 126 insertions(+), 96 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index b4e5128..6c4895f 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -39,16 +39,28 @@ async def get_movie_by_id(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = get_collection("movies") try: movie = await movies_collection.find_one({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if movie is None: - raise HTTPException(status_code=400, detail="Movie not found") + return create_error_response( + message="Movie not found", + code="INTERNAL_SERVER_ERROR", + details=f"No movie found with ID: {id}" + ) movie["_id"] = str(movie["_id"]) # Convert ObjectId to string @@ -111,21 +123,28 @@ async def get_all_movies( sort = [(sort_by, sort_order)] # Query the database with the constructed filter, sort, skip, and limit. - cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) - movies = [] - async for movie in cursor: - if movie is None: - raise HTTPException(status_code=400, detail="Movie not found") - movie["_id"] = str(movie["_id"]) # Convert ObjectId to string - - # Ensure that the year field contains int value. 
- if "year" in movie and not isinstance(movie["year"], int): - cleaned_year = re.sub(r"\D", "", str(movie["year"])) - try: - movie["year"] = int(cleaned_year) if cleaned_year else None - except ValueError: - movie["year"] = None - movies.append(movie) + try: + cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) + movies = [] + async for movie in cursor: + if movie is None: + continue # Skip null movies instead of raising exception + movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + + # Ensure that the year field contains int value. + if "year" in movie and not isinstance(movie["year"], int): + cleaned_year = re.sub(r"\D", "", str(movie["year"])) + try: + movie["year"] = int(cleaned_year) if cleaned_year else None + except ValueError: + movie["year"] = None + movies.append(movie) + except Exception as e: + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) # Return the results wrapped in a SuccessResponse return create_success_response(movies, f"Found {len(movies)} movies.") @@ -153,20 +172,36 @@ async def create_movie(movie: CreateMovieRequest): try: result = await movies_collection.insert_one(movie_data) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) # Verify that the document was created before querying it if not result.acknowledged: - raise HTTPException(status_code=500, detail="Failed to create movie") + return create_error_response( + message="Failed to create movie", + code="INTERNAL_SERVER_ERROR", + details="The database did not acknowledge the insert operation" + ) try: # Retrieve the created document to return complete data created_movie = await movies_collection.find_one({"_id": result.inserted_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if created_movie is None: - raise HTTPException(status_code=500, detail="Movie was created but could not be retrieved") + return create_error_response( + message="Movie creation verification failed", + code="INTERNAL_SERVER_ERROR", + details="Movie was created but could not be retrieved for verification" + ) created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string @@ -208,7 +243,16 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): movies_dicts = [] for movie in movies: movies_dicts.append(movie.model_dump(exclude_unset=True, exclude_none=True)) - result = await movies_collection.insert_many(movies_dicts) + + try: + result = await movies_collection.insert_many(movies_dicts) + except Exception as e: + return create_error_response( + message="Database error occurred during batch creation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) + return create_success_response({ "insertedCount": len(result.inserted_ids), "insertedIds": [str(_id) for _id in result.inserted_ids] @@ -245,19 +289,27 @@ async def delete_movie_by_id(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = 
get_collection("movies") try: # Use deleteOne() to remove a single document result = await movies_collection.delete_one({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if result.deleted_count == 0: return create_error_response( message="Movie not found", - code="MOVIE_NOT_FOUND", + code="INTERNAL_SERVER_ERROR", details=f"No movie found with ID: {id}" ) @@ -289,7 +341,11 @@ async def find_and_delete_movie(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = get_collection("movies") # Use find_one_and_delete() to find and delete in a single atomic operation @@ -298,12 +354,16 @@ async def find_and_delete_movie(id: str): try: deleted_movie = await movies_collection.find_one_and_delete({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if deleted_movie is None: return create_error_response( message="Movie not found", - code="MOVIE_NOT_FOUND", + code="INTERNAL_SERVER_ERROR", details=f"No movie found with ID: {id}" ) deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string @@ -326,63 +386,8 @@ async def execute_aggregation(pipeline: list) -> list: return results - -""" - GET /api/movies/aggregate/by-genre - Aggregate movies by genre with statistics using MongoDB aggregation pipeline. - Demonstrates grouping values from multiple documents and performing operations on grouped data. - Returns: - SuccessResponse[List[dict]]: A response object containing aggregated genre statistics. -""" - -@router.get("/aggregate/by-genre", response_model=SuccessResponse[List[dict]]) -async def aggregate_movies_by_genre(): - # Define an aggregation pipeline with match, unwind, group, and sort stages - pipeline = [ - # Clean data: ensure year is an integer - { - "$match": { - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} - } - }, - {"$unwind": "$genres"}, - { - "$group": { - "_id": "$genres", - "count": {"$sum": 1}, - "avgRating": {"$avg": "$imdb.rating"}, - "minYear": {"$min": "$year"}, - "maxYear": {"$max": "$year"}, - "totalVotes": {"$sum": "$imdb.votes"} - } - }, - {"$sort": {"count": -1}}, - { - "$project": { - "genre": "$_id", - "movieCount": "$count", - "averageRating": {"$round": ["$avgRating", 2]}, - "yearRange": { - "min": "$minYear", - "max": "$maxYear" - }, - "totalVotes": "$totalVotes", - "_id": 0 - } - } - ] - - # Execute the aggregation - results = await execute_aggregation(pipeline) - - return create_success_response( - results, - f"Aggregated statistics for {len(results)} genres" - ) - - """ - GET /api/movies/aggregate/recent-commented + GET /api/movies/reportingByComments Aggregate movies with their most recent comments using MongoDB $lookup aggregation. Joins movies with comments collection to show recent comment activity. Query Parameters: @@ -392,7 +397,7 @@ async def aggregate_movies_by_genre(): SuccessResponse[List[dict]]: A response object containing movies with their most recent comments. 
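+    Example (illustrative response item; all values are hypothetical):
+        {"_id": "573a1390f29313caabcd42e8", "title": "Sample Movie", "year": 1999,
+         "genres": ["Drama"], "imdbRating": 7.4, "totalComments": 12,
+         "recentComments": [{"userName": "A. Viewer", "userEmail": "a@example.com",
+                             "text": "Great movie!", "date": "2015-03-01T00:00:00"}]}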
""" -@router.get("/aggregate/recent-commented", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByComments", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_recent_commented( limit: int = Query(default=10, ge=1, le=50), movie_id: str = Query(default=None) @@ -412,7 +417,11 @@ async def aggregate_movies_recent_commented( object_id = ObjectId(movie_id) pipeline[0]["$match"]["_id"] = object_id except Exception: - raise HTTPException(status_code=400, detail="Invalid movie_id format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details="The provided movie_id is not a valid ObjectId" + ) # Add lookup and additional pipeline stages pipeline.extend([ @@ -451,7 +460,7 @@ async def aggregate_movies_recent_commented( "$sort": {"mostRecentCommentDate": -1} }, { - "$limit": 50 if movie_id else 20 + "$limit": limit }, { "$project": { @@ -478,8 +487,15 @@ async def aggregate_movies_recent_commented( ]) # Execute the aggregation - results = await execute_aggregation(pipeline) - + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) + # Convert ObjectId to string for response for result in results: if "_id" in result: @@ -495,14 +511,14 @@ async def aggregate_movies_recent_commented( """ - GET /api/movies/aggregate/by-year + GET /api/movies/reportingByYear Aggregate movies by year with average rating and movie count. Reports yearly statistics including average rating and total movies per year. Returns: SuccessResponse[List[dict]]: A response object containing yearly movie statistics. """ -@router.get("/aggregate/by-year", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByYear", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_by_year(): # Define aggregation pipeline to group movies by year pipeline = [ @@ -574,7 +590,14 @@ async def aggregate_movies_by_year(): ] # Execute the aggregation - results = await execute_aggregation(pipeline) + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) return create_success_response( results, @@ -583,7 +606,7 @@ async def aggregate_movies_by_year(): """ - GET /api/movies/aggregate/directors + GET /api/movies/reportingByDirectors Aggregate directors with the most movies and their statistics. Reports directors sorted by number of movies directed. Query Parameters: @@ -592,7 +615,7 @@ async def aggregate_movies_by_year(): SuccessResponse[List[dict]]: A response object containing director statistics. 
""" -@router.get("/aggregate/directors", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByDirectors", response_model=SuccessResponse[List[dict]]) async def aggregate_directors_most_movies( limit: int = Query(default=20, ge=1, le=100) ): @@ -632,7 +655,14 @@ async def aggregate_directors_most_movies( ] # Execute the aggregation - results = await execute_aggregation(pipeline) + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) return create_success_response( results, From 0362be78d9a54b3785e9f12983687d29cfcfedbc Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 16:39:04 -0400 Subject: [PATCH 6/8] remove unneeded imports --- server/python/src/routers/movies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 6c4895f..0d2f785 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -1,5 +1,5 @@ -from fastapi import APIRouter, Query, Body, HTTPException -from src.database.mongo_client import db, get_collection +from fastapi import APIRouter, Query +from src.database.mongo_client import get_collection from src.models.models import CreateMovieRequest, Movie, SuccessResponse from typing import List from datetime import datetime From 1cc125a0be7a7e9cc15e6e5b3503757749d01252 Mon Sep 17 00:00:00 2001 From: Angela Date: Tue, 28 Oct 2025 16:01:12 -0400 Subject: [PATCH 7/8] add agg stage comments --- server/python/src/routers/movies.py | 164 ++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 35 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 0d2f785..bbde50f 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -165,7 +165,7 @@ async def get_all_movies( @router.post("/", response_model=SuccessResponse[Movie], status_code=201) async def create_movie(movie: CreateMovieRequest): - # Pydantic will automatically validate the structure + # Pydantic automatically validates the structure movie_data = movie.model_dump(by_alias=True, exclude_none=True) movies_collection = get_collection("movies") @@ -423,8 +423,29 @@ async def aggregate_movies_recent_commented( details="The provided movie_id is not a valid ObjectId" ) - # Add lookup and additional pipeline stages + # Add a multi-stage aggregation that: + # 1. Filters movies by valid year range + # 2. Joins with comments collection (like SQL JOIN) + # 3. Filters to only movies that have comments + # 4. Sorts comments by date and extracts most recent ones + # 5. Sorts movies by their most recent comment date + # 6. 
Shapes the final output with transformed comment structure + + pipeline = [ + # STAGE 1: $match - Initial Filter + # Filter movies to only those with valid year data + # Tip: Use $match early to reduce the initial dataset for better performance + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + } + ] + + # Add remaining pipeline stages pipeline.extend([ + # STAGE 2: $lookup - Join with the 'comments' Collection + # This gives each movie document a 'comments' array containing all its comments { "$lookup": { "from": "comments", @@ -433,59 +454,74 @@ async def aggregate_movies_recent_commented( "as": "comments" } }, + # STAGE 3: $match - Filter Movies with at Least One Comment + # This helps reduces dataset to only movies with user engagement { "$match": { "comments": {"$ne": []} } }, + # STAGE 4: $addFields - Add New Computed Fields { "$addFields": { + # Add computed field 'recentComments' that extracts only the N most recent comments (up to 'limit') "recentComments": { "$slice": [ { "$sortArray": { "input": "$comments", - "sortBy": {"date": -1} + "sortBy": {"date": -1} # -1 = descending (newest first) } }, - limit + limit # Number of comments to keep ] }, + # Add computed field 'mostRecentCommentDate' that gets the date of the most recent comment (to use in the next $sort stage) "mostRecentCommentDate": { "$max": "$comments.date" } } }, + # STAGE 5: $sort - Sort Movies by Most Recent Comment Date { "$sort": {"mostRecentCommentDate": -1} }, + # STAGE 6: $limit - Restrict Result Set Size + # - If querying single movie: return up to 50 results + # - If querying all movies: return up to 20 results + # Tip: This prevents overwhelming the client with too much data { - "$limit": limit + "$limit": 50 if movie_id else 20 }, + # STAGE 7: $project - Shape Final Response Output { "$project": { + # Include basic movie fields "title": 1, "year": 1, "genres": 1, + "_id": 1, + # Extract nested field: imdb.rating -> imdbRating "imdbRating": "$imdb.rating", + # Use $map to reshape computed 'recentComments' field with cleaner field names "recentComments": { "$map": { "input": "$recentComments", "as": "comment", "in": { - "userName": "$$comment.name", - "userEmail": "$$comment.email", - "text": "$$comment.text", - "date": "$$comment.date" + "userName": "$$comment.name", # Rename: name -> userName + "userEmail": "$$comment.email", # Rename: email -> userEmail + "text": "$$comment.text", # Keep: text + "date": "$$comment.date" # Keep: date } } }, - "totalComments": {"$size": "$comments"}, - "_id": 1 + # Calculate the total number of comments into 'totalComments' (not just 'recentComments') + # Used in display (e.g., "Showing 5 of 127 comments") + "totalComments": {"$size": "$comments"} } } ]) - # Execute the aggregation try: results = await execute_aggregation(pipeline) @@ -520,32 +556,48 @@ async def aggregate_movies_recent_commented( @router.get("/api/movies/reportingByYear", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_by_year(): - # Define aggregation pipeline to group movies by year + # Define aggregation pipeline to group movies by year with statistics + # This pipeline demonstrates grouping, statistical calculations, and data cleaning + + # Add a multi-stage aggregation that: + # 1. Filters movies by valid year range (data quality filter) + # 2. Groups movies by release year and calculates statistics per year + # 3. Shapes the final output with clean field names and rounded averages + # 4. 
Sorts results by year (newest first) for chronological presentation + pipeline = [ + # STAGE 1: $match - Data Quality Filter # Clean data: ensure year is an integer and within reasonable range + # Tip: Filter early to reduce dataset size and improve performance { "$match": { "year": {"$type": "number", "$gte": 1800, "$lte": 2030} } }, - # Group by year and calculate statistics + + # STAGE 2: $group - Aggregate Movies by Year + # Group all movies by their release year and calculate various statistics { "$group": { - "_id": "$year", - "movieCount": {"$sum": 1}, + "_id": "$year", # Group by year field + "movieCount": {"$sum": 1}, # Count total movies per year + + # Calculate average rating (only for valid numeric ratings) "averageRating": { "$avg": { "$cond": [ {"$and": [ - {"$ne": ["$imdb.rating", None]}, - {"$ne": ["$imdb.rating", ""]}, - {"$eq": [{"$type": "$imdb.rating"}, "double"]} + {"$ne": ["$imdb.rating", None]}, # Not null + {"$ne": ["$imdb.rating", ""]}, # Not empty string + {"$eq": [{"$type": "$imdb.rating"}, "double"]} # Is numeric ]}, - "$imdb.rating", - "$$REMOVE" + "$imdb.rating", # Include valid IMDB ratings + "$$REMOVE" # Exclude invalid IMDB ratings ] } }, + + # Find highest rating for the year (same validation as average rating) "highestRating": { "$max": { "$cond": [ @@ -559,6 +611,8 @@ async def aggregate_movies_by_year(): ] } }, + + # Find lowest rating for the year (same validation as average and highest rating) "lowestRating": { "$min": { "$cond": [ @@ -572,21 +626,29 @@ async def aggregate_movies_by_year(): ] } }, + + # Sum total votes across all movies in the year "totalVotes": {"$sum": "$imdb.votes"} } }, + + # STAGE 3: $project - Shape Final Output + # Transform the grouped data into a clean, readable format { "$project": { - "year": "$_id", + "year": "$_id", # Rename _id back to year because grouping was done by year but values were stored in _id "movieCount": 1, - "averageRating": {"$round": ["$averageRating", 2]}, + "averageRating": {"$round": ["$averageRating", 2]}, # Round to 2 decimal places "highestRating": 1, "lowestRating": 1, "totalVotes": 1, - "_id": 0 + "_id": 0 # Exclude the _id field from output } }, - {"$sort": {"year": -1}} + + # STAGE 4: $sort - Sort by Year (Newest First) + # Sort results in descending order to show most recent years first + {"$sort": {"year": -1}} # -1 = descending order ] # Execute the aggregation @@ -619,37 +681,69 @@ async def aggregate_movies_by_year(): async def aggregate_directors_most_movies( limit: int = Query(default=20, ge=1, le=100) ): - # Define aggregation pipeline to find directors with most movies + # Define aggregation pipeline to find directors with the most movies + # This pipeline demonstrates array unwinding, filtering, and ranking + + # Add a multi-stage aggregation that: + # 1. Filters movies with valid directors and year data (data quality filter) + # 2. Unwinds directors array to create separate documents per director + # 3. Cleans director names by filtering out null/empty names + # 4. Groups movies by individual director and calculates statistics per director + # 5. Sorts directors based on movie count + # 6. Limits results to top N directors + # 7. 
Shapes the final output with clean field names and rounded averages + pipeline = [ + # STAGE 1: $match - Initial Data Quality Filter + # Filter movies that have director information and valid years { "$match": { - "directors": {"$exists": True, "$ne": None, "$ne": []}, - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + "directors": {"$exists": True, "$ne": None, "$ne": []}, # Has directors array + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} # Valid year range } }, + + # STAGE 2: $unwind - Flatten Directors Array + # Convert each movie's directors array into separate documents + # Example: Movie with ["Director A", "Director B"] becomes 2 documents { "$unwind": "$directors" }, + + # STAGE 3: $match - Clean Director Names + # Filter out any null or empty director names after unwinding { "$match": { "directors": {"$ne": None, "$ne": ""} } }, + + # STAGE 4: $group - Aggregate by Director + # Group all movies by director name and calculate statistics { "$group": { - "_id": "$directors", - "movieCount": {"$sum": 1}, - "averageRating": {"$avg": "$imdb.rating"} + "_id": "$directors", # Group by individual director name + "movieCount": {"$sum": 1}, # Count movies per director + "averageRating": {"$avg": "$imdb.rating"} # Average rating of director's movies } }, - {"$sort": {"movieCount": -1}}, + + # STAGE 5: $sort - Rank Directors by Movie Count + # Sort directors by number of movies (highest first) + {"$sort": {"movieCount": -1}}, # -1 = descending (most movies first) + + # STAGE 6: $limit - Restrict Results + # Limit to top N directors based on user input {"$limit": limit}, + + # STAGE 7: $project - Shape Final Output + # Transform the grouped data into a clean, readable format { "$project": { - "director": "$_id", + "director": "$_id", # Rename _id to director "movieCount": 1, - "averageRating": {"$round": ["$averageRating", 2]}, - "_id": 0 + "averageRating": {"$round": ["$averageRating", 2]}, # Round to 2 decimal places + "_id": 0 # Exclude the _id field from output } } ] From 49b967a841648beb1d21c7737eada963c4725732 Mon Sep 17 00:00:00 2001 From: Angela Date: Tue, 28 Oct 2025 16:23:00 -0400 Subject: [PATCH 8/8] fix broken code --- server/python/src/routers/movies.py | 32 +++++++++++------------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index bbde50f..6e8d959 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -402,26 +402,6 @@ async def aggregate_movies_recent_commented( limit: int = Query(default=10, ge=1, le=50), movie_id: str = Query(default=None) ): - # Define aggregation pipeline to join movies with their most recent comments - pipeline = [ - { - "$match": { - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} - } - } - ] - - # Add movie_id filter if provided - if movie_id: - try: - object_id = ObjectId(movie_id) - pipeline[0]["$match"]["_id"] = object_id - except Exception: - return create_error_response( - message="Invalid movie ID format", - code="INTERNAL_SERVER_ERROR", - details="The provided movie_id is not a valid ObjectId" - ) # Add a multi-stage aggregation that: # 1. 
Filters movies by valid year range @@ -441,6 +421,18 @@ async def aggregate_movies_recent_commented( } } ] + + # Add movie_id filter if provided + if movie_id: + try: + object_id = ObjectId(movie_id) + pipeline[0]["$match"]["_id"] = object_id + except Exception: + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details="The provided movie_id is not a valid ObjectId" + ) # Add remaining pipeline stages pipeline.extend([