From 5ba4f9edcaa2933af288799a0baf93208d367765 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:16:55 -0400 Subject: [PATCH 1/8] add new methods --- server/python/src/models/models.py | 14 +- server/python/src/routers/movies.py | 431 +++++++++++++++++++++++++++- 2 files changed, 441 insertions(+), 4 deletions(-) diff --git a/server/python/src/models/models.py b/server/python/src/models/models.py index 9e66a82..e027116 100644 --- a/server/python/src/models/models.py +++ b/server/python/src/models/models.py @@ -92,9 +92,18 @@ class CreateMovieRequest(BaseModel): rated: Optional[str] = None runtime: Optional[int] = None poster: Optional[str] = None + +class Comment(BaseModel): + id: Optional[str] = Field(alias="_id") + name: str + email: str + movie_id: str + text: str + date: datetime - - + model_config = { + "populate_by_name": True + } class SuccessResponse(BaseModel, Generic[T]): success: bool = True message: Optional[str] @@ -113,4 +122,3 @@ class ErrorResponse(BaseModel): message: str error: ErrorDetails timestamp: str - \ No newline at end of file diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index efdf6dd..fab2e73 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -1,10 +1,12 @@ -from fastapi import APIRouter, Query +from fastapi import APIRouter, Query, Body, HTTPException from src.database.mongo_client import db, get_collection from src.models.models import CreateMovieRequest, Movie, SuccessResponse from typing import List from datetime import datetime from src.utils.errorHandler import create_success_response, create_error_response import re +from bson import ObjectId +from bson.errors import InvalidId ''' This file contains all the business logic for movie operations. @@ -22,7 +24,26 @@ # Place get_movie_by_id endpoint here #------------------------------------ +""" + GET /api/movies/{id} + Retrieve a single movie by its ID. + Path Parameters: + id (str): The ObjectId of the movie to retrieve. + Returns: + SuccessResponse[Movie]: A response object containing the movie data. +""" + +@router.get("/{id}", response_model=SuccessResponse[Movie]) +async def get_movie_by_id(id: str): + # Validate ObjectId format + object_id = ObjectId(id) + + # Use findOne() to get a single document by _id + movie = await db.movies.find_one({"_id": object_id}) + movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + + return create_success_response(movie, "Movie retrieved successfully") """ GET /api/movies/ @@ -101,6 +122,33 @@ async def get_all_movies( # Place create_movie endpoint here #------------------------------------ +""" + POST /api/movies/ + Create a new movie. + Request Body: + title (str): The title of the movie. + genre (str): The genre of the movie. + year (int): The year the movie was released. + min_rating (float): The minimum IMDB rating. + max_rating (float): The maximum IMDB rating. + Returns: + SuccessResponse[Movie]: A response object containing the created movie data. 
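+    Example (illustrative request only; the field values below are hypothetical and the
+    accepted fields are defined by the CreateMovieRequest model):
+        POST /api/movies/
+        {
+            "title": "Sample Movie",
+            "rated": "PG-13",
+            "runtime": 120
+        }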
+""" + +@router.post("/", response_model=SuccessResponse[CreateMovieRequest], status_code=201) +async def create_movie(movie: CreateMovieRequest): + # Pydantic will automatically validate the structure + movie_data = movie.model_dump(by_alias=True, exclude_none=True) + + result = await db.movies.insert_one(movie_data) + + # Retrieve the created document to return complete data + created_movie = await db.movies.find_one({"_id": result.inserted_id}) + + created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string + + return create_success_response(created_movie, f"Movie '{movie_data['title']}' created successfully") + #------------------------------------ # Place create_movies_batch endpoint here #------------------------------------ @@ -159,6 +207,31 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): # Place delete_movie endpoint here #------------------------------------ + +""" + DELETE /api/movies/{id} + Delete a single movie by its ID. + Path Parameters: + id (str): The ObjectId of the movie to delete. + Returns: + SuccessResponse[dict]: A response object containing deletion details. +""" + +@router.delete("/{id}", response_model=SuccessResponse[dict]) +async def delete_movie_by_id(id: str): + object_id = ObjectId(id) + + # Use deleteOne() to remove a single document + result = await db.movies.delete_one({"_id": object_id}) + + if result.deleted_count == 0: + raise HTTPException(status_code=404, detail="Movie not found") + + return create_success_response( + {"deletedCount": result.deleted_count}, + "Movie deleted successfully" + ) + #------------------------------------ # Place delete_movies_by_batch endpoint here #------------------------------------ @@ -167,6 +240,362 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): # Place find_and_delete_movie endpoint here #------------------------------------ +""" + DELETE /api/movies/{id}/find-and-delete + Finds and deletes a movie in a single atomic operation. + Demonstrates the findOneAndDelete() operation. + Path Parameters: + id (str): The ObjectId of the movie to find and delete. + Returns: + SuccessResponse[Movie]: A response object containing the deleted movie data. 
+""" + +@router.delete("/{id}/find-and-delete", response_model=SuccessResponse[Movie]) +async def find_and_delete_movie(id: str): + object_id = ObjectId(id) + + # Use find_one_and_delete() to find and delete in a single atomic operation + # This is useful when you need to return the deleted document + # or ensure the document exists before deletion + deleted_movie = await db.movies.find_one_and_delete({"_id": object_id}) + + if deleted_movie is None: + raise HTTPException(status_code=404, detail="Movie not found") + + deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string + + return create_success_response(deleted_movie, "Movie found and deleted successfully") + +async def execute_aggregation(pipeline: list) -> list: + """Helper function to execute aggregation pipeline and return results""" + print(f"Executing pipeline: {pipeline}") # Debug logging + print(f"Database name: {db.name if hasattr(db, 'name') else 'unknown'}") + print(f"Collection name: movies") + + # For motor (async MongoDB driver), we need to await the aggregate call + cursor = await db.movies.aggregate(pipeline) + results = await cursor.to_list(length=None) # Convert cursor to list + + print(f"Aggregation returned {len(results)} results") # Debug logging + if len(results) <= 3: # Log first few results for debugging + for i, doc in enumerate(results): + print(f"Result {i+1}: {doc}") + + return results + + +""" + GET /api/movies/aggregate/by-genre + Aggregate movies by genre with statistics using MongoDB aggregation pipeline. + Demonstrates grouping values from multiple documents and performing operations on grouped data. + Returns: + SuccessResponse[List[dict]]: A response object containing aggregated genre statistics. +""" + +@router.get("/aggregate/by-genre", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_by_genre(): + # Define an aggregation pipeline with match, unwind, group, and sort stages + pipeline = [ + # Clean data: ensure year is an integer + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + {"$unwind": "$genres"}, + { + "$group": { + "_id": "$genres", + "count": {"$sum": 1}, + "avgRating": {"$avg": "$imdb.rating"}, + "minYear": {"$min": "$year"}, + "maxYear": {"$max": "$year"}, + "totalVotes": {"$sum": "$imdb.votes"} + } + }, + {"$sort": {"count": -1}}, + { + "$project": { + "genre": "$_id", + "movieCount": "$count", + "averageRating": {"$round": ["$avgRating", 2]}, + "yearRange": { + "min": "$minYear", + "max": "$maxYear" + }, + "totalVotes": "$totalVotes", + "_id": 0 + } + } + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Aggregated statistics for {len(results)} genres" + ) + + +""" + GET /api/movies/aggregate/recent-commented + Aggregate movies with their most recent comments using MongoDB $lookup aggregation. + Joins movies with comments collection to show recent comment activity. + Query Parameters: + limit (int, optional): Number of results to return (default: 10, max: 50). + movie_id (str, optional): Filter by specific movie ObjectId. + Returns: + SuccessResponse[List[dict]]: A response object containing movies with their most recent comments. 
+""" + +@router.get("/aggregate/recent-commented", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_recent_commented( + limit: int = Query(default=10, ge=1, le=50), + movie_id: str = Query(default=None) +): + # Define aggregation pipeline to join movies with their most recent comments + pipeline = [ + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + } + ] + + # Add movie_id filter if provided + if movie_id: + try: + object_id = ObjectId(movie_id) + pipeline[0]["$match"]["_id"] = object_id + except Exception: + raise HTTPException(status_code=400, detail="Invalid movie_id format") + + # Add lookup and additional pipeline stages + pipeline.extend([ + { + "$lookup": { + "from": "comments", + "localField": "_id", + "foreignField": "movie_id", + "as": "comments" + } + }, + { + "$match": { + "comments": {"$ne": []} + } + }, + { + "$addFields": { + "recentComments": { + "$slice": [ + { + "$sortArray": { + "input": "$comments", + "sortBy": {"date": -1} + } + }, + limit + ] + }, + "mostRecentCommentDate": { + "$max": "$comments.date" + } + } + }, + { + "$sort": {"mostRecentCommentDate": -1} + }, + { + "$limit": 50 if movie_id else 20 + }, + { + "$project": { + "title": 1, + "year": 1, + "genres": 1, + "imdbRating": "$imdb.rating", + "recentComments": { + "$map": { + "input": "$recentComments", + "as": "comment", + "in": { + "userName": "$$comment.name", + "userEmail": "$$comment.email", + "text": "$$comment.text", + "date": "$$comment.date" + } + } + }, + "totalComments": {"$size": "$comments"}, + "_id": 1 + } + } + ]) + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + # Convert ObjectId to string for response + for result in results: + if "_id" in result: + result["_id"] = str(result["_id"]) + + # Calculate total comments from all movies + total_comments = sum(result.get("totalComments", 0) for result in results) + + return create_success_response( + results, + f"Found {total_comments} comments from movie{'s' if len(results) != 1 else ''}" + ) + + +""" + GET /api/movies/aggregate/by-year + Aggregate movies by year with average rating and movie count. + Reports yearly statistics including average rating and total movies per year. + Returns: + SuccessResponse[List[dict]]: A response object containing yearly movie statistics. 
+""" + +@router.get("/aggregate/by-year", response_model=SuccessResponse[List[dict]]) +async def aggregate_movies_by_year(): + # Define aggregation pipeline to group movies by year + pipeline = [ + # Clean data: ensure year is an integer and within reasonable range + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + # Group by year and calculate statistics + { + "$group": { + "_id": "$year", + "movieCount": {"$sum": 1}, + "averageRating": { + "$avg": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "highestRating": { + "$max": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "lowestRating": { + "$min": { + "$cond": [ + {"$and": [ + {"$ne": ["$imdb.rating", None]}, + {"$ne": ["$imdb.rating", ""]}, + {"$eq": [{"$type": "$imdb.rating"}, "double"]} + ]}, + "$imdb.rating", + "$$REMOVE" + ] + } + }, + "totalVotes": {"$sum": "$imdb.votes"} + } + }, + { + "$project": { + "year": "$_id", + "movieCount": 1, + "averageRating": {"$round": ["$averageRating", 2]}, + "highestRating": 1, + "lowestRating": 1, + "totalVotes": 1, + "_id": 0 + } + }, + {"$sort": {"year": -1}} + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Aggregated statistics for {len(results)} years" + ) + + +""" + GET /api/movies/aggregate/directors + Aggregate directors with the most movies and their statistics. + Reports directors sorted by number of movies directed. + Query Parameters: + limit (int, optional): Number of results to return (default: 20, max: 100). + min_movies (int, optional): Minimum number of movies to include director (default: 1). + Returns: + SuccessResponse[List[dict]]: A response object containing director statistics. 
+""" + +@router.get("/aggregate/directors", response_model=SuccessResponse[List[dict]]) +async def aggregate_directors_most_movies( + limit: int = Query(default=20, ge=1, le=100) +): + # Define aggregation pipeline to find directors with most movies + pipeline = [ + { + "$match": { + "directors": {"$exists": True, "$ne": None, "$ne": []}, + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + }, + { + "$unwind": "$directors" + }, + { + "$match": { + "directors": {"$ne": None, "$ne": ""} + } + }, + { + "$group": { + "_id": "$directors", + "movieCount": {"$sum": 1}, + "averageRating": {"$avg": "$imdb.rating"} + } + }, + {"$sort": {"movieCount": -1}}, + {"$limit": limit}, + { + "$project": { + "director": "$_id", + "movieCount": 1, + "averageRating": {"$round": ["$averageRating", 2]}, + "_id": 0 + } + } + ] + + # Execute the aggregation + results = await execute_aggregation(pipeline) + + return create_success_response( + results, + f"Found {len(results)} directors with most movies" + ) # ---- Old testing endpoint, will be removed later ---- ''' From 1731299c18f763342cf259d1aea1d5bb26667c11 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:20:57 -0400 Subject: [PATCH 2/8] remove comment line --- server/python/src/routers/movies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index fab2e73..7c60599 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -545,7 +545,6 @@ async def aggregate_movies_by_year(): Reports directors sorted by number of movies directed. Query Parameters: limit (int, optional): Number of results to return (default: 20, max: 100). - min_movies (int, optional): Minimum number of movies to include director (default: 1). Returns: SuccessResponse[List[dict]]: A response object containing director statistics. 
""" From 303fecc160007e75004cdd53df931cfc9619b724 Mon Sep 17 00:00:00 2001 From: Angela Date: Fri, 24 Oct 2025 15:32:57 -0400 Subject: [PATCH 3/8] add comment for vector search --- server/python/src/routers/movies.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 7c60599..237ef9d 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -609,4 +609,7 @@ async def test_error(): code="TEST_ERROR", details=str(e) ) -''' \ No newline at end of file +''' + +# ---- Place Vector Search Here ---- + From e6942b2b8296fb5c28542cceb47084418841acd5 Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 15:40:37 -0400 Subject: [PATCH 4/8] feedback --- server/python/src/routers/movies.py | 95 +++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 26 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 237ef9d..b4e5128 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -36,10 +36,19 @@ @router.get("/{id}", response_model=SuccessResponse[Movie]) async def get_movie_by_id(id: str): # Validate ObjectId format - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") + + movies_collection = get_collection("movies") + try: + movie = await movies_collection.find_one({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") - # Use findOne() to get a single document by _id - movie = await db.movies.find_one({"_id": object_id}) + if movie is None: + raise HTTPException(status_code=400, detail="Movie not found") movie["_id"] = str(movie["_id"]) # Convert ObjectId to string @@ -105,7 +114,10 @@ async def get_all_movies( cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) movies = [] async for movie in cursor: + if movie is None: + raise HTTPException(status_code=400, detail="Movie not found") movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + # Ensure that the year field contains int value. if "year" in movie and not isinstance(movie["year"], int): cleaned_year = re.sub(r"\D", "", str(movie["year"])) @@ -126,24 +138,35 @@ async def get_all_movies( POST /api/movies/ Create a new movie. Request Body: - title (str): The title of the movie. - genre (str): The genre of the movie. - year (int): The year the movie was released. - min_rating (float): The minimum IMDB rating. - max_rating (float): The maximum IMDB rating. + movie (CreateMovieRequest): A movie object containing the movie data. + See CreateMovieRequest model for available fields. Returns: SuccessResponse[Movie]: A response object containing the created movie data. 
""" -@router.post("/", response_model=SuccessResponse[CreateMovieRequest], status_code=201) +@router.post("/", response_model=SuccessResponse[Movie], status_code=201) async def create_movie(movie: CreateMovieRequest): # Pydantic will automatically validate the structure movie_data = movie.model_dump(by_alias=True, exclude_none=True) - result = await db.movies.insert_one(movie_data) - - # Retrieve the created document to return complete data - created_movie = await db.movies.find_one({"_id": result.inserted_id}) + movies_collection = get_collection("movies") + try: + result = await movies_collection.insert_one(movie_data) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + + # Verify that the document was created before querying it + if not result.acknowledged: + raise HTTPException(status_code=500, detail="Failed to create movie") + + try: + # Retrieve the created document to return complete data + created_movie = await movies_collection.find_one({"_id": result.inserted_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + + if created_movie is None: + raise HTTPException(status_code=500, detail="Movie was created but could not be retrieved") created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string @@ -219,13 +242,24 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): @router.delete("/{id}", response_model=SuccessResponse[dict]) async def delete_movie_by_id(id: str): - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") - # Use deleteOne() to remove a single document - result = await db.movies.delete_one({"_id": object_id}) + movies_collection = get_collection("movies") + try: + # Use deleteOne() to remove a single document + result = await movies_collection.delete_one({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") if result.deleted_count == 0: - raise HTTPException(status_code=404, detail="Movie not found") + return create_error_response( + message="Movie not found", + code="MOVIE_NOT_FOUND", + details=f"No movie found with ID: {id}" + ) return create_success_response( {"deletedCount": result.deleted_count}, @@ -252,16 +286,26 @@ async def delete_movie_by_id(id: str): @router.delete("/{id}/find-and-delete", response_model=SuccessResponse[Movie]) async def find_and_delete_movie(id: str): - object_id = ObjectId(id) + try: + object_id = ObjectId(id) + except InvalidId: + raise HTTPException(status_code=400, detail="Invalid movie ID format") + movies_collection = get_collection("movies") # Use find_one_and_delete() to find and delete in a single atomic operation # This is useful when you need to return the deleted document # or ensure the document exists before deletion - deleted_movie = await db.movies.find_one_and_delete({"_id": object_id}) + try: + deleted_movie = await movies_collection.find_one_and_delete({"_id": object_id}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") if deleted_movie is None: - raise HTTPException(status_code=404, detail="Movie not found") - + return create_error_response( + message="Movie not found", + code="MOVIE_NOT_FOUND", + details=f"No movie found with ID: {id}" + ) deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string return 
create_success_response(deleted_movie, "Movie found and deleted successfully") @@ -269,12 +313,11 @@ async def find_and_delete_movie(id: str): async def execute_aggregation(pipeline: list) -> list: """Helper function to execute aggregation pipeline and return results""" print(f"Executing pipeline: {pipeline}") # Debug logging - print(f"Database name: {db.name if hasattr(db, 'name') else 'unknown'}") - print(f"Collection name: movies") - # For motor (async MongoDB driver), we need to await the aggregate call - cursor = await db.movies.aggregate(pipeline) - results = await cursor.to_list(length=None) # Convert cursor to list + movies_collection = get_collection("movies") + # For the async Pymongo driver, we need to await the aggregate call + cursor = await movies_collection.aggregate(pipeline) + results = await cursor.to_list(length=None) # Convert cursor to list to collect all data at once rather than processing data per document print(f"Aggregation returned {len(results)} results") # Debug logging if len(results) <= 3: # Log first few results for debugging From aa1db244e7b5ee72024db786c00c12d25cf65dc1 Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 16:33:25 -0400 Subject: [PATCH 5/8] pr feedback --- server/python/src/routers/movies.py | 222 ++++++++++++++++------------ 1 file changed, 126 insertions(+), 96 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index b4e5128..6c4895f 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -39,16 +39,28 @@ async def get_movie_by_id(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = get_collection("movies") try: movie = await movies_collection.find_one({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if movie is None: - raise HTTPException(status_code=400, detail="Movie not found") + return create_error_response( + message="Movie not found", + code="INTERNAL_SERVER_ERROR", + details=f"No movie found with ID: {id}" + ) movie["_id"] = str(movie["_id"]) # Convert ObjectId to string @@ -111,21 +123,28 @@ async def get_all_movies( sort = [(sort_by, sort_order)] # Query the database with the constructed filter, sort, skip, and limit. - cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) - movies = [] - async for movie in cursor: - if movie is None: - raise HTTPException(status_code=400, detail="Movie not found") - movie["_id"] = str(movie["_id"]) # Convert ObjectId to string - - # Ensure that the year field contains int value. 
- if "year" in movie and not isinstance(movie["year"], int): - cleaned_year = re.sub(r"\D", "", str(movie["year"])) - try: - movie["year"] = int(cleaned_year) if cleaned_year else None - except ValueError: - movie["year"] = None - movies.append(movie) + try: + cursor = movies_collection.find(filter_dict).sort(sort).skip(skip).limit(limit) + movies = [] + async for movie in cursor: + if movie is None: + continue # Skip null movies instead of raising exception + movie["_id"] = str(movie["_id"]) # Convert ObjectId to string + + # Ensure that the year field contains int value. + if "year" in movie and not isinstance(movie["year"], int): + cleaned_year = re.sub(r"\D", "", str(movie["year"])) + try: + movie["year"] = int(cleaned_year) if cleaned_year else None + except ValueError: + movie["year"] = None + movies.append(movie) + except Exception as e: + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) # Return the results wrapped in a SuccessResponse return create_success_response(movies, f"Found {len(movies)} movies.") @@ -153,20 +172,36 @@ async def create_movie(movie: CreateMovieRequest): try: result = await movies_collection.insert_one(movie_data) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) # Verify that the document was created before querying it if not result.acknowledged: - raise HTTPException(status_code=500, detail="Failed to create movie") + return create_error_response( + message="Failed to create movie", + code="INTERNAL_SERVER_ERROR", + details="The database did not acknowledge the insert operation" + ) try: # Retrieve the created document to return complete data created_movie = await movies_collection.find_one({"_id": result.inserted_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if created_movie is None: - raise HTTPException(status_code=500, detail="Movie was created but could not be retrieved") + return create_error_response( + message="Movie creation verification failed", + code="INTERNAL_SERVER_ERROR", + details="Movie was created but could not be retrieved for verification" + ) created_movie["_id"] = str(created_movie["_id"]) # Convert ObjectId to string @@ -208,7 +243,16 @@ async def create_movies_batch(movies: List[CreateMovieRequest]): movies_dicts = [] for movie in movies: movies_dicts.append(movie.model_dump(exclude_unset=True, exclude_none=True)) - result = await movies_collection.insert_many(movies_dicts) + + try: + result = await movies_collection.insert_many(movies_dicts) + except Exception as e: + return create_error_response( + message="Database error occurred during batch creation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) + return create_success_response({ "insertedCount": len(result.inserted_ids), "insertedIds": [str(_id) for _id in result.inserted_ids] @@ -245,19 +289,27 @@ async def delete_movie_by_id(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = 
get_collection("movies") try: # Use deleteOne() to remove a single document result = await movies_collection.delete_one({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if result.deleted_count == 0: return create_error_response( message="Movie not found", - code="MOVIE_NOT_FOUND", + code="INTERNAL_SERVER_ERROR", details=f"No movie found with ID: {id}" ) @@ -289,7 +341,11 @@ async def find_and_delete_movie(id: str): try: object_id = ObjectId(id) except InvalidId: - raise HTTPException(status_code=400, detail="Invalid movie ID format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details=f"The provided ID '{id}' is not a valid ObjectId" + ) movies_collection = get_collection("movies") # Use find_one_and_delete() to find and delete in a single atomic operation @@ -298,12 +354,16 @@ async def find_and_delete_movie(id: str): try: deleted_movie = await movies_collection.find_one_and_delete({"_id": object_id}) except Exception as e: - raise HTTPException(status_code=500, detail=f"Database error occurred: {str(e)}") + return create_error_response( + message="Database error occurred", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) if deleted_movie is None: return create_error_response( message="Movie not found", - code="MOVIE_NOT_FOUND", + code="INTERNAL_SERVER_ERROR", details=f"No movie found with ID: {id}" ) deleted_movie["_id"] = str(deleted_movie["_id"]) # Convert ObjectId to string @@ -326,63 +386,8 @@ async def execute_aggregation(pipeline: list) -> list: return results - -""" - GET /api/movies/aggregate/by-genre - Aggregate movies by genre with statistics using MongoDB aggregation pipeline. - Demonstrates grouping values from multiple documents and performing operations on grouped data. - Returns: - SuccessResponse[List[dict]]: A response object containing aggregated genre statistics. -""" - -@router.get("/aggregate/by-genre", response_model=SuccessResponse[List[dict]]) -async def aggregate_movies_by_genre(): - # Define an aggregation pipeline with match, unwind, group, and sort stages - pipeline = [ - # Clean data: ensure year is an integer - { - "$match": { - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} - } - }, - {"$unwind": "$genres"}, - { - "$group": { - "_id": "$genres", - "count": {"$sum": 1}, - "avgRating": {"$avg": "$imdb.rating"}, - "minYear": {"$min": "$year"}, - "maxYear": {"$max": "$year"}, - "totalVotes": {"$sum": "$imdb.votes"} - } - }, - {"$sort": {"count": -1}}, - { - "$project": { - "genre": "$_id", - "movieCount": "$count", - "averageRating": {"$round": ["$avgRating", 2]}, - "yearRange": { - "min": "$minYear", - "max": "$maxYear" - }, - "totalVotes": "$totalVotes", - "_id": 0 - } - } - ] - - # Execute the aggregation - results = await execute_aggregation(pipeline) - - return create_success_response( - results, - f"Aggregated statistics for {len(results)} genres" - ) - - """ - GET /api/movies/aggregate/recent-commented + GET /api/movies/reportingByComments Aggregate movies with their most recent comments using MongoDB $lookup aggregation. Joins movies with comments collection to show recent comment activity. Query Parameters: @@ -392,7 +397,7 @@ async def aggregate_movies_by_genre(): SuccessResponse[List[dict]]: A response object containing movies with their most recent comments. 
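+    Example (illustrative response item; all values are hypothetical):
+        {"_id": "573a1390f29313caabcd42e8", "title": "Sample Movie", "year": 1999,
+         "genres": ["Drama"], "imdbRating": 7.4, "totalComments": 12,
+         "recentComments": [{"userName": "A. Viewer", "userEmail": "a@example.com",
+                             "text": "Great movie!", "date": "2015-03-01T00:00:00"}]}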
""" -@router.get("/aggregate/recent-commented", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByComments", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_recent_commented( limit: int = Query(default=10, ge=1, le=50), movie_id: str = Query(default=None) @@ -412,7 +417,11 @@ async def aggregate_movies_recent_commented( object_id = ObjectId(movie_id) pipeline[0]["$match"]["_id"] = object_id except Exception: - raise HTTPException(status_code=400, detail="Invalid movie_id format") + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details="The provided movie_id is not a valid ObjectId" + ) # Add lookup and additional pipeline stages pipeline.extend([ @@ -451,7 +460,7 @@ async def aggregate_movies_recent_commented( "$sort": {"mostRecentCommentDate": -1} }, { - "$limit": 50 if movie_id else 20 + "$limit": limit }, { "$project": { @@ -478,8 +487,15 @@ async def aggregate_movies_recent_commented( ]) # Execute the aggregation - results = await execute_aggregation(pipeline) - + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) + # Convert ObjectId to string for response for result in results: if "_id" in result: @@ -495,14 +511,14 @@ async def aggregate_movies_recent_commented( """ - GET /api/movies/aggregate/by-year + GET /api/movies/reportingByYear Aggregate movies by year with average rating and movie count. Reports yearly statistics including average rating and total movies per year. Returns: SuccessResponse[List[dict]]: A response object containing yearly movie statistics. """ -@router.get("/aggregate/by-year", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByYear", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_by_year(): # Define aggregation pipeline to group movies by year pipeline = [ @@ -574,7 +590,14 @@ async def aggregate_movies_by_year(): ] # Execute the aggregation - results = await execute_aggregation(pipeline) + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) return create_success_response( results, @@ -583,7 +606,7 @@ async def aggregate_movies_by_year(): """ - GET /api/movies/aggregate/directors + GET /api/movies/reportingByDirectors Aggregate directors with the most movies and their statistics. Reports directors sorted by number of movies directed. Query Parameters: @@ -592,7 +615,7 @@ async def aggregate_movies_by_year(): SuccessResponse[List[dict]]: A response object containing director statistics. 
""" -@router.get("/aggregate/directors", response_model=SuccessResponse[List[dict]]) +@router.get("/api/movies/reportingByDirectors", response_model=SuccessResponse[List[dict]]) async def aggregate_directors_most_movies( limit: int = Query(default=20, ge=1, le=100) ): @@ -632,7 +655,14 @@ async def aggregate_directors_most_movies( ] # Execute the aggregation - results = await execute_aggregation(pipeline) + try: + results = await execute_aggregation(pipeline) + except Exception as e: + return create_error_response( + message="Database error occurred during aggregation", + code="INTERNAL_SERVER_ERROR", + details=str(e) + ) return create_success_response( results, From 0362be78d9a54b3785e9f12983687d29cfcfedbc Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 27 Oct 2025 16:39:04 -0400 Subject: [PATCH 6/8] remove unneeded imports --- server/python/src/routers/movies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 6c4895f..0d2f785 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -1,5 +1,5 @@ -from fastapi import APIRouter, Query, Body, HTTPException -from src.database.mongo_client import db, get_collection +from fastapi import APIRouter, Query +from src.database.mongo_client import get_collection from src.models.models import CreateMovieRequest, Movie, SuccessResponse from typing import List from datetime import datetime From 1cc125a0be7a7e9cc15e6e5b3503757749d01252 Mon Sep 17 00:00:00 2001 From: Angela Date: Tue, 28 Oct 2025 16:01:12 -0400 Subject: [PATCH 7/8] add agg stage comments --- server/python/src/routers/movies.py | 164 ++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 35 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index 0d2f785..bbde50f 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -165,7 +165,7 @@ async def get_all_movies( @router.post("/", response_model=SuccessResponse[Movie], status_code=201) async def create_movie(movie: CreateMovieRequest): - # Pydantic will automatically validate the structure + # Pydantic automatically validates the structure movie_data = movie.model_dump(by_alias=True, exclude_none=True) movies_collection = get_collection("movies") @@ -423,8 +423,29 @@ async def aggregate_movies_recent_commented( details="The provided movie_id is not a valid ObjectId" ) - # Add lookup and additional pipeline stages + # Add a multi-stage aggregation that: + # 1. Filters movies by valid year range + # 2. Joins with comments collection (like SQL JOIN) + # 3. Filters to only movies that have comments + # 4. Sorts comments by date and extracts most recent ones + # 5. Sorts movies by their most recent comment date + # 6. 
Shapes the final output with transformed comment structure + + pipeline = [ + # STAGE 1: $match - Initial Filter + # Filter movies to only those with valid year data + # Tip: Use $match early to reduce the initial dataset for better performance + { + "$match": { + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + } + } + ] + + # Add remaining pipeline stages pipeline.extend([ + # STAGE 2: $lookup - Join with the 'comments' Collection + # This gives each movie document a 'comments' array containing all its comments { "$lookup": { "from": "comments", @@ -433,59 +454,74 @@ async def aggregate_movies_recent_commented( "as": "comments" } }, + # STAGE 3: $match - Filter Movies with at Least One Comment + # This helps reduces dataset to only movies with user engagement { "$match": { "comments": {"$ne": []} } }, + # STAGE 4: $addFields - Add New Computed Fields { "$addFields": { + # Add computed field 'recentComments' that extracts only the N most recent comments (up to 'limit') "recentComments": { "$slice": [ { "$sortArray": { "input": "$comments", - "sortBy": {"date": -1} + "sortBy": {"date": -1} # -1 = descending (newest first) } }, - limit + limit # Number of comments to keep ] }, + # Add computed field 'mostRecentCommentDate' that gets the date of the most recent comment (to use in the next $sort stage) "mostRecentCommentDate": { "$max": "$comments.date" } } }, + # STAGE 5: $sort - Sort Movies by Most Recent Comment Date { "$sort": {"mostRecentCommentDate": -1} }, + # STAGE 6: $limit - Restrict Result Set Size + # - If querying single movie: return up to 50 results + # - If querying all movies: return up to 20 results + # Tip: This prevents overwhelming the client with too much data { - "$limit": limit + "$limit": 50 if movie_id else 20 }, + # STAGE 7: $project - Shape Final Response Output { "$project": { + # Include basic movie fields "title": 1, "year": 1, "genres": 1, + "_id": 1, + # Extract nested field: imdb.rating -> imdbRating "imdbRating": "$imdb.rating", + # Use $map to reshape computed 'recentComments' field with cleaner field names "recentComments": { "$map": { "input": "$recentComments", "as": "comment", "in": { - "userName": "$$comment.name", - "userEmail": "$$comment.email", - "text": "$$comment.text", - "date": "$$comment.date" + "userName": "$$comment.name", # Rename: name -> userName + "userEmail": "$$comment.email", # Rename: email -> userEmail + "text": "$$comment.text", # Keep: text + "date": "$$comment.date" # Keep: date } } }, - "totalComments": {"$size": "$comments"}, - "_id": 1 + # Calculate the total number of comments into 'totalComments' (not just 'recentComments') + # Used in display (e.g., "Showing 5 of 127 comments") + "totalComments": {"$size": "$comments"} } } ]) - # Execute the aggregation try: results = await execute_aggregation(pipeline) @@ -520,32 +556,48 @@ async def aggregate_movies_recent_commented( @router.get("/api/movies/reportingByYear", response_model=SuccessResponse[List[dict]]) async def aggregate_movies_by_year(): - # Define aggregation pipeline to group movies by year + # Define aggregation pipeline to group movies by year with statistics + # This pipeline demonstrates grouping, statistical calculations, and data cleaning + + # Add a multi-stage aggregation that: + # 1. Filters movies by valid year range (data quality filter) + # 2. Groups movies by release year and calculates statistics per year + # 3. Shapes the final output with clean field names and rounded averages + # 4. 
Sorts results by year (newest first) for chronological presentation + pipeline = [ + # STAGE 1: $match - Data Quality Filter # Clean data: ensure year is an integer and within reasonable range + # Tip: Filter early to reduce dataset size and improve performance { "$match": { "year": {"$type": "number", "$gte": 1800, "$lte": 2030} } }, - # Group by year and calculate statistics + + # STAGE 2: $group - Aggregate Movies by Year + # Group all movies by their release year and calculate various statistics { "$group": { - "_id": "$year", - "movieCount": {"$sum": 1}, + "_id": "$year", # Group by year field + "movieCount": {"$sum": 1}, # Count total movies per year + + # Calculate average rating (only for valid numeric ratings) "averageRating": { "$avg": { "$cond": [ {"$and": [ - {"$ne": ["$imdb.rating", None]}, - {"$ne": ["$imdb.rating", ""]}, - {"$eq": [{"$type": "$imdb.rating"}, "double"]} + {"$ne": ["$imdb.rating", None]}, # Not null + {"$ne": ["$imdb.rating", ""]}, # Not empty string + {"$eq": [{"$type": "$imdb.rating"}, "double"]} # Is numeric ]}, - "$imdb.rating", - "$$REMOVE" + "$imdb.rating", # Include valid IMDB ratings + "$$REMOVE" # Exclude invalid IMDB ratings ] } }, + + # Find highest rating for the year (same validation as average rating) "highestRating": { "$max": { "$cond": [ @@ -559,6 +611,8 @@ async def aggregate_movies_by_year(): ] } }, + + # Find lowest rating for the year (same validation as average and highest rating) "lowestRating": { "$min": { "$cond": [ @@ -572,21 +626,29 @@ async def aggregate_movies_by_year(): ] } }, + + # Sum total votes across all movies in the year "totalVotes": {"$sum": "$imdb.votes"} } }, + + # STAGE 3: $project - Shape Final Output + # Transform the grouped data into a clean, readable format { "$project": { - "year": "$_id", + "year": "$_id", # Rename _id back to year because grouping was done by year but values were stored in _id "movieCount": 1, - "averageRating": {"$round": ["$averageRating", 2]}, + "averageRating": {"$round": ["$averageRating", 2]}, # Round to 2 decimal places "highestRating": 1, "lowestRating": 1, "totalVotes": 1, - "_id": 0 + "_id": 0 # Exclude the _id field from output } }, - {"$sort": {"year": -1}} + + # STAGE 4: $sort - Sort by Year (Newest First) + # Sort results in descending order to show most recent years first + {"$sort": {"year": -1}} # -1 = descending order ] # Execute the aggregation @@ -619,37 +681,69 @@ async def aggregate_movies_by_year(): async def aggregate_directors_most_movies( limit: int = Query(default=20, ge=1, le=100) ): - # Define aggregation pipeline to find directors with most movies + # Define aggregation pipeline to find directors with the most movies + # This pipeline demonstrates array unwinding, filtering, and ranking + + # Add a multi-stage aggregation that: + # 1. Filters movies with valid directors and year data (data quality filter) + # 2. Unwinds directors array to create separate documents per director + # 3. Cleans director names by filtering out null/empty names + # 4. Groups movies by individual director and calculates statistics per director + # 5. Sorts directors based on movie count + # 6. Limits results to top N directors + # 7. 
Shapes the final output with clean field names and rounded averages + pipeline = [ + # STAGE 1: $match - Initial Data Quality Filter + # Filter movies that have director information and valid years { "$match": { - "directors": {"$exists": True, "$ne": None, "$ne": []}, - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} + "directors": {"$exists": True, "$ne": None, "$ne": []}, # Has directors array + "year": {"$type": "number", "$gte": 1800, "$lte": 2030} # Valid year range } }, + + # STAGE 2: $unwind - Flatten Directors Array + # Convert each movie's directors array into separate documents + # Example: Movie with ["Director A", "Director B"] becomes 2 documents { "$unwind": "$directors" }, + + # STAGE 3: $match - Clean Director Names + # Filter out any null or empty director names after unwinding { "$match": { "directors": {"$ne": None, "$ne": ""} } }, + + # STAGE 4: $group - Aggregate by Director + # Group all movies by director name and calculate statistics { "$group": { - "_id": "$directors", - "movieCount": {"$sum": 1}, - "averageRating": {"$avg": "$imdb.rating"} + "_id": "$directors", # Group by individual director name + "movieCount": {"$sum": 1}, # Count movies per director + "averageRating": {"$avg": "$imdb.rating"} # Average rating of director's movies } }, - {"$sort": {"movieCount": -1}}, + + # STAGE 5: $sort - Rank Directors by Movie Count + # Sort directors by number of movies (highest first) + {"$sort": {"movieCount": -1}}, # -1 = descending (most movies first) + + # STAGE 6: $limit - Restrict Results + # Limit to top N directors based on user input {"$limit": limit}, + + # STAGE 7: $project - Shape Final Output + # Transform the grouped data into a clean, readable format { "$project": { - "director": "$_id", + "director": "$_id", # Rename _id to director "movieCount": 1, - "averageRating": {"$round": ["$averageRating", 2]}, - "_id": 0 + "averageRating": {"$round": ["$averageRating", 2]}, # Round to 2 decimal places + "_id": 0 # Exclude the _id field from output } } ] From 49b967a841648beb1d21c7737eada963c4725732 Mon Sep 17 00:00:00 2001 From: Angela Date: Tue, 28 Oct 2025 16:23:00 -0400 Subject: [PATCH 8/8] fix broken code --- server/python/src/routers/movies.py | 32 +++++++++++------------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/server/python/src/routers/movies.py b/server/python/src/routers/movies.py index bbde50f..6e8d959 100644 --- a/server/python/src/routers/movies.py +++ b/server/python/src/routers/movies.py @@ -402,26 +402,6 @@ async def aggregate_movies_recent_commented( limit: int = Query(default=10, ge=1, le=50), movie_id: str = Query(default=None) ): - # Define aggregation pipeline to join movies with their most recent comments - pipeline = [ - { - "$match": { - "year": {"$type": "number", "$gte": 1800, "$lte": 2030} - } - } - ] - - # Add movie_id filter if provided - if movie_id: - try: - object_id = ObjectId(movie_id) - pipeline[0]["$match"]["_id"] = object_id - except Exception: - return create_error_response( - message="Invalid movie ID format", - code="INTERNAL_SERVER_ERROR", - details="The provided movie_id is not a valid ObjectId" - ) # Add a multi-stage aggregation that: # 1. 
Filters movies by valid year range @@ -441,6 +421,18 @@ async def aggregate_movies_recent_commented( } } ] + + # Add movie_id filter if provided + if movie_id: + try: + object_id = ObjectId(movie_id) + pipeline[0]["$match"]["_id"] = object_id + except Exception: + return create_error_response( + message="Invalid movie ID format", + code="INTERNAL_SERVER_ERROR", + details="The provided movie_id is not a valid ObjectId" + ) # Add remaining pipeline stages pipeline.extend([