In [17]:
from pymongo import MongoClient
# Connect to a local MongoDB instance
client = MongoClient('mongodb://localhost:27017/')
db = client['mflix']
movies_collection = db.get_collection('movies')

1.(i) Top N Movies with the Highest IMDB Rating

In [39]:
def top_n_movies_highest_imdb_rating(n):
    pipeline = [
        {"$match": {"imdb.rating": {"$ne": ''}}},
        {"$sort": {"imdb.rating": -1}},
        {"$limit": n}
    ]
    top_movies = list(movies_collection.aggregate(pipeline))
    for movie in top_movies:
        # Assuming the field for movie title is 'title'
        print(f"{movie['title']} with IMDB Rating {movie['imdb']['rating']}")

top_n_movies_highest_imdb_rating(10)


Band of Brothers with IMDB Rating 9.6
Planet Earth with IMDB Rating 9.5
The Civil War with IMDB Rating 9.4
A Brave Heart: The Lizzie Velasquez Story with IMDB Rating 9.4
The Civil War with IMDB Rating 9.4
The Real Miyagi with IMDB Rating 9.3
The Shawshank Redemption with IMDB Rating 9.3
The Shawshank Redemption with IMDB Rating 9.3
Cosmos with IMDB Rating 9.3
The Decalogue with IMDB Rating 9.2


1.(ii) Top N Movies with the Highest IMDB Rating in a Given Year

In [58]:
def top_n_movies_highest_imdb_rating_in_year(n, year):
    pipeline = [
        {"$match": {"year": year, "imdb.rating": {"$ne": ''}}},
        {"$sort": {"imdb.rating": -1}},
        {"$limit": n}
    ]
    top_movies = list(movies_collection.aggregate(pipeline))
    for movie in top_movies:
        # Assuming the field for movie title is 'title'
        print(f"{movie['title']} with IMDB Rating {movie['imdb']['rating']} of year {movie['year']}")
    
top_n_movies_highest_imdb_rating_in_year(10,2000)

Hera Pheri with IMDB Rating 8.5 of year 2000
A Dog's Will with IMDB Rating 8.5 of year 2000
Memento with IMDB Rating 8.5 of year 2000
Gladiator with IMDB Rating 8.5 of year 2000
Requiem for a Dream with IMDB Rating 8.4 of year 2000
FLCL with IMDB Rating 8.4 of year 2000
The 10th Kingdom with IMDB Rating 8.4 of year 2000
Snatch. with IMDB Rating 8.3 of year 2000
Mourning Rock with IMDB Rating 8.3 of year 2000
Werckmeister Harmonies with IMDB Rating 8.2 of year 2000


1.(iii) Top N Movies with the Highest IMDB Rating and Number of Votes > 1000

In [35]:
def top_n_movies_highest_imdb_rating_votes_gt_1000(n):
    pipeline = [
        {"$match": {"imdb.rating": {"$ne": ''},"imdb.votes": {"$gt": 1000}}},
        {"$sort": {"imdb.rating": -1}},
        {"$limit": n}
    ]
    top_movies = list(movies_collection.aggregate(pipeline))
    for movie in top_movies:
        # Assuming the field for movie title is 'title'
        print(f"{movie['title']} with IMDB Rating {movie['imdb']['rating']} having votes {movie['imdb']['votes']}")
    

top_n_movies_highest_imdb_rating_votes_gt_1000(10)

Band of Brothers with IMDB Rating 9.6 having votes 183802
Planet Earth with IMDB Rating 9.5 having votes 82896
The Civil War with IMDB Rating 9.4 having votes 4624
The Civil War with IMDB Rating 9.4 having votes 4625
The Shawshank Redemption with IMDB Rating 9.3 having votes 1513145
Cosmos with IMDB Rating 9.3 having votes 17174
The Shawshank Redemption with IMDB Rating 9.3 having votes 1521105
The Godfather with IMDB Rating 9.2 having votes 1038358
The Blue Planet with IMDB Rating 9.2 having votes 7093
The Decalogue with IMDB Rating 9.2 having votes 10958


1.(iv) Top N Movies with Title Matching a Given Pattern Sorted by Highest Tomatoes Ratings

In [72]:
def top_n_movies_matching_pattern_sorted_by_tomatoes_rating(n, pattern):
    pipeline = [
        {"$match": {"title": {"$regex": pattern, "$options": "i"}}},
        {"$sort": {"tomatoes.viewer.rating": -1}},
        {"$limit": n}
    ]
    top_movies = list(movies_collection.aggregate(pipeline))
    for movie in top_movies:
        # Assuming the field for movie title is 'title'
        print(f"{movie['title']} with Tomatoes Rating {movie['tomatoes']['viewer']['rating']} following pattern \"{pattern}\"")
    

top_n_movies_matching_pattern_sorted_by_tomatoes_rating(10, "rit")

Operation Homecoming: Writing the Wartime Experience with Tomatoes Rating 4.5 following pattern "rit"
Spirited Away with Tomatoes Rating 4.2 following pattern "rit"
The Spirit of the Beehive with Tomatoes Rating 4.2 following pattern "rit"
Fierce Light: When Spirit Meets Action with Tomatoes Rating 4.2 following pattern "rit"
Spirit of the Marathon with Tomatoes Rating 4.1 following pattern "rit"
The Inheritance or Fuckoffguysgoodday with Tomatoes Rating 4.1 following pattern "rit"
Inherit the Wind with Tomatoes Rating 4.1 following pattern "rit"
The Battle of Britain with Tomatoes Rating 4 following pattern "rit"
Juliet of the Spirits with Tomatoes Rating 4 following pattern "rit"
Ritual with Tomatoes Rating 4 following pattern "rit"


----------------------------------------------------------------------------------------------------------------------------------

2.(i) Top N directors who created the maximum number of movies:

In [132]:
def top_n_directors_most_movies(n):
    pipeline = [
        {"$match": {"directors": {"$ne": None}}},
        {"$unwind": "$directors"},
        {"$group": {"_id": "$directors", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]
    top_directors = list(movies_collection.aggregate(pipeline))
    for top_director in top_directors:
        print(f"{top_director['_id']} with total movies {top_director['count']}")
top_n_directors_most_movies(10)

Woody Allen with total movies 40
John Ford with total movies 35
Takashi Miike with total movies 34
John Huston with total movies 34
Werner Herzog with total movies 33
Martin Scorsese with total movies 32
Alfred Hitchcock with total movies 31
Sidney Lumet with total movies 30
Steven Spielberg with total movies 29
Michael Apted with total movies 29


2.(ii) Top N directors who created the maximum number of movies in a given year

In [151]:
def top_n_directors_most_movies_in_year(year, n):
    # Pipeline to find top N directors with most movies in the given year
    pipeline = [
        {"$match": {"year": year, "directors": {"$exists": True, "$ne": None}}},
        {"$unwind": "$directors"},
        {"$group": {"_id": "$directors", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]

    # Execute the aggregation pipeline
    top_directors = list(movies_collection.aggregate(pipeline))
    for top_director in top_directors:
        print(top_director['_id'],"with total movies",top_director['count'])
    

year = 2002
n = 5
top_n_directors_most_movies_in_year(year, n)


Lone Scherfig with total movies 3
Lucas Belvaux with total movies 3
Ken Loach with total movies 3
Mira Nair with total movies 3
SABU with total movies 3


2.(iii) Top N directors who created the maximum number of movies for a given genre

In [155]:
def top_n_directors_most_movies_in_genre(genre, n):
    # Pipeline to find top N directors with most movies in the given genre
    pipeline = [
        {"$match": {"genres": genre, "directors": {"$exists": True, "$ne": None}}},
        {"$unwind": "$directors"},
        {"$group": {"_id": "$directors", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]

    # Execute the aggregation pipeline
    top_directors = list(movies_collection.aggregate(pipeline))
    for top_director in top_directors:
        print(top_director['_id'],"with total movies",top_director['count'],"in the genre of",genre)
    
genre = "Action"
n = 5
top_n_directors_most_movies_in_genre(genre, n)


Hark Tsui with total movies 17 in the genre of Action
John Woo with total movies 15 in the genre of Action
Robert Rodriguez with total movies 14 in the genre of Action
Tony Scott with total movies 14 in the genre of Action
Takashi Miike with total movies 12 in the genre of Action


---------------------------------------------------------------------------------------------------------------------------------------

3.(i) Top N actors who starred in the maximum number of movies overall

In [166]:
def top_n_actors_most_movies(n):
    # Pipeline to find top N actors with most movies overall
    pipeline = [
        {"$match": {"cast": {"$exists": True, "$ne": None}}},
        {"$unwind": "$cast"},
        {"$group": {"_id": "$cast", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]

    # Execute the aggregation pipeline
    top_actors = list(movies_collection.aggregate(pipeline))
    for actor in top_actors:
        print(f"{actor['_id']} with total movies {actor['count']}")
    

top_n_actors_most_movies(10)

Gèrard Depardieu with total movies 68
Robert De Niro with total movies 60
Michael Caine with total movies 53
Marcello Mastroianni with total movies 50
Bruce Willis with total movies 49
Max von Sydow with total movies 49
Samuel L. Jackson with total movies 48
Morgan Freeman with total movies 48
Christopher Plummer with total movies 47
Gene Hackman with total movies 46


3.(ii) Top N actors who starred in the maximum number of movies in a given year

In [165]:
def top_n_actors_most_movies_in_year(year, n):
    # Pipeline to find top N actors with most movies in the given year
    pipeline = [
        {"$match": {"year": year, "cast": {"$exists": True, "$ne": None}}},
        {"$unwind": "$cast"},
        {"$group": {"_id": "$cast", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]

    # Execute the aggregation pipeline
    top_actors = list(movies_collection.aggregate(pipeline))
    for actor in top_actors:
        print(f"{actor['_id']} with total movies {actor['count']} in year {year}")
        
top_n_actors_most_movies_in_year(1998,10)

Gena Rowlands with total movies 6 in year 1998
K. Krishna with total movies 5 in year 1998
Neve Campbell with total movies 5 in year 1998
Gwyneth Paltrow with total movies 5 in year 1998
Sonu Sisupal with total movies 5 in year 1998
Vishwas with total movies 5 in year 1998
Brittany Murphy with total movies 5 in year 1998
Ayesha Dharker with total movies 5 in year 1998
Oliver Platt with total movies 4 in year 1998
William H. Macy with total movies 4 in year 1998


3.(iii) Top N actors who starred in the maximum number of movies for a given genre

In [164]:
def top_n_actors_most_movies_in_genre(genre, n):
    # Pipeline to find top N actors with most movies in the given genre
    pipeline = [
        {"$match": {"genres": genre, "cast": {"$exists": True, "$ne": None}}},
        {"$unwind": "$cast"},
        {"$group": {"_id": "$cast", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": n}
    ]

    # Execute the aggregation pipeline
    top_actors = list(movies_collection.aggregate(pipeline))
    for actor in top_actors:
        print(f"{actor['_id']} with total movies {actor['count']} with genre {genre}")

top_n_actors_most_movies_in_genre("Comedy", 10)

Jackie Chan with total movies 34 with genre Comedy
Eddie Murphy with total movies 34 with genre Comedy
Gèrard Depardieu with total movies 30 with genre Comedy
Adam Sandler with total movies 28 with genre Comedy
Jack Lemmon with total movies 27 with genre Comedy
Robin Williams with total movies 27 with genre Comedy
Akshay Kumar with total movies 25 with genre Comedy
Ben Stiller with total movies 24 with genre Comedy
Danny DeVito with total movies 24 with genre Comedy
Will Ferrell with total movies 24 with genre Comedy


----------------------------------------------------------------------------------------------------------------------------------------

4. Find top `N` movies for each genre with the highest IMDB rating

In [176]:
def top_n_movies_per_genre(n):
    pipeline = [
        {"$unwind": "$genres"},
        {"$match": {"imdb.rating": {"$exists": True, "$ne": ""}}},
        {"$sort": {"genres": 1, "imdb.rating": -1}},
        {"$group": {"_id": "$genres", "top_movies": {"$push": {"title": "$title", "imdb_rating": "$imdb.rating"}}}},
        {"$project": {"_id": 0, "genre": "$_id", "top_movies": {"$slice": ["$top_movies", n]}}}
    ]

    # Execute the aggregation pipeline
    top_movies_per_genre = list(movies_collection.aggregate(pipeline))

    # Print the results
    for genre_movies in top_movies_per_genre:
        print(f"Top {n} movies for {genre_movies['genre']}:")
        for movie in genre_movies['top_movies']:
            print(f"{movie['title']} - IMDB Rating: {movie['imdb_rating']}")
        print("\n")


top_n_movies_per_genre(1)  # Change the value of 'n' as needed


Top 1 movies for Musical:
Dr. Horrible's Sing-Along Blog - IMDB Rating: 8.7


Top 1 movies for News:
Most Likely to Succeed - IMDB Rating: 8.9


Top 1 movies for Romance:
Pride and Prejudice - IMDB Rating: 9.1


Top 1 movies for Action:
Band of Brothers - IMDB Rating: 9.6


Top 1 movies for Documentary:
Planet Earth - IMDB Rating: 9.5


Top 1 movies for Talk-Show:
The Late Shift - IMDB Rating: 7


Top 1 movies for Family:
A Brave Heart: The Lizzie Velasquez Story - IMDB Rating: 9.4


Top 1 movies for Music:
Prerokbe Ognja - IMDB Rating: 9


Top 1 movies for Sport:
Baseball - IMDB Rating: 9.1


Top 1 movies for Animation:
Over the Garden Wall - IMDB Rating: 9.2


Top 1 movies for War:
The Civil War - IMDB Rating: 9.4


Top 1 movies for Horror:
Never Sleep Again: The Elm Street Legacy - IMDB Rating: 8.7


Top 1 movies for Film-Noir:
Sunset Blvd. - IMDB Rating: 8.5


Top 1 movies for Drama:
Band of Brothers - IMDB Rating: 9.6


Top 1 movies for History:
Band of Brothers - IMDB Rating: 9.6