In [1]:
import numpy as np
import pandas as pd

In [4]:
movie_data = pd.read_csv('IMDB-Movie-Data.csv')
movie_data

Unnamed: 0,Rank,Title,Genre,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,55,The Dark Knight,Action|Crime|Drama,Christopher Nolan,Christian Bale| Heath Ledger| Aaron Eckhart|Mi...,2008,152,9.0,1791916,533.32,82.0
1,81,Inception,Action|Adventure|Sci-Fi,Christopher Nolan,Leonardo DiCaprio| Joseph Gordon-Levitt| Ellen...,2010,148,8.8,1583625,292.57,74.0
2,118,Dangal,Action|Biography|Drama,Nitesh Tiwari,Aamir Khan| Sakshi Tanwar| Fatima Sana Shaikh|...,2016,161,8.8,48969,11.15,
3,37,Interstellar,Adventure|Drama|Sci-Fi,Christopher Nolan,Matthew McConaughey| Anne Hathaway| Jessica Ch...,2014,169,8.6,1047747,187.99,74.0
4,97,Kimi no na wa,Animation|Drama|Fantasy,Makoto Shinkai,Ryunosuke Kamiki| Mone Kamishiraishi| Ryo Nari...,2016,106,8.6,34110,4.68,79.0
...,...,...,...,...,...,...,...,...,...,...,...
995,969,Wrecker,Action|Horror|Thriller,Micheal Bafaro,Anna Hutchison| Andrea Whitburn| Jennifer Koen...,2015,83,3.5,1210,,37.0
996,648,Tall Men,Fantasy|Horror|Thriller,Jonathan Holbrook,Dan Crisafulli| Kay Whitney| Richard Garcia| P...,2016,133,3.2,173,,57.0
997,43,Don't Fuck in the Woods,Horror,Shawn Burkett,Brittany Blanton| Ayse Howard| Roman Jossart|N...,2016,73,2.7,496,,
998,872,Dragonball Evolution,Action|Adventure|Fantasy,James Wong,Justin Chatwin| James Marsters| Yun-Fat Chow| ...,2009,85,2.7,59512,9.35,45.0


 (1) Top‐3 movies with the highest ratings in 2016

In [38]:
movies_2016 = movie_data[movie_data['Year'] == 2016]
top_3_movies = movies_2016.sort_values(by='Rating', ascending=False).head(3)
result = top_3_movies['Title']

print('Top‐3 movies with the highest ratings in 2016:')
for i in result:
    print(i)

Top‐3 movies with the highest ratings in 2016:
Dangal
Kimi no na wa
Koe no katachi


(2)  The actor generating the highest average revenue

`平均收入＝演員出演的電影之revenue總和/演員出演的電影總數`


In [52]:
actors_revenue_data = movie_data[['Actors', 'Revenue (Millions)']].dropna()  # Drop rows with missing revenue

actors_revenue_data ['Actors'] = actors_revenue_data['Actors'].str.split('|')

# Expand the DataFrame so each actor appears in a separate row
actors_revenue_data = actors_revenue_data.explode('Actors')

actor_revenue = actors_revenue_data.groupby('Actors').agg(
    total_revenue=('Revenue (Millions)', 'sum'),
    movie_count=('Revenue (Millions)', 'count'))

actor_revenue['average_revenue'] = actor_revenue['total_revenue'] / actor_revenue['movie_count']

top_actor = actor_revenue.sort_values(by='average_revenue', ascending=False).head(1)
top_actor = top_actor.reset_index()

print('The actor generating the highest average revenue:')
print(top_actor['Actors'].values[0].strip())

The actor generating the highest average revenue:
John Boyega


(3)  The average rating of Emma Watson’s movies

In [62]:
actors_rating_data = movie_data[['Actors', 'Rating']].dropna() 
actors_rating_data ['Actors'] = actors_rating_data['Actors'].str.split('|')
actors_rating_data = actors_rating_data.explode('Actors')

emma_data = actors_rating_data[actors_rating_data['Actors'].apply(lambda actors: 'Emma Watson' in actors)]
average_rating = emma_data['Rating'].mean()

print("The average rating of Emma Watson's movies is: {:.3f}" .format(average_rating))

The average rating of Emma Watson's movies is: 7.175


(4)  Top‐3 directors who collaborate with the most actors? 

In [78]:
directors_actors_data = movie_data[['Director', 'Actors']].dropna()
directors_actors_data['Actors'] = directors_actors_data['Actors'].str.split('|')
directors_actors_data = directors_actors_data.explode('Actors')

# Count the number of unique actors each director has collaborated with
directors_actors_count = directors_actors_data.groupby('Director')['Actors'].nunique().reset_index()

top_director = directors_actors_count.sort_values(by='Actors', ascending=False).head(3)

print('Top‐3 directors who collaborate with the most actors:')
for i in top_director['Director']:
    print(i)

Top‐3 directors who collaborate with the most actors:
Ridley Scott
M. Night Shyamalan
Paul W.S. Anderson


(5)  Top‐2 actors playing in the most genres of movies

In [94]:
actors_genre_data = movie_data[['Actors', 'Genre']].dropna()
actors_genre_data['Actors'] = actors_genre_data['Actors'].str.split('|')
actors_genre_data['Genre'] = actors_genre_data['Genre'].str.split('|')    

actors_genre_data = actors_genre_data.explode('Actors')
actors_genre_data = actors_genre_data.explode('Genre')

actors_genre_count = actors_genre_data.groupby('Actors')['Genre'].nunique().reset_index()

# actors playing in the most genres of movies 的超過2人，所以不挑頭兩個
max_genres = actors_genre_count['Genre'].max()
top_actors = actors_genre_count[actors_genre_count['Genre'] == max_genres]

print('Top‐2 actors playing in the most genres of movies')
for i in top_actors['Actors']:
    print(i.strip())


Top‐2 actors playing in the most genres of movies
Hugh Jackman
Jake Gyllenhaal
Johnny Depp


(6) Top‐3 actors whose movies lead to the largest maximum gap of years

In [109]:
actors_year_data = movie_data[['Actors', 'Year']].dropna()
actors_year_data['Actors'] = actors_year_data['Actors'].str.split('|')

actors_year_data = actors_year_data.explode('Actors')
actor_year_range = actors_year_data.groupby('Actors')['Year'].agg(['min', 'max']).reset_index()
actor_year_range['gap'] = (actor_year_range['max'] - actor_year_range['min'])

max_gap = actor_year_range['gap'].max()
top_gap = actor_year_range[actor_year_range['gap'] == max_gap]

print('Top‐3 actors whose movies lead to the largest maximum gap of years:')
for i in top_gap['Actors']:
    print(i.strip())

Top‐3 actors whose movies lead to the largest maximum gap of years:
Audrey Tautou
Ben Kingsley
Bob Balaban
Bryce Dallas Howard
Chiwetel Ejiofor
Dustin Hoffman
Ellen Burstyn
Emily Blunt
Hugh Jackman
Jack Davenport
Jennifer Connelly
Jeremy Irons
Jessica Biel
Judi Dench
Justin Theroux
Marion Cotillard
Maya Rudolph
Michelle Monaghan
Morgan Freeman
Paula Patton
Rachel Weisz
Scarlett Johansson
Toni Collette
Anne Hathaway
Brad Pitt
Denzel Washington
Gerard Butler
Jennifer Aniston
Luke Wilson
Russell Crowe
Tom Cruise
Tom Hanks
Will Smith


(7) Find all actors who collaborate with Johnny Depp in direct and indirect ways

In [126]:
actors_data = movie_data[['Actors']].dropna()
actors_data['Actors'] = actors_data['Actors'].str.split('|')

# Build the graph
collaboration_graph = {}
for actors_list in actors_data['Actors']:
    # Create edges between every pair of actors in the same movie
    for i in range(len(actors_list)):
        for j in range(i + 1, len(actors_list)):
            if actors_list[i] not in collaboration_graph:
                collaboration_graph[actors_list[i]] = []
            if actors_list[j] not in collaboration_graph:
                collaboration_graph[actors_list[j]] = []
            # Add the collaboration between actor i and actor j
            if actors_list[j] not in collaboration_graph[actors_list[i]]:
                collaboration_graph[actors_list[i]].append(actors_list[j])
            if actors_list[i] not in collaboration_graph[actors_list[j]]:
                collaboration_graph[actors_list[j]].append(actors_list[i])

# Function to perform BFS using a list as a queue
def bfs_find_collaborators(graph, start_actor):
    visited = []  # List to keep track of visited actors
    queue = [start_actor]  # Queue for BFS using a list
    visited.append(start_actor)
    
    # BFS traversal
    while queue:
        actor = queue.pop(0)  # Dequeue the first actor
        for collaborator in graph.get(actor, []):
            if collaborator not in visited:
                visited.append(collaborator)
                queue.append(collaborator)
    
    return visited

# Find all actors who collaborated with Johnny Depp
collaborators_with_johnny_depp = bfs_find_collaborators(collaboration_graph, 'Johnny Depp')

print("Actors who directly or indirectly collaborated with Johnny Depp:")
for i in collaborators_with_johnny_depp:
    print(i.strip())


Actors who directly or indirectly collaborated with Johnny Depp:
Johnny Depp
Helena Bonham Carter
Alan Rickman
Timothy Spall
Orlando Bloom
Keira Knightley
Jack Davenport
Geoffrey Rush
Benedict Cumberbatch
Dakota Johnson
Joel Edgerton
Penelope Cruz
Ian McShane
Geoffrey Rush
Armie Hammer
William Fichtner
Tom Wilkinson
Rebecca Hall
Morgan Freeman
Cillian Murphy
Michelle Pfeiffer
Eva Green
Colin Firth
Derek Jacobi
Mia Wasikowska
Johnny Depp
Anne Hathaway
Anne Hathaway
Ben Whishaw
Dustin Hoffman
Francesc Albiol
Helen Mirren
Aaron Paul
Barkhad Abdi
Benedict Cumberbatch
Matthew Goode
Allen Leech
Chris Pine
Kevin Costner
Kenneth Branagh
David Oyelowo
Rosamund Pike
Tom Felton
Jim Sturgess
Sylvia Hoeks
Donald Sutherland
Nicholas Hope
Sam Neill
Ewen Leslie
Zachary Quinto
Zoe Saldana
Tilda Swinton
Matthias Schoenaerts
Ralph Fiennes
Tom Hardy
Nick Nolte
Jennifer Morrison
Jessica Chastain
Chris Pratt
Mark Strong
Leonardo DiCaprio
Carey Mulligan
Tobey Maguire
Ruth Negga
Will Dalton
Dean Mumford
Jason