## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide display settings: show up to 30 columns and print floats
# with two decimal places.
pd.set_option('display.max_columns', 30)
pd.set_option('display.float_format', '{:.2f}'.format)

## Data Import

In [2]:
# Load the movie dataset; release_date is parsed into datetime values.
df = pd.read_csv(
    'movies_complete.csv',
    parse_dates=['release_date'],
)

In [3]:
# Preview the loaded frame (the rendered output is truncated to the first and last rows).
df

Unnamed: 0,id,title,tagline,release_date,genres,belongs_to_collection,original_language,budget_musd,revenue_musd,production_companies,production_countries,vote_count,vote_average,popularity,runtime,overview,spoken_languages,poster_path,cast,cast_size,crew_size,director
0,862,Toy Story,,1995-10-30,Animation|Comedy|Family,Toy Story Collection,en,30.00,373.55,Pixar Animation Studios,United States of America,5415.00,7.70,21.95,81.00,"Led by Woody, Andy's toys live happily in his ...",English,<img src='http://image.tmdb.org/t/p/w185//uXDf...,Tom Hanks|Tim Allen|Don Rickles|Jim Varney|Wal...,13,106,John Lasseter
1,8844,Jumanji,Roll the dice and unleash the excitement!,1995-12-15,Adventure|Fantasy|Family,,en,65.00,262.80,TriStar Pictures|Teitler Film|Interscope Commu...,United States of America,2413.00,6.90,17.02,104.00,When siblings Judy and Peter discover an encha...,English|Français,<img src='http://image.tmdb.org/t/p/w185//vgpX...,Robin Williams|Jonathan Hyde|Kirsten Dunst|Bra...,26,16,Joe Johnston
2,15602,Grumpier Old Men,Still Yelling. Still Fighting. Still Ready for...,1995-12-22,Romance|Comedy,Grumpy Old Men Collection,en,,,Warner Bros.|Lancaster Gate,United States of America,92.00,6.50,11.71,101.00,A family wedding reignites the ancient feud be...,English,<img src='http://image.tmdb.org/t/p/w185//1FSX...,Walter Matthau|Jack Lemmon|Ann-Margret|Sophia ...,7,4,Howard Deutch
3,31357,Waiting to Exhale,Friends are the people who let you be yourself...,1995-12-22,Comedy|Drama|Romance,,en,16.00,81.45,Twentieth Century Fox Film Corporation,United States of America,34.00,6.10,3.86,127.00,"Cheated on, mistreated and stepped on, the wom...",English,<img src='http://image.tmdb.org/t/p/w185//4wjG...,Whitney Houston|Angela Bassett|Loretta Devine|...,10,10,Forest Whitaker
4,11862,Father of the Bride Part II,Just When His World Is Back To Normal... He's ...,1995-02-10,Comedy,Father of the Bride Collection,en,,76.58,Sandollar Productions|Touchstone Pictures,United States of America,173.00,5.70,8.39,106.00,Just when George Banks has recovered from his ...,English,<img src='http://image.tmdb.org/t/p/w185//lf9R...,Steve Martin|Diane Keaton|Martin Short|Kimberl...,12,7,Charles Shyer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44686,439050,Subdue,Rising and falling between a man and woman,NaT,Drama|Family,,fa,,,,Iran,1.00,4.00,0.07,90.00,Rising and falling between a man and woman.,فارسی,<img src='http://image.tmdb.org/t/p/w185//pfC8...,Leila Hatami|Kourosh Tahami|Elham Korda,3,9,Hamid Nematollah
44687,111109,Century of Birthing,,2011-11-17,Drama,,tl,,,Sine Olivia,Philippines,3.00,9.00,0.18,360.00,An artist struggles to finish his work while a...,,<img src='http://image.tmdb.org/t/p/w185//xZkm...,Angel Aquino|Perry Dizon|Hazel Orencio|Joel To...,11,6,Lav Diaz
44688,67758,Betrayal,A deadly game of wits.,2003-08-01,Action|Drama|Thriller,,en,,,American World Pictures,United States of America,6.00,3.80,0.90,90.00,"When one of her hits goes wrong, a professiona...",English,<img src='http://image.tmdb.org/t/p/w185//eGga...,Erika Eleniak|Adam Baldwin|Julie du Page|James...,15,5,Mark L. Lester
44689,227506,Satan Triumphant,,1917-10-21,,,en,,,Yermoliev,Russia,0.00,,0.00,87.00,"In a small town live two brothers, one a minis...",,<img src='http://image.tmdb.org/t/p/w185//aorB...,Iwan Mosschuchin|Nathalie Lissenko|Pavel Pavlo...,5,2,Yakov Protazanov


# Are Franchises More Successful?

In [4]:
# List the available column labels before deriving new columns from them.
df.columns

Index(['id', 'title', 'tagline', 'release_date', 'genres',
       'belongs_to_collection', 'original_language', 'budget_musd',
       'revenue_musd', 'production_companies', 'production_countries',
       'vote_count', 'vote_average', 'popularity', 'runtime', 'overview',
       'spoken_languages', 'poster_path', 'cast', 'cast_size', 'crew_size',
       'director'],
      dtype='object')

In [5]:
# Flag every movie that belongs to a collection, i.e. is part of a franchise.
# Series.notna() detects existing (non-missing) values and returns a
# same-sized boolean Series:
#   - False where the value is NaN
#   - True for anything other than NaN
df['franchise'] = df['belongs_to_collection'].notna()

# Here the boolean result is stored as a new column. Used as a row filter
# instead (df[mask]), the same Series would keep only the rows whose
# collection is not NaN.

In [6]:
# Inspect the new boolean column.
df['franchise']

0         True
1        False
2         True
3        False
4         True
         ...  
44686    False
44687    False
44688    False
44689    False
44690    False
Name: franchise, Length: 44691, dtype: bool

In [7]:
# Franchise vs. stand-alone movie counts:
# 4463 movies belong to a collection, the remaining 40228 are stand-alone.
df['franchise'].value_counts()

False    40228
True      4463
Name: franchise, dtype: int64

In [8]:
# Spot-check the flag against the collection name for the first 30 movies.
df.loc[:, ['title', 'belongs_to_collection', 'franchise']].head(30)

Unnamed: 0,title,belongs_to_collection,franchise
0,Toy Story,Toy Story Collection,True
1,Jumanji,,False
2,Grumpier Old Men,Grumpy Old Men Collection,True
3,Waiting to Exhale,,False
4,Father of the Bride Part II,Father of the Bride Collection,True
5,Heat,,False
6,Sabrina,,False
7,Tom and Huck,,False
8,Sudden Death,,False
9,GoldenEye,James Bond Collection,True


### Franchise vs. Stand-alone: Average Revenue

In [9]:
# Mean per franchise group.
# The franchise column holds only two values, True and False, so groupby('franchise')
# builds exactly two groups:
#   - all rows with True are collected together for the aggregate function (e.g. mean),
#   - all rows with False are collected together for the same aggregation.
# The result therefore has only two rows, False and True, one per group.

In [10]:
# Average revenue (in million USD) of franchise vs. stand-alone movies.
df.groupby('franchise')['revenue_musd'].mean()

franchise
False    44.74
True    165.71
Name: revenue_musd, dtype: float64

### Franchise vs. Stand-alone: Return on Investment / Profitability (median)

In [11]:
# ROI is defined here as revenue divided by budget (a multiple, not a percentage).
df['ROI'] = df['revenue_musd'] / df['budget_musd']

In [12]:
# Median ROI of franchise vs. stand-alone movies.
df.groupby('franchise')['ROI'].median()

franchise
False   1.62
True    3.71
Name: ROI, dtype: float64

### Franchise vs. Stand-alone: Average Budget

In [13]:
# Average budget (in million USD) of franchise vs. stand-alone movies.
df.groupby('franchise')['budget_musd'].mean()

franchise
False   18.05
True    38.32
Name: budget_musd, dtype: float64

### Franchise vs. Stand-alone: Average Popularity

In [14]:
# Average popularity score of franchise vs. stand-alone movies.
df.groupby('franchise')['popularity'].mean()

franchise
False   2.59
True    6.25
Name: popularity, dtype: float64

### Franchise vs. Stand-alone: Average Rating

In [15]:
# Average rating of franchise vs. stand-alone movies.
df.groupby('franchise')['vote_average'].mean()

franchise
False   6.01
True    5.96
Name: vote_average, dtype: float64

In [16]:
# Every comparison above followed the same recipe: group df by franchise,
# then apply one aggregate function to one column.
# agg() with a {column: function} mapping does all of them in a single call.
# The single-column versions returned a Series; this returns a DataFrame,
# which can be manipulated further.

df.groupby('franchise').agg({
    'revenue_musd': 'mean',
    'ROI': 'median',
    'budget_musd': 'mean',
    'popularity': 'mean',
    'vote_average': 'mean',
})

Unnamed: 0_level_0,revenue_musd,ROI,budget_musd,popularity,vote_average
franchise,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
False,44.74,1.62,18.05,2.59,6.01
True,165.71,3.71,38.32,6.25,5.96


# Most Successful Franchises

## Find the most successful Franchises in terms of
- __total number of movies__
- __total & mean budget__
- __total & mean revenue__
- __mean rating__

In [17]:
# Build one row per collection by grouping on 'belongs_to_collection' and
# aggregating the per-movie values. Passing a list of functions for a column
# produces two-level column labels such as ('budget_musd', 'sum').
# Caveat: 'sum' over a group whose values are all NaN is shown as 0.0 while
# its 'mean' stays NaN, so a 0.00 total next to a NaN mean means "no data".

franchise = df.groupby('belongs_to_collection').agg({
    "title": 'count',
    "budget_musd": ['sum', 'mean'],
    "revenue_musd": ['sum', 'mean'],
    "vote_average": 'mean',
    'vote_count': ['sum', 'mean'],
})

In [18]:
franchise
# Each row now describes a whole collection: the values of all of its movies
# are combined into one figure by count(), sum() or mean().

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
... Has Fallen Collection,2,130.00,65.00,366.78,183.39,6.00,4666.00,2333.00
00 Schneider Filmreihe,1,0.00,,0.00,,6.50,16.00,16.00
08/15 Collection,1,0.00,,0.00,,5.90,4.00,4.00
100 Girls Collection,2,0.00,,0.00,,5.15,128.00,64.00
101 Dalmatians (Animated) Collection,2,4.00,4.00,215.88,215.88,6.25,1874.00,937.00
...,...,...,...,...,...,...,...,...
Сказки Чуковского,1,0.00,,0.00,,3.00,3.00,3.00
Чебурашка и крокодил Гена,1,0.00,,0.00,,6.70,7.00,7.00
Что Творят мужчины! (Коллекция),2,2.00,2.00,0.00,,3.15,11.00,5.50
男はつらいよ シリーズ,3,0.00,,0.00,,7.00,2.00,0.67


## Most Successful Franchises: total number of movies

In [19]:
# sort_values(by='title') does not work here because the label 'title' is
# not unique in a frame with multi-level columns.
# For a multi-index, the sort key must be a tuple with one element per
# level, e.g. ('title', 'count').

franchise.sort_values(('title', 'count'), ascending=False).head(20)

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
The Bowery Boys,29,0.0,,0.0,,6.67,21.0,0.72
Totò Collection,27,0.0,,0.0,,6.84,487.0,18.04
James Bond Collection,26,1539.65,59.22,7106.97,273.35,6.34,33392.0,1284.31
Zatôichi: The Blind Swordsman,26,0.0,,0.0,,6.4,291.0,11.19
The Carry On Collection,25,0.0,,0.0,,6.17,526.0,21.04
Charlie Chan (Sidney Toler) Collection,21,0.0,,0.0,,6.61,122.0,5.81
Pokémon Collection,20,250.72,50.14,601.87,66.87,6.06,1422.0,71.1
Godzilla (Showa) Collection,16,2.81,0.56,0.0,,5.98,636.0,39.75
Dragon Ball Z (Movie) Collection,15,5.0,5.0,112.12,56.06,6.61,2000.0,133.33
Charlie Chan (Warner Oland) Collection,15,0.0,,0.0,,6.66,103.0,6.87


### Recommended way

In [20]:
# Method shown in the tutorial.
# nlargest() selects the same rows as sort_values(ascending=False) + head();
# only the order of rows with equal counts may differ between the two.

franchise.nlargest(n=20, columns=('title', 'count'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
The Bowery Boys,29,0.0,,0.0,,6.67,21.0,0.72
Totò Collection,27,0.0,,0.0,,6.84,487.0,18.04
James Bond Collection,26,1539.65,59.22,7106.97,273.35,6.34,33392.0,1284.31
Zatôichi: The Blind Swordsman,26,0.0,,0.0,,6.4,291.0,11.19
The Carry On Collection,25,0.0,,0.0,,6.17,526.0,21.04
Charlie Chan (Sidney Toler) Collection,21,0.0,,0.0,,6.61,122.0,5.81
Pokémon Collection,20,250.72,50.14,601.87,66.87,6.06,1422.0,71.1
Godzilla (Showa) Collection,16,2.81,0.56,0.0,,5.98,636.0,39.75
Charlie Chan (Warner Oland) Collection,15,0.0,,0.0,,6.66,103.0,6.87
Dragon Ball Z (Movie) Collection,15,5.0,5.0,112.12,56.06,6.61,2000.0,133.33


## Most Successful Franchises: total & mean budget

### Total

In [21]:
# Ten collections with the largest combined budget.
franchise.nlargest(n=10, columns=('budget_musd', 'sum'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
James Bond Collection,26,1539.65,59.22,7106.97,273.35,6.34,33392.0,1284.31
Harry Potter Collection,8,1280.0,160.0,7707.37,963.42,7.54,47866.0,5983.25
Pirates of the Caribbean Collection,5,1250.0,250.0,4521.58,904.32,6.88,25080.0,5016.0
The Fast and the Furious Collection,8,1009.0,126.12,5125.1,640.64,6.66,25576.0,3197.0
X-Men Collection,6,983.0,163.83,2808.83,468.14,6.82,27563.0,4593.83
Transformers Collection,5,965.0,193.0,4366.1,873.22,6.14,15232.0,3046.4
Star Wars Collection,8,854.35,106.79,7434.49,929.31,7.37,43443.0,5430.38
The Hobbit Collection,3,750.0,250.0,2935.52,978.51,7.23,17944.0,5981.33
The Terminator Collection,5,661.4,132.28,1845.33,369.07,6.54,16832.0,3366.4
Mission: Impossible Collection,5,650.0,130.0,2778.98,555.8,6.6,14005.0,2801.0


### Mean

In [22]:
# Ten collections with the largest average budget per movie.
franchise.nlargest(n=10, columns=('budget_musd', 'mean'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Tangled Collection,2,260.0,260.0,591.79,591.79,7.25,3802.0,1901.0
Pirates of the Caribbean Collection,5,1250.0,250.0,4521.58,904.32,6.88,25080.0,5016.0
The Avengers Collection,2,500.0,250.0,2924.96,1462.48,7.35,18908.0,9454.0
The Hobbit Collection,3,750.0,250.0,2935.52,978.51,7.23,17944.0,5981.33
Man of Steel Collection,2,475.0,237.5,1536.11,768.05,6.1,13651.0,6825.5
Avatar Collection,1,237.0,237.0,2787.97,2787.97,7.2,12114.0,12114.0
The Amazing Spider-Man Collection,2,415.0,207.5,1457.93,728.97,6.5,11008.0,5504.0
World War Z Collection,1,200.0,200.0,531.87,531.87,6.7,5683.0,5683.0
Spider-Man Collection,3,597.0,199.0,2496.35,832.12,6.47,13517.0,4505.67
The Dark Knight Collection,3,585.0,195.0,2463.72,821.24,7.8,29043.0,9681.0


## Most Successful Franchises:  total & mean revenue

### Total

In [23]:
# Ten collections with the largest combined revenue.
franchise.nlargest(n=10, columns=('revenue_musd', 'sum'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Harry Potter Collection,8,1280.0,160.0,7707.37,963.42,7.54,47866.0,5983.25
Star Wars Collection,8,854.35,106.79,7434.49,929.31,7.37,43443.0,5430.38
James Bond Collection,26,1539.65,59.22,7106.97,273.35,6.34,33392.0,1284.31
The Fast and the Furious Collection,8,1009.0,126.12,5125.1,640.64,6.66,25576.0,3197.0
Pirates of the Caribbean Collection,5,1250.0,250.0,4521.58,904.32,6.88,25080.0,5016.0
Transformers Collection,5,965.0,193.0,4366.1,873.22,6.14,15232.0,3046.4
Despicable Me Collection,6,299.0,74.75,3691.07,922.77,6.78,18248.0,3041.33
The Twilight Collection,5,385.0,77.0,3342.11,668.42,5.84,13851.0,2770.2
Ice Age Collection,5,429.0,85.8,3216.71,643.34,6.38,13219.0,2643.8
Jurassic Park Collection,4,379.0,94.75,3031.48,757.87,6.5,18435.0,4608.75


### Mean

In [24]:
# Ten collections with the largest average revenue per movie.
franchise.nlargest(n=10, columns=('revenue_musd', 'mean'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Avatar Collection,1,237.0,237.0,2787.97,2787.97,7.2,12114.0,12114.0
The Avengers Collection,2,500.0,250.0,2924.96,1462.48,7.35,18908.0,9454.0
Frozen Collection,2,150.0,150.0,1274.22,1274.22,7.1,6070.0,3035.0
Finding Nemo Collection,2,294.0,147.0,1968.91,984.45,7.2,10625.0,5312.5
The Hobbit Collection,3,750.0,250.0,2935.52,978.51,7.23,17944.0,5981.33
The Lord of the Rings Collection,3,266.0,88.67,2916.54,972.18,8.03,24759.0,8253.0
Harry Potter Collection,8,1280.0,160.0,7707.37,963.42,7.54,47866.0,5983.25
Star Wars Collection,8,854.35,106.79,7434.49,929.31,7.37,43443.0,5430.38
Despicable Me Collection,6,299.0,74.75,3691.07,922.77,6.78,18248.0,3041.33
Pirates of the Caribbean Collection,5,1250.0,250.0,4521.58,904.32,6.88,25080.0,5016.0


## Most Successful Franchises: mean rating

In [25]:
# Keep only collections whose movies average at least 1000 votes, then rank
# the remaining ones by mean rating.
franchise.loc[lambda f: f[('vote_count', 'mean')] >= 1000].nlargest(20, ('vote_average', 'mean'))

Unnamed: 0_level_0,title,budget_musd,budget_musd,revenue_musd,revenue_musd,vote_average,vote_count,vote_count
Unnamed: 0_level_1,count,sum,mean,sum,mean,mean,sum,mean
belongs_to_collection,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
The Lord of the Rings Collection,3,266.0,88.67,2916.54,972.18,8.03,24759.0,8253.0
The Godfather Collection,3,73.0,24.33,429.38,143.13,7.97,11031.0,3677.0
Blade Runner Collection,1,28.0,28.0,33.14,33.14,7.9,3833.0,3833.0
The Man With No Name Collection,3,2.0,0.67,35.5,11.83,7.83,4268.0,1422.67
The Dark Knight Collection,3,585.0,195.0,2463.72,821.24,7.8,29043.0,9681.0
Guardians of the Galaxy Collection,2,370.0,185.0,1636.74,818.37,7.75,14872.0,7436.0
Kill Bill Collection,2,60.0,30.0,333.11,166.55,7.7,9152.0,4576.0
Kingsman Collection,1,81.0,81.0,414.35,414.35,7.6,6069.0,6069.0
How to Train Your Dragon Collection,2,310.0,155.0,1104.0,552.0,7.55,7482.0,3741.0
Harry Potter Collection,8,1280.0,160.0,7707.37,963.42,7.54,47866.0,5983.25


## Most Successful Directors

6. __Find__ the __most successful Directors__ in terms of

- __total number of movies__
- __total revenue__
- __mean rating__

### Top Five directors with Max number of Movies.

In [26]:
# value_counts() sorts by frequency, so the first five entries are the
# directors with the most movies.
df.director.value_counts().iloc[:5]

John Ford           66
Michael Curtiz      65
Werner Herzog       54
Alfred Hitchcock    53
Woody Allen         49
Name: director, dtype: int64

### Top 10 directors with Max Total Revenue.

In [27]:
# Per-director totals of every numeric column, ranked by total revenue.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently, so a bare sum() raises TypeError on the
# datetime `release_date` column. Older versions give the same result either way.
df.groupby('director').sum(numeric_only=True).sort_values(by='revenue_musd', ascending=False).head(10)

Unnamed: 0_level_0,id,budget_musd,revenue_musd,vote_count,vote_average,popularity,runtime,cast_size,crew_size,franchise,ROI
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Steven Spielberg,1263357,1736.95,9256.62,62266.0,227.5,429.59,4276.0,1072,1624,7.0,310.0
Peter Jackson,268469,1326.78,6528.24,47571.0,92.8,244.86,1853.0,312,518,6.0,50.51
Michael Bay,941840,1670.0,6437.47,28767.0,83.1,166.17,1907.0,612,850,7.0,50.9
James Cameron,206262,760.05,5900.61,33736.0,76.2,308.79,1253.0,383,579,5.0,54.23
David Yates,1017165,1135.0,5334.56,31056.0,60.3,179.89,1293.0,563,180,5.0,31.02
Christopher Nolan,665524,1105.01,4747.41,67344.0,83.8,317.35,1314.0,631,928,3.0,44.42
Robert Zemeckis,1015066,1165.7,4138.23,37666.0,129.1,260.36,2141.0,665,990,4.0,87.22
Tim Burton,688494,1264.18,4032.92,36922.0,141.4,283.27,2039.0,517,803,4.0,61.22
Ridley Scott,892772,1641.9,3917.53,43083.0,158.5,436.91,3084.0,726,1430,5.0,53.88
Chris Columbus,394131,627.0,3866.84,26413.0,96.6,186.3,1782.0,469,602,5.0,88.39


### Top 10 directors with the highest mean vote count per movie.

In [28]:
# Per-director averages of every numeric column, ranked by mean vote count.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently, so a bare mean() raises TypeError on text
# columns such as `title`. Older versions give the same result either way.
df.groupby('director').mean(numeric_only=True).sort_values(by='vote_count', ascending=False).head(10)

Unnamed: 0_level_0,id,budget_musd,revenue_musd,vote_count,vote_average,popularity,runtime,cast_size,crew_size,franchise,ROI
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Tim Miller,293660.0,58.0,783.11,11444.0,7.4,187.86,108.0,46.0,88.0,1.0,13.5
Christopher Nolan,60502.18,110.5,474.74,6122.18,7.62,28.85,119.45,57.36,84.36,0.27,4.44
Pierre Coffin,56904.0,72.5,757.14,5662.0,7.05,23.55,96.5,36.0,28.0,1.0,10.33
Pete Docter,44804.0,155.0,718.51,5019.0,7.53,19.14,71.5,34.0,97.75,0.5,4.67
Alex Garland,264660.0,15.0,36.87,4862.0,7.6,14.14,108.0,11.0,60.0,0.0,2.46
Andrew Stanton,46900.5,183.5,693.59,4808.5,7.08,17.68,106.75,28.75,81.75,0.5,4.78
Lee Unkrich,10193.0,200.0,1066.97,4710.0,7.6,16.97,103.0,45.0,38.0,1.0,5.33
Rich Moore,82690.0,165.0,471.22,4656.0,7.1,13.7,108.0,66.0,38.0,1.0,2.86
Lana Wachowski,4953.0,33.75,235.26,4641.0,7.4,22.91,122.0,25.0,43.0,0.5,4.46
Robert Stromberg,102651.0,180.0,758.54,4607.0,7.0,19.47,97.0,82.0,14.0,0.0,4.21


In [29]:
# The ranking above is still useful: it is sorted by mean vote_count, and the mean vote_average shown next to it tells us how well those heavily voted movies were rated.
# Read together, the two columns give a good idea of a director's rating.

In [30]:
# Now we add one more condition: the director must have at least 10 movies.

In [31]:
# One row per director: number of movies, mean vote count and mean rating.
director = df.groupby('director').agg({
    'title': 'count',
    'vote_count': 'mean',
    'vote_average': 'mean',
})
director.head()

Unnamed: 0_level_0,title,vote_count,vote_average
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dale Trevillion\t,2,2.0,4.0
Davide Manuli,1,10.0,6.9
E.W. Swackhamer,1,5.0,5.9
Vitaliy Vorobyov,1,3.0,5.5
Yeon Sang-Ho,4,259.75,6.6


### Top highest Rated Directors.

In [32]:
# Directors with at least 10 movies and a mean vote count of at least 1000,
# ranked by mean rating.
director.query('title >= 10 and vote_count >= 1000').nlargest(20, 'vote_average')

Unnamed: 0_level_0,title,vote_count,vote_average
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Hayao Miyazaki,14,1050.0,7.7
Christopher Nolan,11,6122.18,7.62
Quentin Tarantino,10,4591.0,7.49
Wes Anderson,10,1174.3,7.37
David Fincher,10,3758.8,7.35
Peter Jackson,13,3659.31,7.14
Joel Coen,17,1067.0,7.02
James Cameron,11,3066.91,6.93
Stanley Kubrick,16,1138.38,6.91
James Mangold,10,1660.7,6.9


### Most Successful Directors: Horror

In [33]:
# df.genres has NaN values. Replace them with an empty string so that string
# filters on the column return a pure boolean mask. astype(str) would instead
# turn every NaN into the literal text 'nan', which looks like real genre data.
df['genres'] = df.genres.fillna('')

In [34]:
# Total revenue per director over all movies whose genres include Horror.
# na=False counts a missing genre as "not Horror", so the filter is a valid
# boolean mask whether or not `genres` still contains NaN.
df[df['genres'].str.contains('Horror', na=False)].groupby('director').revenue_musd.sum().nlargest(20)

director
Paul W.S. Anderson    982.29
James Wan             861.31
Wes Craven            834.93
Francis Lawrence      816.23
Ridley Scott          689.00
Marc Forster          531.87
Steven Spielberg      500.10
William Friedkin      466.40
Darren Lynn Bousman   456.34
M. Night Shyamalan    375.37
Henry Joost           349.07
David R. Ellis        348.74
Adrian Lyne           346.11
James DeMonaco        316.70
Stephen Sommers       311.46
Gore Verbinski        275.91
Guillermo del Toro    261.63
John R. Leonetti      255.27
Fede Alvarez          254.64
Jordan Peele          252.43
Name: revenue_musd, dtype: float64

# Well Done!!!!