In [79]:
import requests
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import time
from config import api_key
import pprint as pp
from collections import Counter

In [91]:
base_url = 'https://api.themoviedb.org/3/discover/movie?api_key='

In [92]:
#url string to pass to requests
url = f"{base_url}{api_key}"

#parameters (queries) to pass to API Call
language = 'en-US'
sort_by = 'revenue.desc'
primary_release_year = None
_pages = 10

#function to make a single API call for page 'p'
def make_request(p):
    
    payload = {'language': language,
               'sort_by': sort_by,
               'primary_release_year': primary_release_year,
               'page': p
          }
    
    r = requests.get(url, params=payload)
    
    return r.json()

In [21]:
#function to make 'n' api calls
def make_api_calls(n):
    
    results = []
    
    for i in range(1, n+1):
        
        results.append(make_request(i))
        time.sleep(1)
        
    return results

x = make_api_calls(_pages)

In [23]:
#function to add all movie ids from your disover query to a list
def extract_ids(data):
    
    list_ids = []
    
    for page in data:
        
        for result in page['results']:
            list_ids.append(result['id'])

    return list_ids

list_ids = extract_ids(x)

In [25]:
#function to query single film using list of ids
query_url = 'https://api.themoviedb.org/3/movie/'

def query_movie(list_ids):
    
    results = []
    
    payload = {}
    
    for id in list_ids:
        
        r = requests.get(f'{query_url}{id}?api_key={api_key}', params=payload)
        results.append(r.json())
        time.sleep(1)
    
    return results

_results = query_movie(list_ids)

In [35]:
_results[0].keys()

dict_keys(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id', 'imdb_id', 'original_language', 'original_title', 'overview', 'popularity', 'poster_path', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'video', 'vote_average', 'vote_count'])

In [245]:
#Create DataFrame
df = pd.DataFrame.from_dict(_results)

In [246]:
#Drop Unnecessary Columns
df = df.drop(columns=['adult', 'backdrop_path', 'belongs_to_collection', 'homepage','poster_path', 'spoken_languages', 'status', 'status_code', 'status_message', 'video'])

In [247]:
df.head(5)

Unnamed: 0,budget,genres,id,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,tagline,title,vote_average,vote_count
0,237000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995.0,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",28.695,"[{'id': 289, 'logo_path': None, 'name': 'Ingen...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2787965000.0,162.0,Enter the World of Pandora.,Avatar,7.4,18058.0
1,245000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",140607.0,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,31.553,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068224000.0,136.0,Every generation has a story.,Star Wars: The Force Awakens,7.4,12278.0
2,300000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",299536.0,tt4154756,en,Avengers: Infinity War,As the Avengers and their allies have continue...,137.906,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2046240000.0,149.0,An entire universe. Once and for all.,Avengers: Infinity War,8.3,12288.0
3,200000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",597.0,tt0120338,en,Titanic,101-year-old Rose DeWitt Bukater tells the sto...,23.86,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,1845034000.0,194.0,Nothing on Earth could come between them.,Titanic,7.8,13630.0
4,61.0,[],570421.0,,en,Star Wars: The Original Trilogy,"Years later, the original Star Wars trilogy is...",0.6,[],[],,1788879000.0,377.0,,Star Wars: The Original Trilogy,8.5,1.0


In [249]:
#Rearrange the Columns
df = df[['id', 'imdb_id', 'title', 'original_title', 'release_date', 'genres',
         'revenue', 'budget', 'runtime', 'tagline', 'overview', 'production_companies', 'production_countries', 'vote_average', 'vote_count' ]]

In [251]:
#Cast release_date as datetime
df['release_date'] = pd.to_datetime(df['release_date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [252]:
#Make a Copy of the DataFrame to work on
dfx = df.copy(deep=True)
dfx.shape

(2000, 15)

In [353]:
list_all_production_cos = []

for i in range(len(dfx)):
    for company in dfx.iloc[i]['all_production_cos']:
        list_all_production_cos.append(company)

In [354]:
appearances = Counter(list_all_production_cos)

sorted_appearances = sorted(zip(appearances.values(), appearances.keys()), reverse=True)

sorted_appearances

[(226, 'WarnerBros.Pictures'),
 (221, 'UniversalPictures'),
 (183, 'ColumbiaPictures'),
 (174, 'Paramount'),
 (170, '20thCenturyFox'),
 (108, 'WaltDisneyPictures'),
 (90, 'NewLineCinema'),
 (74, 'DreamWorks'),
 (68, 'RelativityMedia'),
 (66, 'VillageRoadshowPictures'),
 (63, 'TouchstonePictures'),
 (49, 'Metro-Goldwyn-Mayer'),
 (49, 'DuneEntertainment'),
 (49, 'AmblinEntertainment'),
 (43, 'RegencyEnterprises'),
 (38, 'TSGEntertainment'),
 (35, 'LegendaryEntertainment'),
 (35, 'DreamWorksAnimation'),
 (33, 'Miramax'),
 (32, 'WorkingTitleFilms'),
 (32, 'SummitEntertainment'),
 (31, 'OriginalFilm'),
 (31, 'ImagineEntertainment'),
 (31, 'Canal+'),
 (29, 'Lionsgate'),
 (29, 'Fox2000Pictures'),
 (28, 'StudioCanal'),
 (27, 'TriStarPictures'),
 (27, 'DavisEntertainment'),
 (26, 'ScreenGems'),
 (24, 'SilverPictures'),
 (24, 'RatPac-DuneEntertainment'),
 (24, 'DimensionFilms'),
 (23, 'UnitedArtists'),
 (23, 'NewRegencyPictures'),
 (22, 'WaltDisneyAnimationStudios'),
 (22, 'TheKennedy/MarshallCo

In [348]:
#Maks CSV File from Dataframe (from API Call)
dfx.to_csv('dfx_movies.csv')

In [358]:
_2010_2020_ = dfx.loc[(dfx['release_date'] > '2010-01-01') & (dfx['release_date'] < '2020-01-01'), ['title', 'release_date', 'revenue', 'budget']]

Unnamed: 0,title,release_date,revenue,budget
1,Star Wars: The Force Awakens,2015-12-15,2068224000.0,245000000.0
2,Avengers: Infinity War,2018-04-25,2046240000.0,300000000.0
5,Jurassic World,2015-06-06,1671713000.0,150000000.0
6,The Avengers,2012-04-25,1519558000.0,220000000.0
7,Furious 7,2015-04-01,1506249000.0,190000000.0
8,Avengers: Age of Ultron,2015-04-22,1405404000.0,280000000.0
9,Black Panther,2018-02-13,1346739000.0,200000000.0
10,Harry Potter and the Deathly Hallows: Part 2,2011-07-07,1342000000.0,125000000.0
11,Star Wars: The Last Jedi,2017-12-13,1332460000.0,200000000.0
12,Jurassic World: Fallen Kingdom,2018-06-06,1303460000.0,170000000.0


In [6]:
from bs4 import BeautifulSoup

def build_url(page):
    host = 'https://www.the-numbers.com/movie/budgets/all/'
    return f"{host}{page}"

def scrape(page):
    #calls url function and parses results into Beautiful Soup
    r = requests.get(build_url(page))
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    return soup


In [7]:

def parse_scrape(soup_object):

    table = soup_object.find( "table")
    movie_data = table.find_all('td', class_ = 'data')

    titles = []
    movie_list = []
    
    for i in table.find_all('b'):
        titles.append(i.get_text())

    i = 0
    for row in range(int(len(movie_data)/4)):
        movie_list.append({'name': titles[row], 'budget': movie_data[row+i+1].get_text(), 'dom_gross': movie_data[row+i+2].get_text(), 'ww_gross': movie_data[row+i+3].get_text()})
        i += 3
        
    return movie_list

def loop_through(loops):
    
    movie_list = []
    
    adder = 100
    page = 1
    
    for i in range(loops):
        output = parse_scrape(scrape(page))
        for j in range(len(output)):
            movie_list.append(output[j])
        page += adder
        
    return movie_list

movie = loop_through(20)

In [63]:
from pandas.io.json import json_normalize
data = movie
movie_nums = json_normalize(data)
movie_nums.head()

Unnamed: 0,budget,dom_gross,name,ww_gross
0,"$425,000,000","$760,507,625",Avatar,"$2,776,345,279"
1,"$410,600,000","$241,063,875",Pirates of the Caribbean: On Stranger Tides,"$1,045,663,875"
2,"$330,600,000","$459,005,868",Avengers: Age of Ultron,"$1,403,013,963"
3,"$317,000,000","$620,181,382",Star Wars Ep. VIII: The Last Jedi,"$1,316,721,747"
4,"$306,000,000","$936,662,225",Star Wars Ep. VII: The Force Awakens,"$2,053,311,220"


In [64]:
movie_nums = movie_nums[['name', 'budget', 'dom_gross', 'ww_gross']]
movie_nums.head()

Unnamed: 0,name,budget,dom_gross,ww_gross
0,Avatar,"$425,000,000","$760,507,625","$2,776,345,279"
1,Pirates of the Caribbean: On Stranger Tides,"$410,600,000","$241,063,875","$1,045,663,875"
2,Avengers: Age of Ultron,"$330,600,000","$459,005,868","$1,403,013,963"
3,Star Wars Ep. VIII: The Last Jedi,"$317,000,000","$620,181,382","$1,316,721,747"
4,Star Wars Ep. VII: The Force Awakens,"$306,000,000","$936,662,225","$2,053,311,220"


In [66]:
cols = ['budget', 'dom_gross', 'ww_gross']

movie_nums['budget'] = movie_nums['budget'].str.strip('$').str.replace(',','')
movie_nums['dom_gross'] = movie_nums['dom_gross'].str.strip('$').str.replace(',','')
movie_nums['ww_gross'] = movie_nums['ww_gross'].str.strip('$').str.replace(',','')

movie_nums[cols] = movie_nums[cols].applymap(np.int64)

movie_nums.head()

Unnamed: 0,name,budget,dom_gross,ww_gross
0,Avatar,425000000,760507625,2776345279
1,Pirates of the Caribbean: On Stranger Tides,410600000,241063875,1045663875
2,Avengers: Age of Ultron,330600000,459005868,1403013963
3,Star Wars Ep. VIII: The Last Jedi,317000000,620181382,1316721747
4,Star Wars Ep. VII: The Force Awakens,306000000,936662225,2053311220


In [67]:
movie_nums[cols] = movie_nums[cols].replace(0, np.nan)

In [69]:
movie_nums.dropna(subset = cols, inplace=True)
movie_nums.shape

(1970, 4)

In [381]:
movie_nums['multiple'] = movie_nums['ww_gross'] / movie_nums['budget']
movie_nums.head(20)

Unnamed: 0,name,budget,dom_gross,ww_gross,multiple
0,Avatar,425000000,760507625.0,2776345000.0,6.532577
1,Pirates of the Caribbean: On Stranger Tides,410600000,241063875.0,1045664000.0,2.546673
2,Avengers: Age of Ultron,330600000,459005868.0,1403014000.0,4.243841
3,Star Wars Ep. VIII: The Last Jedi,317000000,620181382.0,1316722000.0,4.153696
4,Star Wars Ep. VII: The Force Awakens,306000000,936662225.0,2053311000.0,6.710167
5,Avengers: Infinity War,300000000,678815482.0,2048798000.0,6.829326
6,Pirates of the Caribbean: At Worldâs End,300000000,309420425.0,963420400.0,3.211401
7,Justice League,300000000,229024295.0,655945200.0,2.186484
8,Spectre,300000000,200074175.0,879620900.0,2.93207
9,The Dark Knight Rises,275000000,448139099.0,1084439000.0,3.943415


In [359]:
"""Import dfx.csv file created earlier in Notebook"""
dfx = pd.read_csv('dfx_movies.csv')
dfx

'Import dfx.csv file created earlier in Notebook'

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres
0,0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,2.787965e+09,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"['Ingenious Film Partners', 'Dune Entertainmen...","['Action', 'Adventure', 'Fantasy', 'Science Fi..."
1,1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,2.068224e+09,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"['Truenorth Productions', 'Lucasfilm', 'Bad Ro...","['Action', 'Adventure', 'Science Fiction', 'Fa..."
2,2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,2.046240e+09,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,['Marvel Studios'],"['Adventure', 'Action', 'Fantasy']"
3,3,597.0,tt0120338,Titanic,Titanic,1997-11-18,1.845034e+09,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"['Paramount', 'Lightstorm Entertainment', '20t...","['Drama', 'Romance', 'Thriller']"
4,4,570421.0,,Star Wars: The Original Trilogy,Star Wars: The Original Trilogy,,1.788879e+09,61.0,377.0,,"Years later, the original Star Wars trilogy is...",[],8.5,1.0,[],[]
5,5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,1.671713e+09,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"['Fuji Television Network', 'Amblin Entertainm...","['Action', 'Adventure', 'Science Fiction', 'Th..."
6,6,24428.0,tt0848228,The Avengers,The Avengers,2012-04-25,1.519558e+09,220000000.0,143.0,Some assembly required.,When an unexpected enemy emerges and threatens...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.6,18555.0,['Marvel Studios'],"['Science Fiction', 'Action', 'Adventure']"
7,7,168259.0,tt2820852,Furious 7,Furious 7,2015-04-01,1.506249e+09,190000000.0,137.0,Vengeance Hits Home,Deckard Shaw seeks revenge against Dominic Tor...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,6314.0,"['Abu Dhabi Film Commission', 'Universal Pictu...","['Action', 'Crime', 'Thriller', 'Drama']"
8,8,99861.0,tt2395427,Avengers: Age of Ultron,Avengers: Age of Ultron,2015-04-22,1.405404e+09,280000000.0,141.0,A New Age Has Come.,When Tony Stark tries to jumpstart a dormant p...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,12517.0,['Marvel Studios'],"['Action', 'Adventure', 'Science Fiction']"
9,9,284054.0,tt1825683,Black Panther,Black Panther,2018-02-13,1.346739e+09,200000000.0,134.0,Long live the king.,King T'Challa returns home from America to the...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,11320.0,"['Marvel Studios', 'Walt Disney Pictures']","['Action', 'Adventure', 'Fantasy', 'Science Fi..."


In [360]:
dfx.shape

(1990, 16)

In [361]:
#Replace '0' Values in Revenue & Budget with nan
dfx_cols = ['revenue', 'budget']
dfx[dfx_cols] = dfx[dfx_cols].replace(0, np.nan)

In [362]:
#Drop rows with nan Values in Revenue & Budget
dfx.dropna(subset = dfx_cols, inplace=True)
dfx.shape

(1900, 16)

In [364]:
#Clean up Genres
dfx['list_genres'] = dfx['list_genres'].str.strip('[]').str.replace(' ','').str.replace("'",'')
dfx['list_genres'] = dfx['list_genres'].str.split(',')

In [374]:
list_all_genres = []

for row in dfx['list_genres']:
    for genre in row:
        list_all_genres.append(genre)
        
list_all_genres[:10]


['Action',
 'Adventure',
 'Fantasy',
 'ScienceFiction',
 'Action',
 'Adventure',
 'ScienceFiction',
 'Fantasy',
 'Adventure',
 'Action']

In [366]:
#Count & Sort Genres
genre_appearances = Counter(list_all_genres)
sorted_appearances = sorted(zip(genre_appearances.values(), genre_appearances.keys()), reverse=True)


In [375]:
sorted_appearances[:10]

[(700, 'Comedy'),
 (691, 'Action'),
 (687, 'Drama'),
 (572, 'Adventure'),
 (548, 'Thriller'),
 (344, 'Family'),
 (315, 'Fantasy'),
 (306, 'ScienceFiction'),
 (297, 'Romance'),
 (266, 'Crime')]

In [368]:
#Clean up Production Cos
dfx['all_production_cos'] = dfx['all_production_cos'].str.strip('[]').str.replace(' ','').str.replace("'",'')
dfx['all_production_cos'] = dfx['all_production_cos'].str.split(',')

In [449]:
all_cos = []

for row in dfx['all_production_cos']:
    for co in row:
        all_cos.append(co)
        
all_cos

['IngeniousFilmPartners',
 'DuneEntertainment',
 'LightstormEntertainment',
 '20thCenturyFox',
 'TruenorthProductions',
 'Lucasfilm',
 'BadRobot',
 'MarvelStudios',
 'Paramount',
 'LightstormEntertainment',
 '20thCenturyFox',
 'FujiTelevisionNetwork',
 'AmblinEntertainment',
 'LegendaryEntertainment',
 'UniversalPictures',
 'Dentsu',
 'TheKennedy/MarshallCompany',
 'MarvelStudios',
 'AbuDhabiFilmCommission',
 'UniversalPictures',
 'ChinaFilmCo.',
 'OriginalFilm',
 'FujiTelevisionNetwork',
 'QuébecProductionServicesTaxCredit',
 'ColoradoOfficeofFilm',
 'Television&Media',
 'Dentsu',
 'MediaRightsCapital',
 'OneRace',
 'MarvelStudios',
 'MarvelStudios',
 'WaltDisneyPictures',
 'WarnerBros.Pictures',
 'HeydayFilms',
 'Lucasfilm',
 'WaltDisneyPictures',
 'RamBergmanProductions',
 'FormulaFilm',
 'TruenorthProductions',
 'JucumariFilms',
 'AmblinEntertainment',
 'LegendaryEntertainment',
 'UniversalPictures',
 'PerfectWorldPictures',
 'WaltDisneyPictures',
 'WaltDisneyAnimationStudios',
 'W

In [377]:
#Count & Sort Production Companies
prod_appearances = Counter(all_cos)
prod_sorted = sorted(zip(prod_apperances.values(), prod_appearances.keys()), reverse=True)
prod_sorted[:10]

[(226, 'HeydayFilms'),
 (221, 'Dentsu'),
 (184, 'UnitedArtists'),
 (174, 'Paramount'),
 (170, '20thCenturyFox'),
 (108, 'WarnerBros.Pictures'),
 (90, 'WingNutFilms'),
 (74, 'GKFilms'),
 (68, 'F&FVIProductionsA.I.E'),
 (66, 'SilverPictures')]

In [382]:
#Convert Release Dates to datetime and Make Release Year column
dfx.release_date = pd.to_datetime(dfx['release_date'])
dfx['release_year'] = dfx['release_date'].dt.year
dfx.head()

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres,release_year,multiple
0,0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,2787965000.0,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"[IngeniousFilmPartners, DuneEntertainment, Lig...","[Action, Adventure, Fantasy, ScienceFiction]",2009,11.763566
1,1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,2068224000.0,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"[TruenorthProductions, Lucasfilm, BadRobot]","[Action, Adventure, ScienceFiction, Fantasy]",2015,8.441729
2,2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,2046240000.0,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,[MarvelStudios],"[Adventure, Action, Fantasy]",2018,6.820799
3,3,597.0,tt0120338,Titanic,Titanic,1997-11-18,1845034000.0,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"[Paramount, LightstormEntertainment, 20thCentu...","[Drama, Romance, Thriller]",1997,9.225171
5,5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,1671713000.0,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"[FujiTelevisionNetwork, AmblinEntertainment, L...","[Action, Adventure, ScienceFiction, Thriller]",2015,11.144755


In [371]:
dfx['multiple'] = dfx['revenue'] / dfx['budget']
dfx.sort_values('multiple', ascending=False).head(10)

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres,release_year,multiple
4,4,570421.0,,Star Wars: The Original Trilogy,Star Wars: The Original Trilogy,NaT,1788879000.0,61.0,377.0,,"Years later, the original Star Wars trilogy is...",[],8.5,1.0,[],[],,29325890.0
1330,1330,8856.0,tt0091326,"The Karate Kid, Part II","The Karate Kid, Part II",1986-06-18,115104000.0,113.0,113.0,"This time, the combat is real.",Mr. Miyagi and Daniel take a trip to Okinawa t...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.1,817.0,"[DelphiVProductions, ColumbiaPictures]","[Adventure, Drama, Action, Romance, Family]",1986.0,1018619.0
769,769,23827.0,tt1179904,Paranormal Activity,Paranormal Activity,2009-09-25,193355800.0,15000.0,86.0,What Happens When You Sleep?,"After a young, middle class couple moves into ...","[{'iso_3166_1': 'US', 'name': 'United States o...",5.9,2613.0,"[BlumhouseProductions, SolanaFilms]","[Horror, Mystery]",2009.0,12890.39
744,744,513434.0,tt7914416,One Cut of the Dead,カメラを止めるな！,2017-11-04,200000000.0,30000.0,96.0,Don't Stop Shooting!,Things go badly for a hack director and film c...,"[{'iso_3166_1': 'JP', 'name': 'Japan'}]",8.4,47.0,"[Panpokopina, ENBUSeminar]","[Comedy, Horror, Drama]",2017.0,6666.667
1512,1512,506972.0,,Khaltoor,Khaltoor,2018-02-22,100000000.0,15000.0,90.0,Khaltoor,The lives of three young musicians who are try...,[],0.0,0.0,[Pooyafilm],"[Comedy, Crime]",2018.0,6666.667
1514,1514,491562.0,,Derakoola,دراکولا,2016-09-26,100000000.0,15000.0,,,An addict is captured by a Dracula. Dracula's ...,"[{'iso_3166_1': 'IR', 'name': 'Iran'}]",0.0,0.0,[IranianIndependents],"[Drama, Comedy]",2016.0,6666.667
577,577,2667.0,tt0185937,The Blair Witch Project,The Blair Witch Project,1999-07-14,248000000.0,60000.0,81.0,The scariest movie of all time is a true story.,In October of 1994 three student filmmakers di...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.3,2077.0,"[HaxanFilms, ArtisanEntertainment]","[Horror, Mystery]",1999.0,4133.333
1513,1513,506664.0,tt3718222,Yeki Mikhad Bahat Harf Bezane,Yeki Mikhad Bahat Harf Bezane,2012-09-22,100000000.0,100000.0,90.0,Yeki Mikhad Bahat Harf Bezane,Yasaman has been living with her mother Leila ...,[],0.0,0.0,[IranianIndependents],"[Drama, Family]",2012.0,1000.0
1718,1718,9462.0,tt0068935,The Way of the Dragon,猛龍過江,1972-06-01,85000000.0,130000.0,100.0,The Colosseum . . the battleground of Bruce Le...,Tang Lung arrives in Rome to help his cousins ...,"[{'iso_3166_1': 'HK', 'name': 'Hong Kong'}]",7.5,405.0,"[OrangeSkyGoldenHarvest, ConcordProductionsInc.]","[Action, Crime]",1972.0,653.8462
528,528,3170.0,tt0034492,Bambi,Bambi,1942-08-14,267447200.0,858000.0,70.0,A great love story.,Bambi's tale unfolds from season to season as ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.9,2814.0,[WaltDisneyProductions],"[Animation, Drama, Family]",1942.0,311.71


In [383]:
#Drop Karate Kid (Budget Outlier)
dfx.drop(dfx.loc[dfx['id'] == 8856.0].index, inplace=True)
#Drop Star Wars Trilogy
dfx.drop(dfx.loc[dfx['id'] == 570421.0].index, inplace=True)

In [388]:
#Drop Two Columns
dfx.drop(columns=['Unnamed: 0', 'original_title'])

Unnamed: 0,id,imdb_id,title,release_date,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres,release_year,multiple
0,19995.0,tt0499549,Avatar,2009-12-10,2.787965e+09,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"[IngeniousFilmPartners, DuneEntertainment, Lig...","[Action, Adventure, Fantasy, ScienceFiction]",2009,11.763566
1,140607.0,tt2488496,Star Wars: The Force Awakens,2015-12-15,2.068224e+09,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"[TruenorthProductions, Lucasfilm, BadRobot]","[Action, Adventure, ScienceFiction, Fantasy]",2015,8.441729
2,299536.0,tt4154756,Avengers: Infinity War,2018-04-25,2.046240e+09,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,[MarvelStudios],"[Adventure, Action, Fantasy]",2018,6.820799
3,597.0,tt0120338,Titanic,1997-11-18,1.845034e+09,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"[Paramount, LightstormEntertainment, 20thCentu...","[Drama, Romance, Thriller]",1997,9.225171
5,135397.0,tt0369610,Jurassic World,2015-06-06,1.671713e+09,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"[FujiTelevisionNetwork, AmblinEntertainment, L...","[Action, Adventure, ScienceFiction, Thriller]",2015,11.144755
6,24428.0,tt0848228,The Avengers,2012-04-25,1.519558e+09,220000000.0,143.0,Some assembly required.,When an unexpected enemy emerges and threatens...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.6,18555.0,[MarvelStudios],"[ScienceFiction, Action, Adventure]",2012,6.907081
7,168259.0,tt2820852,Furious 7,2015-04-01,1.506249e+09,190000000.0,137.0,Vengeance Hits Home,Deckard Shaw seeks revenge against Dominic Tor...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,6314.0,"[AbuDhabiFilmCommission, UniversalPictures, Ch...","[Action, Crime, Thriller, Drama]",2015,7.927628
8,99861.0,tt2395427,Avengers: Age of Ultron,2015-04-22,1.405404e+09,280000000.0,141.0,A New Age Has Come.,When Tony Stark tries to jumpstart a dormant p...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,12517.0,[MarvelStudios],"[Action, Adventure, ScienceFiction]",2015,5.019299
9,284054.0,tt1825683,Black Panther,2018-02-13,1.346739e+09,200000000.0,134.0,Long live the king.,King T'Challa returns home from America to the...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,11320.0,"[MarvelStudios, WaltDisneyPictures]","[Action, Adventure, Fantasy, ScienceFiction]",2018,6.733696
10,12445.0,tt1201607,Harry Potter and the Deathly Hallows: Part 2,2011-07-07,1.342000e+09,125000000.0,130.0,It all ends here.,"Harry, Ron and Hermione continue their quest t...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",8.1,11046.0,"[WarnerBros.Pictures, HeydayFilms]","[Family, Fantasy, Adventure]",2011,10.736000


In [421]:
#Make List of Movies Lionsgate was listed as one of Productino Companies
list_lion = []

for i in range(len(dfx)):
    for item in dfx.iloc[i]['all_production_cos']:
        if 'lion' in item.lower():
            list_lion.append(dfx.iloc[i]['title'])
            print(dfx.iloc[i]['all_production_cos'], '-->', dfx.iloc[i]['title'])
len(list_lion)

['Lionsgate', 'ColorForce'] --> The Hunger Games: Catching Fire
['Lionsgate', 'ColorForce'] --> The Hunger Games: Mockingjay - Part 1
['Lionsgate', 'ColorForce'] --> The Hunger Games
['StudioBabelsberg', 'Lionsgate', 'ColorForce'] --> The Hunger Games: Mockingjay - Part 2
['"MelsCiteduCinema"', 'LionsGateFilms', 'CentropolisEntertainment', '20thCenturyFox', 'TheMarkGordonCompany'] --> The Day After Tomorrow
['SummitEntertainment', 'Lionsgate', 'K/OPaperProducts', 'TIKFilms'] --> Now You See Me 2
['SummitEntertainment', 'RedWagonEntertainment', 'MandevilleFilms', 'NeoReel', 'Lionsgate'] --> Insurgent
['Davis-Films', 'NuImage', 'Ex3Productions', 'FipexHolding', 'Lionsgate', 'MillenniumFilms'] --> The Expendables 3
['MandevilleFilms', 'SummitEntertainment', 'Lionsgate', 'RedWagonEntertainment'] --> Allegiant
['ThunderRoadPictures', '87Eleven', 'Lionsgate'] --> John Wick: Chapter 2
['TwistedPictures', 'EvolutionEntertainment', 'LionsGateFilms'] --> Saw III
['Saw2Productions', 'LionsGateFil

39

In [None]:
#df['is_it_whole'] = df['value'].apply(lambda x: x.is_integer())

In [413]:
#Create Column if Lionsgate Production Co
def by_lionsgate(row):
    if row['title'] in list_lion:
        val = True
    else:
        val = False
    
    return val

dfx['Lionsgate'] = dfx.apply(by_lionsgate, axis=1)

In [417]:
#All Lionsgate
dfx[dfx['Lionsgate'] == True][:10]

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres,release_year,multiple,Lionsgate
71,71,101299.0,tt1951264,The Hunger Games: Catching Fire,The Hunger Games: Catching Fire,2013-11-15,847423452.0,130000000.0,146.0,Every revolution begins with a spark.,Katniss Everdeen has returned home safe after ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,10787.0,"[Lionsgate, ColorForce]","[Adventure, Action, ScienceFiction]",2013,6.518642,True
97,97,131631.0,tt1951265,The Hunger Games: Mockingjay - Part 1,The Hunger Games: Mockingjay - Part 1,2014-11-18,752100229.0,125000000.0,123.0,Fire burns brighter in the darkness,Katniss Everdeen reluctantly becomes the symbo...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.8,9751.0,"[Lionsgate, ColorForce]","[ScienceFiction, Adventure, Thriller]",2014,6.016802,True
117,117,70160.0,tt1392170,The Hunger Games,The Hunger Games,2012-03-12,691210692.0,75000000.0,142.0,May The Odds Be Ever In Your Favor.,Every year in the ruins of what was once North...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.1,14138.0,"[Lionsgate, ColorForce]","[ScienceFiction, Adventure, Fantasy]",2012,9.216143,True
129,129,131634.0,tt1951266,The Hunger Games: Mockingjay - Part 2,The Hunger Games: Mockingjay - Part 2,2015-11-18,653428261.0,160000000.0,137.0,The fire will burn forever.,"With the nation of Panem in a full scale war, ...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is...",6.8,7328.0,"[StudioBabelsberg, Lionsgate, ColorForce]","[Action, Adventure, ScienceFiction]",2015,4.083927,True
175,175,435.0,tt0319262,The Day After Tomorrow,The Day After Tomorrow,2004-05-26,544272402.0,125000000.0,124.0,Where will you be?,After years of increases in the greenhouse eff...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.4,4172.0,"[""MelsCiteduCinema"", LionsGateFilms, Centropol...","[Action, Adventure, ScienceFiction, Thriller]",2004,4.354179,True
386,386,291805.0,tt3110958,Now You See Me 2,Now You See Me 2,2016-06-02,334901337.0,90000000.0,129.0,You Haven't Seen Anything Yet,One year after outwitting the FBI and winning ...,"[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",6.8,6455.0,"[SummitEntertainment, Lionsgate, K/OPaperProdu...","[Action, Comedy, Thriller]",2016,3.721126,True
468,468,262500.0,tt2908446,Insurgent,Insurgent,2015-03-18,295238201.0,110000000.0,119.0,One Choice Can Destroy You,Beatrice Prior must confront her inner demons ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.3,6548.0,"[SummitEntertainment, RedWagonEntertainment, M...","[Action, Adventure, ScienceFiction, Thriller]",2015,2.683984,True
718,718,138103.0,tt2333784,The Expendables 3,The Expendables 3,2014-08-04,206172544.0,90000000.0,127.0,New team. New attitude. New mission.,"Barney, Christmas and the rest of the team com...","[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...",6.1,2775.0,"[Davis-Films, NuImage, Ex3Productions, FipexHo...","[Action, Adventure, Thriller]",2014,2.290806,True
838,838,262504.0,tt3410834,Allegiant,Allegiant,2016-03-09,179246868.0,110000000.0,121.0,Break the boundaries of your world,Beatrice Prior and Tobias Eaton venture into t...,"[{'iso_3166_1': 'US', 'name': 'United States o...",5.9,3953.0,"[MandevilleFilms, SummitEntertainment, Lionsga...","[Adventure, ScienceFiction, Action, Mystery]",2016,1.629517,True
891,891,324552.0,tt4425200,John Wick: Chapter 2,John Wick: Chapter 2,2017-02-08,171539887.0,40000000.0,122.0,Never stab the devil in the back,John Wick is forced out of retirement by a for...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.9,5308.0,"[ThunderRoadPictures, 87Eleven, Lionsgate]","[Thriller, Action, Crime]",2017,4.288497,True


In [437]:
#Comparing Lionsgate Multiple ("True") versus Dataset (Full Dataset is top ~2000 revenue producing films of all time)
dfx.groupby('Lionsgate')['multiple'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Lionsgate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
False,1859.0,27.819537,413.109014,0.350157,2.350233,3.670761,6.567695,12890.386667
True,39.0,8.388801,14.680865,0.675028,2.063911,3.741367,7.966518,86.593058


In [450]:
prod_var = 'disney'

prod_list = []

for i in range(len(dfx)):
    for item in dfx.iloc[i]['all_production_cos']:
        if prod_var in item.lower():
            prod_list.append(dfx.iloc[i]['title'])
            #print(dfx.iloc[i]['all_production_cos'], '-->', dfx.iloc[i]['title'])

#Create Column if Warner
def prod_check(row):
    if row['title'] in prod_list:
        val = True
    else:
        val = False
    
    return val

dfx[prod_var] = dfx.apply(prod_check, axis=1)

In [466]:
dfx.groupby('production_companies')['revenue'].describe().sort_values('mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
production_companies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"['IngeniousFilmPartners', 'DuneEntertainment', 'LightstormEntertainment', '20thCenturyFox']",1.0,2.787965e+09,,2.787965e+09,2.787965e+09,2.787965e+09,2.787965e+09,2.787965e+09
"['TruenorthProductions', 'Lucasfilm', 'BadRobot']",1.0,2.068224e+09,,2.068224e+09,2.068224e+09,2.068224e+09,2.068224e+09,2.068224e+09
"['Paramount', 'LightstormEntertainment', '20thCenturyFox']",1.0,1.845034e+09,,1.845034e+09,1.845034e+09,1.845034e+09,1.845034e+09,1.845034e+09
"['FujiTelevisionNetwork', 'AmblinEntertainment', 'LegendaryEntertainment', 'UniversalPictures', 'Dentsu', 'TheKennedy/MarshallCompany']",1.0,1.671713e+09,,1.671713e+09,1.671713e+09,1.671713e+09,1.671713e+09,1.671713e+09
"['AbuDhabiFilmCommission', 'UniversalPictures', 'ChinaFilmCo.', 'OriginalFilm', 'FujiTelevisionNetwork', 'QuébecProductionServicesTaxCredit', 'ColoradoOfficeofFilm', 'Television&Media', 'Dentsu', 'MediaRightsCapital', 'OneRace']",1.0,1.506249e+09,,1.506249e+09,1.506249e+09,1.506249e+09,1.506249e+09,1.506249e+09
"['Lucasfilm', 'WaltDisneyPictures', 'RamBergmanProductions', 'FormulaFilm', 'TruenorthProductions', 'JucumariFilms']",1.0,1.332460e+09,,1.332460e+09,1.332460e+09,1.332460e+09,1.332460e+09,1.332460e+09
"['AmblinEntertainment', 'LegendaryEntertainment', 'UniversalPictures', 'PerfectWorldPictures']",1.0,1.303460e+09,,1.303460e+09,1.303460e+09,1.303460e+09,1.303460e+09,1.303460e+09
"['WaltDisneyPictures', 'Pixar']",1.0,1.241891e+09,,1.241891e+09,1.241891e+09,1.241891e+09,1.241891e+09,1.241891e+09
"['UniversalPictures', 'OriginalFilm', 'FujiEightCompanyLtd.', 'ChinaFilmCo.', 'Dentsu', 'OneRace']",1.0,1.238765e+09,,1.238765e+09,1.238765e+09,1.238765e+09,1.238765e+09,1.238765e+09
"['DCComics', 'DCEntertainment', 'WarnerBros.Pictures', 'TheSafranCompany', 'MadGhostProductions', 'RodeoFX', 'PanoramicPictures']",1.0,1.143689e+09,,1.143689e+09,1.143689e+09,1.143689e+09,1.143689e+09,1.143689e+09


In [457]:
dfx.sort_values('multiple', ascending=False)

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,...,all_production_cos,list_genres,release_year,multiple,Lionsgate,Warner,fox,marvel,paramount,disney
769,769,23827.0,tt1179904,Paranormal Activity,Paranormal Activity,2009-09-25,193355800.0,15000.0,86.0,What Happens When You Sleep?,...,"[BlumhouseProductions, SolanaFilms]","[Horror, Mystery]",2009,12890.386667,False,False,False,False,False,False
1514,1514,491562.0,,Derakoola,دراکولا,2016-09-26,100000000.0,15000.0,,,...,[IranianIndependents],"[Drama, Comedy]",2016,6666.666667,False,False,False,False,False,False
744,744,513434.0,tt7914416,One Cut of the Dead,カメラを止めるな！,2017-11-04,200000000.0,30000.0,96.0,Don't Stop Shooting!,...,"[Panpokopina, ENBUSeminar]","[Comedy, Horror, Drama]",2017,6666.666667,False,False,False,False,False,False
1512,1512,506972.0,,Khaltoor,Khaltoor,2018-02-22,100000000.0,15000.0,90.0,Khaltoor,...,[Pooyafilm],"[Comedy, Crime]",2018,6666.666667,False,False,False,False,False,False
577,577,2667.0,tt0185937,The Blair Witch Project,The Blair Witch Project,1999-07-14,248000000.0,60000.0,81.0,The scariest movie of all time is a true story.,...,"[HaxanFilms, ArtisanEntertainment]","[Horror, Mystery]",1999,4133.333333,False,False,False,False,False,False
1513,1513,506664.0,tt3718222,Yeki Mikhad Bahat Harf Bezane,Yeki Mikhad Bahat Harf Bezane,2012-09-22,100000000.0,100000.0,90.0,Yeki Mikhad Bahat Harf Bezane,...,[IranianIndependents],"[Drama, Family]",2012,1000.000000,False,False,False,False,False,False
1718,1718,9462.0,tt0068935,The Way of the Dragon,猛龍過江,1972-06-01,85000000.0,130000.0,100.0,The Colosseum . . the battleground of Bruce Le...,...,"[OrangeSkyGoldenHarvest, ConcordProductionsInc.]","[Action, Crime]",1972,653.846154,False,False,False,False,False,False
528,528,3170.0,tt0034492,Bambi,Bambi,1942-08-14,267447150.0,858000.0,70.0,A great love story.,...,[WaltDisneyProductions],"[Animation, Drama, Family]",1942,311.709965,False,False,False,False,False,True
1506,1506,9659.0,tt0079501,Mad Max,Mad Max,1979-04-12,100000000.0,400000.0,91.0,The Maximum Force Of The Future,...,"[KennedyMillerProductions, Crossroads, MadMaxF...","[Adventure, Action, Thriller, ScienceFiction]",1979,250.000000,False,False,False,False,False,False
32,32,12092.0,tt0043274,Alice in Wonderland,Alice in Wonderland,1951-07-03,572000000.0,3000000.0,75.0,A world of wonders in One Great Picture,...,[WaltDisneyCompany],"[Animation, Family, Fantasy, Music, Adventure]",1951,190.666667,False,False,False,False,False,True


In [458]:
dfx['production_companies'] = dfx['all_production_cos'].astype(str)

In [467]:
dfx['revenue_m'] = dfx['revenue'] / 1000000
dfx['budget_m'] = dfx['budget'] / 1000000

In [480]:
dfx.groupby('release_year')['multiple'].aggregate([min, max, 'mean']).sort_values('release_year', ascending=False)

Unnamed: 0_level_0,min,max,mean
release_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019,2.251870,12.188263,5.109318
2018,0.732601,6666.666667,113.064904
2017,0.835286,6666.666667,96.924389
2016,0.940613,6666.666667,73.271067
2015,0.855922,19.690012,4.737469
2014,0.906397,39.545794,5.629205
2013,0.350157,29.776209,4.685315
2012,1.104292,1000.000000,21.500988
2011,1.013951,41.140764,4.761866
2010,0.936267,64.672767,5.429821


In [482]:
dfx.keys()

Index(['Unnamed: 0', 'id', 'imdb_id', 'title', 'original_title',
       'release_date', 'revenue', 'budget', 'runtime', 'tagline', 'overview',
       'production_countries', 'vote_average', 'vote_count',
       'all_production_cos', 'list_genres', 'release_year', 'multiple',
       'Lionsgate', 'Warner', 'fox', 'marvel', 'paramount', 'disney',
       'production_companies', 'revenue_m', 'budget_m'],
      dtype='object')

In [511]:
#dfx['production_countries'] = dfx['production_countries'].str.strip('[]').str.replace(' ','').str.replace("'",'')
#dfx['production_countries'] = dfx['production_countries'].str.split(',')

prod_country = 'US'

country_list = []

for i in range(len(dfx)):
    for item in dfx.iloc[i]['production_countries']:
        if prod_country in item:
            country_list.append(dfx.iloc[i]['title'])
            #print(dfx.iloc[i]['production_countries'], '-->', dfx.iloc[i]['title'])

#Create Column if Warner
def country_check(row):
    if row['title'] in country_list:
        val = True
    else:
        val = False
    
    return val

dfx['is_us'] = dfx.apply(country_check, axis=1)

In [513]:
dfx = dfx[dfx['is_us']]

In [514]:
dfx.head()

Unnamed: 0.1,Unnamed: 0,id,imdb_id,title,original_title,release_date,revenue,budget,runtime,tagline,...,Lionsgate,Warner,fox,marvel,paramount,disney,production_companies,revenue_m,budget_m,is_us
0,0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,2787965000.0,237000000.0,162.0,Enter the World of Pandora.,...,False,False,True,False,False,False,"['IngeniousFilmPartners', 'DuneEntertainment',...",2787.965087,237.0,True
1,1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,2068224000.0,245000000.0,136.0,Every generation has a story.,...,False,False,False,False,False,False,"['TruenorthProductions', 'Lucasfilm', 'BadRobot']",2068.223624,245.0,True
2,2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,2046240000.0,300000000.0,149.0,An entire universe. Once and for all.,...,False,False,False,True,False,False,['MarvelStudios'],2046.239637,300.0,True
3,3,597.0,tt0120338,Titanic,Titanic,1997-11-18,1845034000.0,200000000.0,194.0,Nothing on Earth could come between them.,...,False,False,True,False,True,False,"['Paramount', 'LightstormEntertainment', '20th...",1845.034188,200.0,True
5,5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,1671713000.0,150000000.0,124.0,The park is open.,...,False,False,False,False,False,False,"['FujiTelevisionNetwork', 'AmblinEntertainment...",1671.713208,150.0,True


In [515]:
dfx.shape

(1760, 28)

In [525]:
dfx.groupby('production_companies')['multiple'].aggregate([min, max, 'count', 'mean']).sort_values('count', ascending=False)

Unnamed: 0_level_0,min,max,count,mean
production_companies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
['Paramount'],2.109684,62.000000,30,10.084207
['MarvelStudios'],1.091414,6.907081,18,4.391042
['ColumbiaPictures'],0.819755,13.282500,17,3.717070
['UniversalPictures'],1.793196,29.473820,14,9.544009
['DreamWorksAnimation'],2.093116,5.151181,13,3.676233
['WaltDisneyPictures'],0.945013,51.460903,13,12.180846
"['WaltDisneyPictures', 'WaltDisneyAnimationStudios']",1.128887,21.944084,11,4.989146
['Pixar'],1.896721,12.451801,11,5.168114
['20thCenturyFox'],1.364705,16.081941,9,7.232175
"['Pixar', 'WaltDisneyPictures']",2.799262,6.863501,8,4.311965
