In [199]:
import requests
import pandas as pd
import json
import matplotlib.pyplot as plt
import time
from config import api_key
import pprint as pp

In [91]:
base_url = 'https://api.themoviedb.org/3/discover/movie?api_key='

In [92]:
#url string to pass to requests
url = f"{base_url}{api_key}"

#parameters (queries) to pass to API Call
language = 'en-US'
sort_by = 'revenue.desc'
primary_release_year = None
_pages = 100

#function to make a single API call for page 'p'
def make_request(p):
    
    payload = {'language': language,
               'sort_by': sort_by,
               'primary_release_year': primary_release_year,
               'page': p
          }
    
    r = requests.get(url, params=payload)
    
    return r.json()

In [21]:
#function to make 'n' api calls
def make_api_calls(n):
    
    results = []
    
    for i in range(1, n+1):
        
        results.append(make_request(i))
        time.sleep(1)
        
    return results

x = make_api_calls(_pages)

In [23]:
#function to add all movie ids from your disover query to a list
def extract_ids(data):
    
    list_ids = []
    
    for page in data:
        
        for result in page['results']:
            list_ids.append(result['id'])

    return list_ids

list_ids = extract_ids(x)

In [25]:
#function to query single film using list of ids
query_url = 'https://api.themoviedb.org/3/movie/'

def query_movie(list_ids):
    
    results = []
    
    payload = {}
    
    for id in list_ids:
        
        r = requests.get(f'{query_url}{id}?api_key={api_key}', params=payload)
        results.append(r.json())
        time.sleep(1)
    
    return results

_results = query_movie(list_ids)

In [36]:
len(_results)

2000

In [35]:
_results[0].keys()

dict_keys(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id', 'imdb_id', 'original_language', 'original_title', 'overview', 'popularity', 'poster_path', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'video', 'vote_average', 'vote_count'])

In [245]:
#Create DataFrame
df = pd.DataFrame.from_dict(_results)

In [246]:
#Drop Unnecessary Columns
df = df.drop(columns=['adult', 'backdrop_path', 'belongs_to_collection', 'homepage','poster_path', 'spoken_languages', 'status', 'status_code', 'status_message', 'video'])

In [247]:
df.head(5)

Unnamed: 0,budget,genres,id,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,tagline,title,vote_average,vote_count
0,237000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995.0,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",28.695,"[{'id': 289, 'logo_path': None, 'name': 'Ingen...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2787965000.0,162.0,Enter the World of Pandora.,Avatar,7.4,18058.0
1,245000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",140607.0,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,31.553,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068224000.0,136.0,Every generation has a story.,Star Wars: The Force Awakens,7.4,12278.0
2,300000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",299536.0,tt4154756,en,Avengers: Infinity War,As the Avengers and their allies have continue...,137.906,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2046240000.0,149.0,An entire universe. Once and for all.,Avengers: Infinity War,8.3,12288.0
3,200000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",597.0,tt0120338,en,Titanic,101-year-old Rose DeWitt Bukater tells the sto...,23.86,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,1845034000.0,194.0,Nothing on Earth could come between them.,Titanic,7.8,13630.0
4,61.0,[],570421.0,,en,Star Wars: The Original Trilogy,"Years later, the original Star Wars trilogy is...",0.6,[],[],,1788879000.0,377.0,,Star Wars: The Original Trilogy,8.5,1.0


In [249]:
#Rearrange the Columns
df = df[['id', 'imdb_id', 'title', 'original_title', 'release_date', 'genres',
         'revenue', 'budget', 'runtime', 'tagline', 'overview', 'production_companies', 'production_countries', 'vote_average', 'vote_count' ]]

In [251]:
#Cast release_date as datetime
df['release_date'] = pd.to_datetime(df['release_date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [252]:
#Make a Copy of the DataFrame to work on
dfx = df.copy(deep=True)
dfx.shape

(2000, 15)

In [266]:
#Create a dictionary with a list of the production companies
co_dict = {}
for i in range(len(dfx)):
    prod_list = []
    if isinstance(dfx.iloc[i]['production_companies'], list):
        for j in range(len(dfx.iloc[i]['production_companies'])):
            prod_list.append(dfx.iloc[i]['production_companies'][j]['name'])
    co_dict[dfx.iloc[i]['title']] = prod_list

In [254]:
df_co = pd.DataFrame([co_dict]).transpose()

In [255]:
df_co

Unnamed: 0,0
"10,000 BC","[Centropolis Entertainment, Legendary Entertai..."
101 Dalmatians,"[Great Oaks Entertainment, Walt Disney Pictures]"
102 Dalmatians,"[Walt Disney Pictures, Cruella Productions]"
12 Years a Slave,"[New Regency Pictures, Plan B Entertainment, R..."
2 Fast 2 Furious,"[Ardustry Entertainment, Mikona Productions Gm..."
2012,"[Columbia Pictures, Centropolis Entertainment,..."
21 Jump Street,"[Stephen J. Cannell Productions, Columbia Pict..."
22 Jump Street,"[Columbia Pictures, Original Film, Media Right..."
27 Dresses,"[Fox 2000 Pictures, Spyglass Entertainment, Du..."
3 Men and a Baby,"[Touchstone Pictures, Interscope Communication..."


In [256]:
dfx.isna().any().sum()

15

In [258]:
df_co = df_co.reset_index()
df_co.columns = ['title', 'all_production_cos']

In [259]:
#Merge the main dataframe with the list of production companies
dfx = dfx.merge(df_co, on='title')

In [260]:
dfx

Unnamed: 0,id,imdb_id,title,original_title,release_date,genres,revenue,budget,runtime,tagline,overview,production_companies,production_countries,vote_average,vote_count,all_production_cos
0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.787965e+09,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'id': 289, 'logo_path': None, 'name': 'Ingen...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"[Ingenious Film Partners, Dune Entertainment, ..."
1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.068224e+09,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"[Truenorth Productions, Lucasfilm, Bad Robot]"
2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",2.046240e+09,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,[Marvel Studios]
3,597.0,tt0120338,Titanic,Titanic,1997-11-18,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",1.845034e+09,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"[Paramount, Lightstorm Entertainment, 20th Cen..."
4,570421.0,,Star Wars: The Original Trilogy,Star Wars: The Original Trilogy,NaT,[],1.788879e+09,61.0,377.0,,"Years later, the original Star Wars trilogy is...",[],[],8.5,1.0,[]
5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.671713e+09,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'id': 3341, 'logo_path': '/dTG5dXE1kU2mpmL9B...","[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"[Fuji Television Network, Amblin Entertainment..."
6,24428.0,tt0848228,The Avengers,The Avengers,2012-04-25,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",1.519558e+09,220000000.0,143.0,Some assembly required.,When an unexpected enemy emerges and threatens...,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.6,18555.0,[Marvel Studios]
7,168259.0,tt2820852,Furious 7,Furious 7,2015-04-01,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1.506249e+09,190000000.0,137.0,Vengeance Hits Home,Deckard Shaw seeks revenge against Dominic Tor...,"[{'id': 87857, 'logo_path': None, 'name': 'Abu...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,6314.0,"[Abu Dhabi Film Commission, Universal Pictures..."
8,99861.0,tt2395427,Avengers: Age of Ultron,Avengers: Age of Ultron,2015-04-22,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.405404e+09,280000000.0,141.0,A New Age Has Come.,When Tony Stark tries to jumpstart a dormant p...,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,12517.0,[Marvel Studios]
9,284054.0,tt1825683,Black Panther,Black Panther,2018-02-13,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.346739e+09,200000000.0,134.0,Long live the king.,King T'Challa returns home from America to the...,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,11320.0,"[Marvel Studios, Walt Disney Pictures]"


In [261]:
dfx = dfx.drop(columns=['production_companies'])

In [262]:
dfx

Unnamed: 0,id,imdb_id,title,original_title,release_date,genres,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos
0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.787965e+09,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"[Ingenious Film Partners, Dune Entertainment, ..."
1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.068224e+09,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"[Truenorth Productions, Lucasfilm, Bad Robot]"
2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",2.046240e+09,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,[Marvel Studios]
3,597.0,tt0120338,Titanic,Titanic,1997-11-18,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",1.845034e+09,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"[Paramount, Lightstorm Entertainment, 20th Cen..."
4,570421.0,,Star Wars: The Original Trilogy,Star Wars: The Original Trilogy,NaT,[],1.788879e+09,61.0,377.0,,"Years later, the original Star Wars trilogy is...",[],8.5,1.0,[]
5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.671713e+09,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"[Fuji Television Network, Amblin Entertainment..."
6,24428.0,tt0848228,The Avengers,The Avengers,2012-04-25,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",1.519558e+09,220000000.0,143.0,Some assembly required.,When an unexpected enemy emerges and threatens...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.6,18555.0,[Marvel Studios]
7,168259.0,tt2820852,Furious 7,Furious 7,2015-04-01,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1.506249e+09,190000000.0,137.0,Vengeance Hits Home,Deckard Shaw seeks revenge against Dominic Tor...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,6314.0,"[Abu Dhabi Film Commission, Universal Pictures..."
8,99861.0,tt2395427,Avengers: Age of Ultron,Avengers: Age of Ultron,2015-04-22,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.405404e+09,280000000.0,141.0,A New Age Has Come.,When Tony Stark tries to jumpstart a dormant p...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,12517.0,[Marvel Studios]
9,284054.0,tt1825683,Black Panther,Black Panther,2018-02-13,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.346739e+09,200000000.0,134.0,Long live the king.,King T'Challa returns home from America to the...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,11320.0,"[Marvel Studios, Walt Disney Pictures]"


In [268]:
#Create a dictionary with a list of genres for each film
g_dict = {}
for i in range(len(dfx)):
    prod_list = []
    if isinstance(dfx.iloc[i]['genres'], list):
        for j in range(len(dfx.iloc[i]['genres'])):
            prod_list.append(dfx.iloc[i]['genres'][j]['name'])
    g_dict[dfx.iloc[i]['title']] = prod_list

In [270]:
df_g = pd.DataFrame([g_dict]).transpose()

In [272]:
#bring list of genres into primary dataframe
df_g = df_g.reset_index()
df_g.columns = ['title', 'list_genres']
#Merge the main dataframe with the list of production companies
dfx = dfx.merge(df_g, on='title')

In [273]:
dfx

Unnamed: 0,id,imdb_id,title,original_title,release_date,genres,revenue,budget,runtime,tagline,overview,production_countries,vote_average,vote_count,all_production_cos,list_genres
0,19995.0,tt0499549,Avatar,Avatar,2009-12-10,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.787965e+09,237000000.0,162.0,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,18058.0,"[Ingenious Film Partners, Dune Entertainment, ...","[Action, Adventure, Fantasy, Science Fiction]"
1,140607.0,tt2488496,Star Wars: The Force Awakens,Star Wars: The Force Awakens,2015-12-15,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",2.068224e+09,245000000.0,136.0,Every generation has a story.,Thirty years after defeating the Galactic Empi...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,12278.0,"[Truenorth Productions, Lucasfilm, Bad Robot]","[Action, Adventure, Science Fiction, Fantasy]"
2,299536.0,tt4154756,Avengers: Infinity War,Avengers: Infinity War,2018-04-25,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",2.046240e+09,300000000.0,149.0,An entire universe. Once and for all.,As the Avengers and their allies have continue...,"[{'iso_3166_1': 'US', 'name': 'United States o...",8.3,12288.0,[Marvel Studios],"[Adventure, Action, Fantasy]"
3,597.0,tt0120338,Titanic,Titanic,1997-11-18,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",1.845034e+09,200000000.0,194.0,Nothing on Earth could come between them.,101-year-old Rose DeWitt Bukater tells the sto...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.8,13630.0,"[Paramount, Lightstorm Entertainment, 20th Cen...","[Drama, Romance, Thriller]"
4,570421.0,,Star Wars: The Original Trilogy,Star Wars: The Original Trilogy,NaT,[],1.788879e+09,61.0,377.0,,"Years later, the original Star Wars trilogy is...",[],8.5,1.0,[],[]
5,135397.0,tt0369610,Jurassic World,Jurassic World,2015-06-06,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.671713e+09,150000000.0,124.0,The park is open.,Twenty-two years after the events of Jurassic ...,"[{'iso_3166_1': 'US', 'name': 'United States o...",6.6,13638.0,"[Fuji Television Network, Amblin Entertainment...","[Action, Adventure, Science Fiction, Thriller]"
6,24428.0,tt0848228,The Avengers,The Avengers,2012-04-25,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",1.519558e+09,220000000.0,143.0,Some assembly required.,When an unexpected enemy emerges and threatens...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.6,18555.0,[Marvel Studios],"[Science Fiction, Action, Adventure]"
7,168259.0,tt2820852,Furious 7,Furious 7,2015-04-01,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1.506249e+09,190000000.0,137.0,Vengeance Hits Home,Deckard Shaw seeks revenge against Dominic Tor...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,6314.0,"[Abu Dhabi Film Commission, Universal Pictures...","[Action, Crime, Thriller, Drama]"
8,99861.0,tt2395427,Avengers: Age of Ultron,Avengers: Age of Ultron,2015-04-22,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.405404e+09,280000000.0,141.0,A New Age Has Come.,When Tony Stark tries to jumpstart a dormant p...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.3,12517.0,[Marvel Studios],"[Action, Adventure, Science Fiction]"
9,284054.0,tt1825683,Black Panther,Black Panther,2018-02-13,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",1.346739e+09,200000000.0,134.0,Long live the king.,King T'Challa returns home from America to the...,"[{'iso_3166_1': 'US', 'name': 'United States o...",7.4,11320.0,"[Marvel Studios, Walt Disney Pictures]","[Action, Adventure, Fantasy, Science Fiction]"


In [274]:
dfx = dfx.drop(columns=['genres'])

In [297]:
dfx = dfx.dropna(subset=['title'])
dfx.show

(1990, 15)

In [342]:
budget = dfx['budget'].tolist()
revenue = dfx['revenue'].tolist()
runtime = dfx['runtime'].tolist()

In [303]:
list_all_production_cos = []

for i in range(len(dfx)):
    for company in dfx.iloc[i]['all_production_cos']:
        list_all_production_cos.append(company)

In [328]:
from collections import Counter
appearances = Counter(list_all_production_cos)

sorted_appearances = sorted(zip(appearances.values(), appearances.keys()), reverse=True)

sorted_appearances

[(232, 'Warner Bros. Pictures'),
 (227, 'Universal Pictures'),
 (188, 'Columbia Pictures'),
 (176, 'Paramount'),
 (174, '20th Century Fox'),
 (110, 'Walt Disney Pictures'),
 (92, 'New Line Cinema'),
 (74, 'DreamWorks'),
 (68, 'Relativity Media'),
 (67, 'Village Roadshow Pictures'),
 (66, 'Touchstone Pictures'),
 (52, 'Metro-Goldwyn-Mayer'),
 (49, 'Dune Entertainment'),
 (49, 'Amblin Entertainment'),
 (45, 'Regency Enterprises'),
 (39, 'TSG Entertainment'),
 (35, 'Legendary Entertainment'),
 (35, 'DreamWorks Animation'),
 (33, 'Working Title Films'),
 (33, 'Summit Entertainment'),
 (33, 'Miramax'),
 (33, 'Fox 2000 Pictures'),
 (33, 'Canal+'),
 (32, 'Imagine Entertainment'),
 (31, 'Original Film'),
 (29, 'Lionsgate'),
 (28, 'TriStar Pictures'),
 (28, 'StudioCanal'),
 (27, 'Davis Entertainment'),
 (26, 'Screen Gems'),
 (24, 'Silver Pictures'),
 (24, 'RatPac-Dune Entertainment'),
 (24, 'Dimension Films'),
 (23, 'United Artists'),
 (23, 'New Regency Pictures'),
 (22, 'Walt Disney Animation 

In [332]:
min(dfx['release_date'])

Timestamp('1937-12-20 00:00:00')

In [334]:
max(dfx['revenue'])

2787965087.0

In [359]:
dfx['profit_multiple'] = dfx['revenue'] / dfx['budget']

In [368]:

_2000_2010_ = dfx.loc[(dfx['release_date'] > '2000-01-01') & (dfx['release_date'] < '2010-01-01'), ['title', 'release_date', 'revenue', 'budget', 'profit_multiple']]
_2000_2010_.head(25)

Unnamed: 0,title,release_date,revenue,budget,profit_multiple
0,Avatar,2009-12-10,2787965000.0,237000000.0,11.763566
21,The Lord of the Rings: The Return of the King,2003-12-01,1118889000.0,94000000.0,11.903074
26,Pirates of the Caribbean: Dead Man's Chest,2006-06-20,1065660000.0,200000000.0,5.328299
35,The Dark Knight,2008-07-16,1004558000.0,185000000.0,5.430046
38,Harry Potter and the Philosopher's Stone,2001-11-16,976475600.0,125000000.0,7.811804
43,Pirates of the Caribbean: At World's End,2007-05-19,961000000.0,300000000.0,3.203333
47,Finding Nemo,2003-05-30,940335500.0,94000000.0,10.00357
48,Harry Potter and the Order of the Phoenix,2007-06-28,938212700.0,150000000.0,6.254752
49,Harry Potter and the Half-Blood Prince,2009-07-07,933959200.0,250000000.0,3.735837
50,The Lord of the Rings: The Two Towers,2002-12-18,926287400.0,79000000.0,11.725157


In [348]:
dfx.to_csv('dfx_movies.csv')

In [358]:
_2010_2020_ = dfx.loc[(dfx['release_date'] > '2010-01-01') & (dfx['release_date'] < '2020-01-01'), ['title', 'release_date', 'revenue', 'budget']]
_2010_2020_.head(20)

Unnamed: 0,title,release_date,revenue,budget
1,Star Wars: The Force Awakens,2015-12-15,2068224000.0,245000000.0
2,Avengers: Infinity War,2018-04-25,2046240000.0,300000000.0
5,Jurassic World,2015-06-06,1671713000.0,150000000.0
6,The Avengers,2012-04-25,1519558000.0,220000000.0
7,Furious 7,2015-04-01,1506249000.0,190000000.0
8,Avengers: Age of Ultron,2015-04-22,1405404000.0,280000000.0
9,Black Panther,2018-02-13,1346739000.0,200000000.0
10,Harry Potter and the Deathly Hallows: Part 2,2011-07-07,1342000000.0,125000000.0
11,Star Wars: The Last Jedi,2017-12-13,1332460000.0,200000000.0
12,Jurassic World: Fallen Kingdom,2018-06-06,1303460000.0,170000000.0


In [375]:
page = 1
adder = 100

from bs4 import BeautifulSoup

def build_url(page):
    host = 'https://www.the-numbers.com/movie/budgets/all/'
    return f"{host}{page}"

def scrape():
    #calls url function and parses results into Beautiful Soup
    r = requests.get(build_url(page))
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')

    return soup

x = scrape()


In [487]:
movie_list = []

def parse_scrape(soup_object):

    table = x.find( "table")
    movie_data = table.find_all('td', class_ = 'data')

    titles = []


    for i in table.find_all('b'):
        titles.append(i.get_text())

    i = 0
    for row in range(int(len(movie_data)/4)):
        movie_list.append({titles[row]: {'budget': movie_data[row+i+1].get_text(), 'dom_gross': movie_data[row+i+2].get_text(), 'ww_gross': movie_data[row+i+3].get_text()}})
        i += 3


def loop_through(loops):
    

[{'Avatar': {'budget': '$425,000,000',
   'dom_gross': '$760,507,625',
   'ww_gross': '$2,776,345,279'}},
 {'Pirates of the Caribbean: On Stranger Tides': {'budget': '$410,600,000',
   'dom_gross': '$241,063,875',
   'ww_gross': '$1,045,663,875'}},
 {'Avengers: Age of Ultron': {'budget': '$330,600,000',
   'dom_gross': '$459,005,868',
   'ww_gross': '$1,403,013,963'}},
 {'Star Wars Ep. VIII: The Last Jedi': {'budget': '$317,000,000',
   'dom_gross': '$620,181,382',
   'ww_gross': '$1,316,721,747'}},
 {'Star Wars Ep. VII: The Force Awakens': {'budget': '$306,000,000',
   'dom_gross': '$936,662,225',
   'ww_gross': '$2,053,311,220'}},
 {'Avengers: Infinity War': {'budget': '$300,000,000',
   'dom_gross': '$678,815,482',
   'ww_gross': '$2,048,797,682'}},
 {'Pirates of the Caribbean: At Worldâ\x80\x99s End': {'budget': '$300,000,000',
   'dom_gross': '$309,420,425',
   'ww_gross': '$963,420,425'}},
 {'Justice League': {'budget': '$300,000,000',
   'dom_gross': '$229,024,295',
   'ww_gross