## Get movie info box(store in Python dictionary)
#### Import necessary libraries

In [2]:
from bs4 import BeautifulSoup as bs
import requests

#### Load the webpage

In [None]:
r = requests.get("https://en.wikipedia.org/wiki/Toy_Story_3")

#Convert to a beautiful soup object
soup = bs(r.content)

#Print out the html
content = soup.prettify()
print(content)

#### Grab only the toy story 3 movie info box

In [None]:
info_box = soup.find(class_="infobox vevent")
#print(info_box.prettify())
info_box_rows = info_box.find_all("tr")
for row in info_box_rows:
    print(row.prettify())

#### Toy story 3 movie data cleanup


In [None]:
def get_content_value(row_data):
    if row_data.find("li"):
        return [li.get_text(" ", strip=True).replace("\xa0", " ") for li in row_data.find_all("li")]
    else:
        return row_data.get_text(" ", strip=True).replace("\xa0", " ")

movie_info = {}
for index, row in enumerate(info_box_rows): #enumerate allow us to get both the index and the row at the same time
    if index == 0:
        movie_info['title'] = row.find('th').get_text(" ", strip=True)
    elif index == 1:
        continue
    else:
        content_key = row.find("th").get_text(" ", strip=True)
        content_value = get_content_value(row.find("td"))
        movie_info[content_key] = content_value
        
movie_info         

## Task 2: Get info Box for all movie

In [None]:
r = requests.get("https://en.wikipedia.org/wiki/List_of_Walt_Disney_Pictures_films")

#convert to a beautiful soup object
soup = bs(r.content)

#print out the html
content = soup.prettify()
print(content)

In [None]:
movies = soup.select(".wikitable.sortable i")
movies[0:10]
movies[0]
#movies[0].a['href']
#movies[0].a['title']

In [49]:
def get_content_value(row_data):
    if row_data.find("li"):
        return [li.get_text(" ", strip=True).replace("\xa0", " ") for li in row_data.find_all("li")]
    elif row_data.find("br"):
        return [text for text in row_data.stripped_strings]
    else:
        return row_data.get_text(" ", strip=True).replace("\xa0", " ")
    
#Clean up references (remove[1][2], tags etc.)
def clean_tags(soup):
    for tag in soup.find_all(["sup", "span"]):
        tag.decompose()    
    
    
def get_info_box(url):    
    r = requests.get(url)
    soup = bs(r.content)
    info_box = soup.find(class_="infobox vevent")
    info_box_rows = info_box.find_all("tr")
    
    clean_tags(soup)
    
    movie_info = {}
    for index, row in enumerate(info_box_rows): #enumerate allow us to get both the index and the row at the same time
        if index == 0:
            movie_info['title'] = row.find('th').get_text(" ", strip=True)
        else:
            header = row.find('th')
            if header:
                content_key = row.find("th").get_text(" ", strip=True)
                content_value = get_content_value(row.find("td"))
                movie_info[content_key] = content_value
    return movie_info  


In [50]:
get_info_box("https://en.wikipedia.org/wiki/One_Little_Indian_(film)")

{'title': 'One Little Indian',
 'Directed by': 'Bernard McEveety',
 'Written by': 'Harry Spalding',
 'Produced by': 'Winston Hibler',
 'Starring': ['James Garner',
  'Vera Miles',
  'Pat Hingle',
  'Morgan Woodward',
  'Jodie Foster'],
 'Cinematography': 'Charles F. Wheeler',
 'Edited by': 'Robert Stafford',
 'Music by': 'Jerry Goldsmith',
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'Buena Vista Distribution',
 'Release date': ['June 20, 1973'],
 'Running time': '90 Minutes',
 'Country': 'United States',
 'Language': 'English',
 'Box office': '$2 million'}

In [51]:
r = requests.get("https://en.wikipedia.org/wiki/List_of_Walt_Disney_Pictures_films")
soup = bs(r.content)
movies = soup.select(".wikitable.sortable i a")

base_path = "https://www.wikipedia.org/"

movie_info_list = []
for index, movie in enumerate(movies):
    if index % 10 == 0:
        print(index)
    try:
        relative_path = movie['href']
        full_path = base_path + relative_path
        title = movie['title']
        
        movie_info_list.append(get_info_box(full_path))
        
    except Exception as e:
        print(movie.get_text())
        print(e)


0
10
20
30
40
Zorro the Avenger
'NoneType' object has no attribute 'find'
The Sign of Zorro
'NoneType' object has no attribute 'find'
50
60
70
80
90
100
110
120
True-Life Adventures
'NoneType' object has no attribute 'find_all'
130
140
The London Connection
'NoneType' object has no attribute 'find'
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
Sister Act 3
'NoneType' object has no attribute 'find'
Tower of Terror
'NoneType' object has no attribute 'find_all'
Tron: Ares
'NoneType' object has no attribute 'find'
61
'NoneType' object has no attribute 'find_all'
510
Keeper of the Lost Cities
'NoneType' object has no attribute 'find_all'
Muppet Man
'NoneType' object has no attribute 'find_all'
Grimm
'NoneType' object has no attribute 'find_all'
520
The Paper Magician
'NoneType' object has no attribute 'find_all'
The Thief
'NoneType' object has no attribute 'find_all'
Tom Sawyer
'NoneType' objec

In [10]:
len(movie_info_list)

502

#### Save/Reload movie data

In [52]:
import json

def save_data(title, data):
    with open(title, 'w', encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

In [55]:
import json

def load_data(title):
    with open(title, encoding="utf-8") as f:
        return json.load(f)

In [54]:
save_data("disney_data_cleaned.json", movie_info_list)

#### Task #3: Clean the Data            

In [115]:
movie_info_list = load_data("disney_data_cleaned.json")

#### Clean our data
- ~~Clean up referneces [1]~~
- ~~Convert running time into an integer~~
- Convert dates into datetime object
- ~~Split up the long strings~~
- ~~Convert budget and Box office to numbers~~



In [116]:
#Convert running time into an integer
movie_info_list[-11]

{'title': 'Bambi',
 'Directed by': ['Supervising director',
  'David Hand',
  'Sequence directors',
  'James Algar',
  'Samuel Armstrong',
  'Graham Heid',
  'Bill Roberts',
  'Paul Satterfield',
  'Norman Wright'],
 'Story by': ['Story direction',
  'Perce Pearce',
  'Story adaptation',
  'Larry Morey',
  'Story development',
  'Vernon Stallings',
  'Melvin Shaw',
  'Carl Fallberg',
  'Chuck Couch',
  'Ralph Wright'],
 'Based on': ['Bambi, a Life in the Woods', 'by', 'Felix Salten'],
 'Produced by': 'Walt Disney',
 'Starring': 'see below',
 'Music by': ['Frank Churchill', 'Edward H. Plumb'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['August 9, 1942 (World Premiere – London)',
  'August 13, 1942 (Premiere – New York City)',
  'August 21, 1942 (U.S.)'],
 'Running time': '70 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$858,000',
 'Box office': '$267.4 million'}

In [64]:
print([movie.get("Running time", "N/A") for movie in movie_info_list])

['41 minutes (74 minutes 1966 release)', '83 minutes', '88 minutes', '126 minutes', '74 minutes', '64 minutes', '70 minutes', '42 minutes', '70 min', '71 minutes', '75 minutes', '94 minutes', '73 minutes', '75 minutes', '82 minutes', '68 minutes', '74 minutes', '96 minutes', '75 minutes', '84 minutes', '77 minutes', '92 minutes', '69 minutes', '81 minutes', ['60 minutes (VHS version)', '71 minutes (original)'], '127 minutes', '92 minutes', '76 minutes', '75 minutes', '73 minutes', '85 minutes', '81 minutes', '70 minutes', '90 min.', '80 minutes', '75 minutes', '83 minutes', '83 minutes', '72 minutes', '97 minutes', '75 minutes', '104 minutes', '93 minutes', '105 minutes', '95 minutes', '97 minutes', '134 minutes', '69 minutes', '92 minutes', '131 minutes', '79 minutes', '97 minutes', '128 minutes', '74 minutes', '91 minutes', '105 minutes', '98 minutes', '130 minutes', '89 min.', '93 minutes', '67 minutes', '98 minutes', '100 minutes', '118 minutes', '103 minutes', '110 minutes', '80 m

In [117]:
#"85 minutes"
def minute_to_integer(running_time):
    if running_time == "N/A":
        return None
    if isinstance(running_time, list): #is a list
        entry = running_time[0]
        value = int(entry.split(" ")[0])
        return value
    else: #is a string
        value = int(running_time.split(" ")[0])
        return value
#print(minute_to_integer(["85 minutes", "90 minutes"]))

for movie in movie_info_list:
    movie['Running time (int)'] = minutes_to_integer(movie.get('Running time', "N/A"))

In [119]:
#movie_info_list[-11]
print([movie.get("Running time (int)", "N/A") for movie in movie_info_list])

[41, 83, 88, 126, 74, 64, 70, 42, 70, 71, 75, 94, 73, 75, 82, 68, 74, 96, 75, 84, 77, 92, 69, 81, 60, 127, 92, 76, 75, 73, 85, 81, 70, 90, 80, 75, 83, 83, 72, 97, 75, 104, 93, 105, 95, 97, 134, 69, 92, 131, 79, 97, 128, 74, 91, 105, 98, 130, 89, 93, 67, 98, 100, 118, 103, 110, 80, 79, 91, 91, 97, 118, 139, 131, 92, 87, 116, 93, 110, 110, 131, 101, 108, 84, 78, 75, 164, 106, 110, 99, 113, 108, 112, 93, 91, 93, 100, 100, 79, 96, 113, 89, 118, 92, 88, 92, 87, 93, 93, 93, 90, 83, 96, 88, 89, 91, 93, 92, 97, 100, 100, 89, 91, 112, 115, 95, 91, 97, 104, 74, 48, 77, 104, 128, 101, 94, 104, 90, 100, 88, 93, 98, 112, 84, 97, 97, 114, 96, 97, 109, 83, 90, 107, 96, 103, 91, 95, 105, 113, 80, 101, 90, 74, 90, 89, 110, 74, 93, 84, 83, 74, 77, 107, 93, 88, 108, 84, 121, 89, 104, 90, 86, 84, 108, 107, 96, 98, 105, 108, 94, 106, 102, 88, 102, 102, 97, 111, 100, 96, 98, 78, 81, 108, 89, 99, 89, 81, 92, 100, 89, 79, 91, 101, 104, 103, 86, 105, 75, 93, 92, 98, 95, 93, 87, 93, 87, 128, 77, 86, 95, 114, 93

In [125]:
movie_info_list[11]

{'title': 'Song of the South',
 'Directed by': ['Live action:',
  'Harve Foster',
  'Animation:',
  'Wilfred Jackson'],
 'Screenplay by': ['Live action:',
  'Morton Grant',
  'Maurice Rapf',
  'Dalton S. Reymond',
  'Animation:',
  'Bill Peet',
  'George Stallings',
  'Ralph Wright'],
 'Based on': ['Uncle Remus', 'by', 'Joel Chandler Harris'],
 'Produced by': 'Walt Disney',
 'Starring': ['James Baskett',
  'Bobby Driscoll',
  'Luana Patten',
  'Ruth Warrick',
  'Hattie McDaniel'],
 'Cinematography': 'Gregg Toland',
 'Edited by': 'William M. Morgan',
 'Music by': ['Edward Plumb',
  'Daniele Amfitheatrof',
  'Paul J. Smith',
  'Charles Wolcott'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['November 12, 1946 (Premiere: Atlanta, Georgia)',
  'November 20, 1946',
  'March 30, 1947 (Stanford Theatre, Palo Alto, California)'],
 'Running time': '94 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$2.125

In [126]:
print([movie.get('Budget', 'N/A') for movie in movie_info_list])

['N/A', '$1.49 million', '$2.6 million', '$2.28 million', '$600,000', '$950,000', '$858,000', 'N/A', '$788,000', 'N/A', '$1.35 million', '$2.125 million', 'N/A', '$1.5 million', '$1.5 million', 'N/A', '$2.2 million', '$1,800,000', '$3 million', 'N/A', '$4 million', '$2 million', '$300,000', '$1.8 million', 'N/A', '$5 million', 'N/A', '$4 million', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', '$700,000', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', '$6 million', 'under $1 million or $1,250,000', 'N/A', '$2 million', 'N/A', 'N/A', '$2.5 million', 'N/A', 'N/A', '$4 million', '$3.6 million', 'N/A', 'N/A', 'N/A', 'N/A', '$3 million', 'N/A', '$3 million', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', '$3 million', 'N/A', 'N/A', 'N/A', 'N/A', '$4.4–6 million', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', '$4 million', 'N/A', '$5 million', 'N/A', 'N/A', 'N/A', 'N/A', '$5 million', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', '$4 million', 'N/A', 'N/A', 'N/A', '

In [143]:
import re

amounts = r"thousand|million|billion"
number = r"\d+(,\d{3})*\.*\d*"

word_re = rf"\${number}(-|\sto\s)?({number})?\s({amounts})"
value_re = rf"\${number}"


def word_to_value(word):
    value_dict = {"thousand": 1000, "million": 1000000, "billion": 1000000000}
    return value_dict[word]


def parse_word_syntax(string):
    value_string = re.search(number, string).group()
    value = float(value_string.replace(",", ""))
    word = re.search(amounts, string, flags=re.I).group().lower()
    word_value = word_to_value(word)
    return  value*word_value


def parse_value_syntax(string):
    value_string = re.search(number, string).group()
    value = float(value_string.replace(",", ""))
    return value

'''
money_conversion("$12.2 million") --> 12200000 ##the word syntax
money_conversion("$790,000") --> 790000        ##The value Syntax
'''


def money_conversion(money):
    if money == "N/A":
        return None

    if isinstance(money, list):
        money = money[0]

    word_syntax = re.search(word_re, money, flags=re.I)
    value_syntax = re.search(value_re, money)

    if word_syntax:
        return parse_word_syntax(word_syntax.group())
    elif value_syntax:
        return parse_value_syntax(value_syntax.group())
    else:
        return None

#print(money_conversion("$790 million"))

In [145]:
for movie in movie_info_list:
    movie['Budget (float)'] = money_conversion(movie.get('Budget', "N/A"))
    movie['Box office (float)'] = money_conversion(movie.get('Box office', "N/A"))

In [198]:
budgets = [movie.get('Budget', 'N/A') for movie in movie_info_list]
for budget in budgets:
    print(money_conversion(budget))
    
#box_offices = [movie.get('Box office', 'N/A') for movie in movie_info_list]
#for box_office in box_offices:
    #print(money_conversion(box_office))    

None
1490000.0
2600000.0
2280000.0
600000.0
950000.0
858000.0
None
788000.0
None
1350000.0
2125000.0
None
1500000.0
1500000.0
None
2200000.0
1800000.0
3000000.0
None
4000000.0
2000000.0
300000.0
1800000.0
None
5000000.0
None
4000000.0
None
None
None
None
None
None
700000.0
None
None
None
None
None
6000000.0
1000000.0
None
2000000.0
None
None
2500000.0
None
None
4000000.0
3600000.0
None
None
None
None
3000000.0
None
3000000.0
None
None
None
None
None
None
None
None
None
3000000.0
None
None
None
None
4.4
None
None
None
None
None
None
None
None
None
None
None
4000000.0
None
5000000.0
None
None
None
None
5000000.0
None
None
None
None
None
None
4000000.0
None
None
None
6300000.0
None
None
None
None
None
None
None
None
5000000.0
None
None
None
None
8000000.0
None
None
None
None
None
1000000.0
None
None
None
None
5000000.0
None
None
None
7500000.0
None
10000000.0
None
None
3500000.0
None
None
None
5250000.0
20000000.0
None
9000000.0
None
6000000.0
20000000.0
None
None
18000000.0
12000000.0
14

In [180]:
money_conversion(movie_info_list[86]['Budget'])


5000000.0

In [178]:
#convert date into datetimes
print([movie.get('Release date', 'N/A') for movie in movie_info_list])

[['May 19, 1937'], ['December 21, 1937 ( Carthay Circle Theatre )'], ['February 7, 1940 ( Center Theatre )', 'February 23, 1940 (United States)'], ['November 13, 1940'], ['June 27, 1941'], ['October 23, 1941 (New York City)', 'October 31, 1941 (U.S.)'], ['August 9, 1942 (World Premiere – London)', 'August 13, 1942 (Premiere – New York City)', 'August 21, 1942 (U.S.)'], ['August 24, 1942 (World Premiere – Rio de Janeiro)', 'February 6, 1943 (U.S. Premiere – Boston)', 'February 19, 1943 (U.S.)'], ['July 17, 1943'], ['December 21, 1944 (Mexico City)', 'February 3, 1945 (US)'], ['April 20, 1946 (New York City premiere)', 'August 15, 1946 (U.S.)'], ['November 12, 1946 (Premiere: Atlanta, Georgia)', 'November 20, 1946', 'March 30, 1947 (Stanford Theatre, Palo Alto, California)'], ['September 27, 1947'], 'May 27, 1948', ['November 29, 1948 (Chicago, Illinois)', 'January 19, 1949 (Indianapolis, Indiana)'], ['October 5, 1949'], ['February 15, 1950 (Boston)', 'March 4, 1950 (United States)'], ['

In [177]:
movie_info_list[86]

{'title': 'The Happiest Millionaire',
 'Directed by': 'Norman Tokar',
 'Screenplay by': 'A. J. Carothers',
 'Story by': 'A. J. Carothers',
 'Based on': ['My Philadelphia Father', 'by Cordelia Drexel Biddle'],
 'Produced by': ['Walt Disney', 'Bill Anderson'],
 'Starring': ['Fred MacMurray',
  'Tommy Steele',
  'Greer Garson',
  'Gladys Cooper',
  'Geraldine Page',
  'Hermione Baddeley',
  'John Davidson',
  'Lesley Ann Warren'],
 'Cinematography': 'Edward Colman',
 'Edited by': 'Cotton Warburton',
 'Music by': 'Jack Elliott',
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'Buena Vista Distribution',
 'Release date': ['June 23, 1967', 'November 30, 1967'],
 'Running time': ['164 minutes',
  '(',
  'Los Angeles',
  'premiere)',
  '144 minutes',
  '(',
  'New York City',
  'premiere)',
  '118 minutes',
  '(General release)',
  '172 minutes',
  '(',
  "Director's cut",
  ')'],
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$5 million',
 'Box office': 

In [220]:
from datetime import datetime

dates = [movie.get('Release date', 'N/A') for movie in movie_info_list]

def clean_date(date):
    return date.split("(")[0].strip()

def date_conversion(date):
    if isinstance(date, list):
        date = date[0]
     
    if date == 'N/A':
        return None
    
    date_str = clean_date(date)
    #print(date_str)
    
    fmts = ["%B %d, %Y", "%d %B %Y"]
    for fmt in fmts:
        try:
            return datetime.strptime(date_str, fmt)
        except:
            pass
    return None    
    
#for date in dates:
    #print(date_conversion(date))
    #print()    
        
    

In [221]:
for movie in movie_info_list:
    movie['Release date (datetime)'] = date_conversion(movie.get('Release date', 'N/A'))

In [223]:
movie_info_list[86]

{'title': 'The Happiest Millionaire',
 'Directed by': 'Norman Tokar',
 'Screenplay by': 'A. J. Carothers',
 'Story by': 'A. J. Carothers',
 'Based on': ['My Philadelphia Father', 'by Cordelia Drexel Biddle'],
 'Produced by': ['Walt Disney', 'Bill Anderson'],
 'Starring': ['Fred MacMurray',
  'Tommy Steele',
  'Greer Garson',
  'Gladys Cooper',
  'Geraldine Page',
  'Hermione Baddeley',
  'John Davidson',
  'Lesley Ann Warren'],
 'Cinematography': 'Edward Colman',
 'Edited by': 'Cotton Warburton',
 'Music by': 'Jack Elliott',
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'Buena Vista Distribution',
 'Release date': ['June 23, 1967', 'November 30, 1967'],
 'Running time': ['164 minutes',
  '(',
  'Los Angeles',
  'premiere)',
  '144 minutes',
  '(',
  'New York City',
  'premiere)',
  '118 minutes',
  '(General release)',
  '172 minutes',
  '(',
  "Director's cut",
  ')'],
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$5 million',
 'Box office': 

In [224]:
import pickle

def save_data_pickle(name, data):
    with open(name, 'wb') as f:
        pickle.dump(data, f)

In [225]:
import pickle

def load_data_pickle(name):
    with open(name, 'rb') as f:
        return pickle.load(f)

In [226]:
save_data_pickle("disney_movie_data_cleaned_more.pickle", movie_info_list)

In [227]:
a = load_data_pickle("disney_movie_data_cleaned_more.pickle")

In [228]:
a[5]

{'title': 'Dumbo',
 'Directed by': ['Ben Sharpsteen',
  'Norman Ferguson',
  'Wilfred Jackson',
  'Bill Roberts',
  'Jack Kinney',
  'Samuel Armstrong'],
 'Story by': ['Otto Englander', 'Joe Grant', 'Dick Huemer'],
 'Based on': ['Dumbo, the Flying Elephant',
  'by',
  'Helen Aberson',
  'Harold Pearl'],
 'Produced by': 'Walt Disney',
 'Starring': ['Edward Brophy',
  'Verna Felton',
  'Cliff Edwards',
  'Herman Bing',
  'Sterling Holloway',
  'Margaret Wright',
  'Hall Johnson Choir'],
 'Narrated by': 'John McLeish',
 'Music by': ['Frank Churchill', 'Oliver Wallace'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['October 23, 1941 (New York City)',
  'October 31, 1941 (U.S.)'],
 'Running time': '64 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$950,000',
 'Box office': '>$1.3 million (est. United States/Canada rentals, 1941)',
 'Running time (int)': 64,
 'Budget (float)': 950000.0,
 'Box office (

#### Attached IMDB/Rotten Tomatoes/Metascore scores

In [None]:
movie_info_list = load_data_pickle("disney_movie_data_cleaned_more.pickle")

In [229]:
movie_info_list[-60]

{'title': 'Maleficent: Mistress of Evil',
 'Directed by': 'Joachim Rønning',
 'Written by': ['Linda Woolverton', 'Noah Harpster', 'Micah Fitzerman-Blue'],
 'Based on': ["Disney's Sleeping Beauty",
  'La Belle au bois dormant by Charles Perrault'],
 'Produced by': ['Joe Roth', 'Angelina Jolie', 'Duncan Henderson'],
 'Starring': ['Angelina Jolie',
  'Elle Fanning',
  'Chiwetel Ejiofor',
  'Sam Riley',
  'Ed Skrein',
  'Imelda Staunton',
  'Juno Temple',
  'Lesley Manville',
  'Michelle Pfeiffer'],
 'Cinematography': 'Henry Braham',
 'Edited by': ['Laura Jennings', 'Craig Wood'],
 'Music by': 'Geoff Zanelli',
 'Production companies': ['Walt Disney Pictures', 'Roth/Kirschenbaum Films'],
 'Distributed by': ['Walt Disney Studios', 'Motion Pictures'],
 'Release date': ['October 18, 2019'],
 'Running time': '119 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$185 million',
 'Box office': '$491.7 million',
 'Running time (int)': 119,
 'Budget (float)': 185000000.0,
 

In [None]:
# http://www.omdbapi.com/?apikey=[yourkey]&

In [300]:
import requests
import urllib
import os

def get_omdb_info(title):
    base_url = "http://www.omdbapi.com/?"
    #parameters = {"apikey": os.environ['OMDB_API_KEY'], 't': title}
    parameters = {"apikey": "84ec181f", 't': title}
    params_encoded = urllib.parse.urlencode(parameters)
    full_url = base_url + params_encoded
    return requests.get(full_url).json()


def get_rotten_tomato_score(omdb_info):
    ratings = omdb_info.get('Ratings', [])
    for rating in ratings:
        #print(rating)
        if rating['Source'] == 'Rotten Tomatoes':
            return rating['Value']
    return None 
    
get_omdb_info('into the woods')
#info
#get_rotten_tomato_score(info)        
    

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [291]:
for movie in movie_info_list:
    title = movie['title']
    #omdb_info = get_omdb_info(title)
    #movie['imdb'] = omdb_info.get("imdbRating", None)
    #movie['metascore'] = omdb_info.get("Metascore", None)
    #movie['rotten_tomatoes'] = get_rotten_tomato_score(omdb_info)

Academy Award Review of
Snow White and the Seven Dwarfs
Pinocchio
Fantasia
The Reluctant Dragon
Dumbo
Bambi
Saludos Amigos
Victory Through Air Power
The Three Caballeros
Make Mine Music
Song of the South
Fun and Fancy Free
Melody Time
So Dear to My Heart
The Adventures of Ichabod and Mr. Toad
Cinderella
Treasure Island
Alice in Wonderland
The Story of Robin Hood
Peter Pan
The Sword and the Rose
The Living Desert
Rob Roy: The Highland Rogue
The Vanishing Prairie
20,000 Leagues Under the Sea
Davy Crockett: King of the Wild Frontier
Lady and the Tramp
The African Lion
The Littlest Outlaw
The Great Locomotive Chase
Davy Crockett and the River Pirates
Secrets of Life
Westward Ho the Wagons!
Johnny Tremain
Perri
Old Yeller
The Light in the Forest
White Wilderness
Tonka
Sleeping Beauty
The Shaggy Dog
Darby O'Gill and the Little People
Third Man on the Mountain
Toby Tyler or 10 Weeks with a Circus
Kidnapped
Pollyanna
Jungle Cat
Ten Who Dared
Swiss Family Robinson
One Hundred and One Dalmatians

In [288]:
movie_info_list[-50]


{'title': 'Artemis Fowl',
 'Directed by': 'Kenneth Branagh',
 'Screenplay by': ['Conor McPherson', 'Hamish McColl'],
 'Based on': ['Artemis Fowl', 'by', 'Eoin Colfer'],
 'Produced by': ['Kenneth Branagh', 'Judy Hofflund'],
 'Starring': ['Ferdia Shaw',
  'Lara McDonnell',
  'Josh Gad',
  'Tamara Smart',
  'Nonso Anozie',
  'Colin Farrell',
  'Judi Dench'],
 'Cinematography': 'Haris Zambarloukos',
 'Edited by': 'Matthew Tucker',
 'Music by': 'Patrick Doyle',
 'Production companies': ['Walt Disney Pictures',
  'TriBeCa Productions',
  'Marzano Films'],
 'Distributed by': 'Disney+',
 'Release date': ['June 12, 2020'],
 'Running time': '95 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$125 million',
 'Running time (int)': 95,
 'Budget (float)': 125000000.0,
 'Box office (float)': None,
 'Release date (datetime)': datetime.datetime(2020, 6, 12, 0, 0)}