In [1]:
import requests

In [2]:
from bs4 import BeautifulSoup

# Task 1: Scrape Toy Story 3 info from wiki

In [3]:
URL = "https://en.wikipedia.org/wiki/Toy_Story_3"

In [66]:
def movie_info(movie_URL):
    source = requests.get(movie_URL).text
    soup = BeautifulSoup(source, 'lxml')
    
    info_box = soup.find('table', class_="infobox vevent")
    
    info_dict = {}

    title = info_box.find('tr').text
    info_dict['title'] = title

    for row in info_box.find_all('tr'):
        try:
            # get key
            label = row.find('th', class_="infobox-label").get_text(" ", strip=True)
            info_dict[label] = []

            # get value
            data = row.find('td', class_="infobox-data")
            data_list = data.find('div', class_="plainlist")
            # Non-list data
            if data_list is None:
                info_dict[label] = data.get_text(" ", strip=True).replace('\xa0', ' ')
            # list data
            else:
                for info in data_list.find_all('li'):
                    info_text = info.get_text(" ", strip=True).replace('\xa0', ' ')
                    info_dict[label].append(info_text)

        except AttributeError:
            continue
            
    return info_dict

In [67]:
info_dict = movie_info(URL)

In [68]:
info_dict

{'title': 'Toy Story 3',
 'Directed by': 'Lee Unkrich',
 'Produced by': 'Darla K. Anderson',
 'Screenplay by': 'Michael Arndt',
 'Story by': ['John Lasseter', 'Andrew Stanton', 'Lee Unkrich'],
 'Starring': ['Tom Hanks',
  'Tim Allen',
  'Joan Cusack',
  'Don Rickles',
  'Wallace Shawn',
  'John Ratzenberger',
  'Estelle Harris',
  'Ned Beatty',
  'Michael Keaton',
  'Jodi Benson',
  'John Morris'],
 'Music by': 'Randy Newman',
 'Cinematography': ['Jeremy Lasky', 'Kim White'],
 'Edited by': 'Ken Schretzmann',
 'Production companies': ['Walt Disney Pictures', 'Pixar Animation Studios'],
 'Distributed by': 'Walt Disney Studios Motion Pictures',
 'Release date': ['June 12, 2010 ( 2010-06-12 ) ( Taormina Film Fest )',
  'June 18, 2010 ( 2010-06-18 ) (United States)'],
 'Running time': '103 minutes [1]',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$200 million [1]',
 'Box office': '$1.067 billion [1]'}

# Task 2: Scrape infobox for all movies in List of Disney Films

In [44]:
movie_list_URL = "https://en.wikipedia.org/wiki/List_of_Walt_Disney_Pictures_films"

In [45]:
source1 = requests.get(movie_list_URL).text
soup1 = BeautifulSoup(source1, 'lxml')

In [83]:
movie = soup1.find('table', class_='wikitable sortable')

In [84]:
movie_source = movie.find('i').find('a')['href']

In [85]:
movie_source

'/wiki/Academy_Award_Review_of_Walt_Disney_Cartoons'

In [86]:
movie_info("https://en.wikipedia.org" + movie_source)

{'title': 'Academy Award Review of Walt Disney Cartoons',
 'Production company': 'Walt Disney Productions',
 'Release date': ['May 19, 1937 ( 1937-05-19 )'],
 'Running time': '41 minutes (74 minutes 1966 release)',
 'Country': 'United States',
 'Language': 'English',
 'Box office': '$45.472'}

In [79]:
movies_info_list = []

In [87]:
for movie_table in soup1.find_all('table', class_='wikitable sortable'):
    for movie in movie_table.find_all('i'):
        movie_title = movie.find('a')
        if movie_title is None:
            continue
        movie_source = movie_title['href']
        movie_URL = "https://en.wikipedia.org" + movie_source
        print(movie_URL)
        info_dict = movie_info(movie_URL)
        movies_info_list.append(info_dict)

https://en.wikipedia.org/wiki/Academy_Award_Review_of_Walt_Disney_Cartoons
https://en.wikipedia.org/wiki/Snow_White_and_the_Seven_Dwarfs_(1937_film)
https://en.wikipedia.org/wiki/Pinocchio_(1940_film)
https://en.wikipedia.org/wiki/Fantasia_(1940_film)
https://en.wikipedia.org/wiki/The_Reluctant_Dragon_(1941_film)
https://en.wikipedia.org/wiki/Dumbo
https://en.wikipedia.org/wiki/Bambi
https://en.wikipedia.org/wiki/Saludos_Amigos
https://en.wikipedia.org/wiki/Victory_Through_Air_Power_(film)
https://en.wikipedia.org/wiki/The_Three_Caballeros
https://en.wikipedia.org/wiki/Make_Mine_Music
https://en.wikipedia.org/wiki/Song_of_the_South
https://en.wikipedia.org/wiki/Fun_and_Fancy_Free
https://en.wikipedia.org/wiki/Melody_Time
https://en.wikipedia.org/wiki/So_Dear_to_My_Heart
https://en.wikipedia.org/wiki/The_Adventures_of_Ichabod_and_Mr._Toad
https://en.wikipedia.org/wiki/Cinderella_(1950_film)
https://en.wikipedia.org/wiki/Treasure_Island_(1950_film)
https://en.wikipedia.org/wiki/Alice_in_

AttributeError: 'NoneType' object has no attribute 'find'

In [88]:
movies_info_list

[{'title': 'Academy Award Review of Walt Disney Cartoons',
  'Production company': 'Walt Disney Productions',
  'Release date': ['May 19, 1937 ( 1937-05-19 )'],
  'Running time': '41 minutes (74 minutes 1966 release)',
  'Country': 'United States',
  'Language': 'English',
  'Box office': '$45.472'},
 {'title': 'Snow White and the Seven Dwarfs',
  'Directed by': ['David Hand (supervising)',
   'William Cottrell',
   'Wilfred Jackson',
   'Larry Morey',
   'Perce Pearce',
   'Ben Sharpsteen'],
  'Produced by': 'Walt Disney',
  'Written by': ['Ted Sears',
   'Richard Creedon',
   'Otto Englander',
   'Dick Rickard',
   'Earl Hurd',
   'Merrill De Maris',
   'Dorothy Ann Blank',
   'Webb Smith'],
  'Based on': 'Snow White by The Brothers Grimm',
  'Starring': ['Adriana Caselotti',
   'Lucille La Verne',
   'Harry Stockwell',
   'Roy Atwell',
   'Pinto Colvig',
   'Otis Harlan',
   'Scotty Mattraw',
   'Billy Gilbert',
   'Eddie Collins',
   'Moroni Olsen',
   'Stuart Buchanan'],
  'Music 