### Imports

In [14]:
import requests
from bs4 import BeautifulSoup

### What do I want?
    Get the number of characters in each movie in order to compare it against the movie budgets.
    Compare IMDb scores vs. other rating sources.

### Scraping step by step

In [15]:
# Fetch the Pixar feature-films index page and collect the <div class="slide">
# elements; each slide holds one movie's poster, title and link.
base_url = 'https://www.pixar.com'
endpoint = '/feature-films-launch'
request_url = f'{base_url}{endpoint}'
# Without a timeout requests can wait forever on a stalled connection.
res = requests.get(request_url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')
movies = soup.find_all('div', {'class':'slide'})
# Show the first slide to inspect its structure.
movies[0]

<div class="slide" data-animation-role="image" data-type="image">
<div class="margin-wrapper">
<a class="image-slide-anchor content-fit" href="/elemental">
<noscript><img alt="Elemental" src="https://images.squarespace-cdn.com/content/v1/51cdafc4e4b09eb676a64e68/1677261445468-VNTJ4L41VES1YMLW5R9G/elemental.jpg"/></noscript><img alt="Elemental" class="thumb-image" data-image="https://images.squarespace-cdn.com/content/v1/51cdafc4e4b09eb676a64e68/1677261445468-VNTJ4L41VES1YMLW5R9G/elemental.jpg" data-image-dimensions="720x1053" data-image-focal-point="0.5,0.5" data-image-id="63f8fa843cdaf815c13436de" data-load="false" data-src="https://images.squarespace-cdn.com/content/v1/51cdafc4e4b09eb676a64e68/1677261445468-VNTJ4L41VES1YMLW5R9G/elemental.jpg" data-type="image"/>
</a>
<div class="image-slide-title">Elemental</div>
</div>
</div>

In [18]:
# Pull the href of the first movie's detail page out of its slide.
# The tag and the string get separate names so `link` is only ever a str.
anchor = movies[0].find('a')
link = anchor.get('href')
# End on the bare name so the cell actually displays the extracted href.
link

'/elemental'

In [23]:
# The slide's text is the movie title surrounded by whitespace; store the
# stripped value so `name` matches what the cell displays.
name = movies[0].getText().strip()
name

'Elemental'

In [32]:
# Fetch one movie page and list the <h2> headings of its character section.
endpoint = '/elemental'
request_url = f'{base_url}{endpoint}'
# Without a timeout requests can wait forever on a stalled connection.
res = requests.get(request_url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')
# NOTE(review): the section id mentions "lightyear" although this is the
# Elemental page - presumably a leftover from a copied page template, so this
# id is unlikely to work for other movies. soup.find returns None when the id
# is missing, in which case the next line raises AttributeError.
characters = soup.find('section', {'id':'lightyear_character_main-copy'})
blocks = characters.find_all('h2')
blocks

[<h2 style="white-space:pre-wrap;">Ember Lumen</h2>,
 <h2 style="white-space:pre-wrap;">Wade Ripple</h2>,
 <h2 style="white-space:pre-wrap;">Bernie Lumen</h2>,
 <h2 style="white-space:pre-wrap;">Cinder Lumen</h2>,
 <h2 style="white-space:pre-wrap;">Clod</h2>,
 <h2 style="white-space:pre-wrap;">Brook Ripple</h2>,
 <h2 style="white-space:pre-wrap;">Gale</h2>,
 <h2 style="white-space:pre-wrap;">Fern</h2>]

### Modularization

In [16]:
def get_html(url, timeout=10):
    '''
    Downloads a page and returns its parsed HTML.

    url: address of the page to fetch.
    timeout: seconds to wait for the server before giving up (default 10).
        Without it requests can block indefinitely on a stalled connection.

    Returns a BeautifulSoup object built with the 'html.parser' parser.

    Raises requests.exceptions.HTTPError when the server answers with a
    4xx/5xx status, so an error page is never parsed as if it were content.
    '''
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return BeautifulSoup(res.content, 'html.parser')

In [30]:
def get_characters(m_html, section_index=2):
    '''
    Returns a list of character names found on a movie page.

    m_html: parsed movie page (anything exposing find_all('section')).
    section_index: position of the <section> holding the characters among
        all <section> elements of the page (default 2, i.e. the third one).

    Every <h2> inside that section is treated as a character name. Names are
    stripped of surrounding whitespace and blank headings are dropped.
    An empty list is returned when the page has no section at that position.

    Caveat: on some pages the section opens with a non-character heading
    (e.g. 'The Human Side of Buzz' for Lightyear), which is returned as well.
    '''
    sections = m_html.find_all('section')
    try:
        section = sections[section_index]
    except IndexError:
        # Page layout differs from the expected one: nothing to extract.
        return []
    names = (heading.getText().strip() for heading in section.find_all('h2'))
    return [n for n in names if n]

def get_movies_dict(movies):
    '''
    Returns a list of single-entry dictionaries, one per movie slide, each
    mapping the movie title to its list of characters.

    movies: iterable of slide tags; each must expose getText() and find('a').

    For every slide the linked movie page is downloaded with get_html and
    scanned with get_characters. The link is resolved against the
    notebook-level base_url, so that variable must be defined beforehand.
    A slide without a link still gets an entry, with an empty character list,
    so the result always has one entry per slide.
    '''
    from urllib.parse import urljoin

    movies_dict_list = []
    for m in movies:
        name = m.getText().strip()
        anchor = m.find('a')
        link = anchor.get('href') if anchor is not None else None
        if not link:
            # No detail page to visit: keep the movie, with no characters.
            movies_dict_list.append({name: []})
            continue
        # urljoin copes with relative ('/elemental') and absolute hrefs alike.
        request_url = urljoin(base_url, link)
        m_html = get_html(request_url)
        characters = get_characters(m_html)
        movies_dict_list.append({name: characters})
    return movies_dict_list

def get_pixar_movie_list(url):
    '''
    Returns every <div class="slide"> element of the page found at url.
    '''
    page = get_html(url)
    slides = page.find_all('div', attrs={'class': 'slide'})
    return slides


In [31]:
# Run the whole pipeline: list the movie slides on the feature-films index
# page, then scrape each movie's own page for its characters.
# NOTE: get_movies_dict downloads one page per movie, so this cell needs
# network access. base_url must already be defined by an earlier cell.
endpoint = '/feature-films-launch'
request_url = f'{base_url}{endpoint}'
movies = get_pixar_movie_list(request_url)
movies_dict = get_movies_dict(movies)
# Despite its name, movies_dict is a list of {title: [characters]} dicts.
movies_dict

[{'Elemental': ['Ember Lumen',
   'Wade Ripple',
   'Bernie Lumen',
   'Cinder Lumen',
   'Clod',
   'Brook Ripple',
   'Gale',
   'Fern']},
 {'Lightyear': ['The Human Side of Buzz',
   'Buzz Lightyear',
   'Izzy Hawthorne',
   'Sox',
   'Maurice “Mo” Morrison',
   'Darby Steel',
   'Zurg',
   'Alisha Hawthorne',
   'Zyclops',
   'Commander Burnside',
   'Airman DÍaz',
   'I.V.A.N.']},
 {'Turning Red': ['The Inner 13-Year Old',
   'Meilin Lee',
   'Red Panda Mei',
   'Ming Lee',
   'Jin Lee',
   'Miriam Mendelsohn',
   'Priya Mangal',
   'Abby Park',
   '4*Town',
   'Tyler Nguyen-Baker',
   'Grandma',
   'Sun Yee']},
 {'Luca': ['Just Add Water',
   'Luca Paguro',
   'Alberto Scorfano',
   'Giulia Marcovaldo',
   'Ercole Visconti',
   'Massimo Marcovaldo',
   'Daniela Paguro',
   'Lorenzo Paguro',
   'Grandma Paguro',
   'Uncle Ugo',
   'Machiavelli']},
 {'Soul': ['Joe Gardner',
   '22',
   'Dez',
   'Dorothea Williams ',
   'Libba Gardner',
   'Moonwind',
   'The Counselors',
   'Terry