In [49]:
import requests
from bs4 import BeautifulSoup

In [50]:
def extract_movie_data(div):
    movie = {}
    
    try:
        movie['title'] = div.find('a', class_='meta-title-link').text.strip()
    except AttributeError:
        movie['title'] = 'N/A'
    
    try:
        movie['release_date'] = div.find('span', class_='date').text.strip()
    except AttributeError:
        movie['release_date'] = 'N/A'
    
    try:
        movie['genre'] = div.find('span', class_='dark-grey-link').text.strip()
    except AttributeError:
        movie['genre'] = 'N/A'
    
    movie['nationalities'] = [span.text.strip() for span in div.find_all('span', class_='nationality')]
    
    try:
        movie['director'] = div.find('div', class_='meta-body-direction').find('span', class_='dark-grey-link').text.strip()
    except AttributeError:
        movie['director'] = 'N/A'
    
    try:
        actors_div = div.find('div', class_='meta-body-item meta-body-actor')
        if actors_div:
            actors_spans = actors_div.find_all('span', class_='dark-grey-link')
            movie['actors'] = [span.text.strip() for span in actors_spans]
        else:
            movie['actors'] = []
    except AttributeError:
        movie['actors'] = []
 
    try:
        press_rating_elem = div.find('span', text=' Presse ')
        movie['press_rating'] = press_rating_elem.find_next('span', class_='stareval-note').text.strip() if press_rating_elem else 'N/A'
    except AttributeError:
        movie['press_rating'] = 'N/A'
    
    try:
        spectator_rating_elem = div.find('span', text=' Spectateurs ')
        movie['spectator_rating'] = spectator_rating_elem.find_next('span', class_='stareval-note').text.strip() if spectator_rating_elem else 'N/A'
    except AttributeError:
        movie['spectator_rating'] = 'N/A'
    
    try:
        movie['synopsis'] = div.find('div', class_='content-txt').text.strip()
    except AttributeError:
        movie['synopsis'] = 'N/A'
    
    movie['showtimes'] = [span.find('span', class_='showtimes-hour-item-value').text.strip() 
                          for span in div.find_all('span', class_='showtimes-hour-item') if span.find('span', class_='showtimes-hour-item-value')]
    
    return movie


In [51]:
url = "https://www.allocine.fr/seance/salle_gen_csalle=B0045.html"

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
# Use the function
movie_divs = soup.find_all('div', class_='card entity-card entity-card-list movie-card-theater cf hred')
movies = [extract_movie_data(div) for div in movie_divs]

  press_rating_elem = div.find('span', text=' Presse ')
  spectator_rating_elem = div.find('span', text=' Spectateurs ')


In [53]:
movies

[{'title': 'Le Robot Sauvage',
  'release_date': '9 octobre 2024',
  'genre': 'Aventure',
  'nationalities': ['U.S.A.'],
  'director': 'Chris Sanders',
  'actors': ["Lupita Nyong'o", 'Pedro Pascal', 'Kit Connor'],
  'press_rating': 'N/A',
  'spectator_rating': 'N/A',
  'synopsis': "Après avoir fait naufrage sur une île déserte, un robot doit apprendre à s'adapter à un environnement hostile en nouant petit à petit des relations avec les animaux de l'île.",
  'showtimes': ['16:00']},
 {'title': 'All We Imagine as Light',
  'release_date': '2 octobre 2024',
  'genre': 'Drame',
  'nationalities': ['France', 'Inde', 'Luxembourg', 'Pays-Bas'],
  'director': 'Payal Kapadia',
  'actors': ['Kani Kusruti', 'Divya Prabha', 'Chhaya Kadam'],
  'press_rating': 'N/A',
  'spectator_rating': 'N/A',
  'synopsis': "Sans nouvelles de son mari depuis des années, Prabha, infirmière à Mumbai, s'interdit toute vie sentimentale. De son côté, Anu, sa jeune colocataire, fréquente en cachette un jeune homme qu’el