In [37]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the IMDb release calendar page and save the extracted fields to
# 'imdb_calendar.csv'.
#
# Flow: fetch the page -> parse it with BeautifulSoup -> collect four parallel
# lists (dates, names, two description fields) -> trim them to a common
# length -> write them out as a CSV via pandas.

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36'
}
url = "https://www.imdb.com/calendar/?ref_=login"

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests will wait indefinitely on a stalled connection.
request_timeout = 30


def _join_list_items(ul):
    """Return the stripped text of every inline-list <li> in *ul*, space-joined."""
    return " ".join(
        li.text.strip()
        for li in ul.find_all("li", class_="ipc-inline-list__item")
    )


# Send the GET request. Network-level failures (DNS, refused connection,
# timeout, ...) are reported the same way as a bad status code instead of
# surfacing as a raw traceback.
try:
    response = requests.get(url, headers=headers, timeout=request_timeout)
except requests.RequestException as exc:
    response = None
    print(f"Failed to retrieve the webpage. Error: {exc}")

if response is not None and response.status_code == 200:
    webpage = response.text
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(webpage, "lxml")

    # Locate the elements of interest. A class_ value containing spaces is
    # matched against the exact string of the tag's class attribute, so these
    # selectors depend on the page emitting the classes in this precise order.
    elements = soup.find_all("div", class_="ipc-title ipc-title--base ipc-title--title ipc-title--on-textPrimary")
    elements_1 = soup.find_all("a", class_="ipc-metadata-list-summary-item__t")
    list_items_1 = soup.find_all("ul", class_="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--no-wrap ipc-inline-list--inline ipc-metadata-list-summary-item__tl base")
    list_items_2 = soup.find_all("ul", class_="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--no-wrap ipc-inline-list--inline ipc-metadata-list-summary-item__stl base")

    # Extract the stripped text of each matched element.
    dates = [element.text.strip() for element in elements]
    names = [element.text.strip() for element in elements_1]
    descriptions_1 = [_join_list_items(ul) for ul in list_items_1]
    descriptions_2 = [_join_list_items(ul) for ul in list_items_2]

    # The four lists can differ in length, and a DataFrame built from a dict
    # of lists requires equal lengths, so trim everything to the shortest.
    # NOTE(review): rows are paired purely by position. If the page has one
    # date heading per group of titles (rather than one per title), 'Date'
    # will not line up with 'Name' -- confirm against the page structure.
    min_length = min(len(dates), len(names), len(descriptions_1), len(descriptions_2))

    dates = dates[:min_length]
    names = names[:min_length]
    descriptions_1 = descriptions_1[:min_length]
    descriptions_2 = descriptions_2[:min_length]

    # Create a DataFrame
    data = pd.DataFrame({
        'Date': dates,
        'Name': names,
        'Description_1': descriptions_1,
        'Description_2': descriptions_2,
    })

    # Save the DataFrame to a CSV file (no index column).
    data.to_csv('imdb_calendar.csv', index=False)

    print("Data saved to imdb_calendar.csv")

elif response is not None:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")


Data saved to imdb_calendar.csv
