<a href="https://colab.research.google.com/github/pallavmarch/Web-Scraping-Projects/blob/main/Imdb_WebScraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
from rich.console import Console
from rich.table import Table

In [26]:
# Display name -> IMDb chart URL. Each entry is scraped once by the driver
# loop that iterates over this dict and calls scrape_imdb_chart().
imdb_charts = {
    "Top Movies": "https://www.imdb.com/chart/top/",
    "Most Popular Movies": "https://www.imdb.com/chart/moviemeter/?ref_=nv_mv_mpm",
    "Box Office": "https://www.imdb.com/chart/boxoffice/?ref_=nv_ch_cht",
    "Top TV Shows": "https://www.imdb.com/chart/toptv/?ref_=nv_tvv_250",
    "Most Popular TV Shows": "https://www.imdb.com/chart/tvmeter/?ref_=nv_tvv_mptv",
}

# HTTP headers sent with every request made by fetch_html(): a desktop
# Chrome User-Agent string and an English Accept-Language preference.
# NOTE(review): presumably needed so IMDb serves the regular English page to
# a non-browser client -- confirm against current IMDb behaviour.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.5",
}

# Shared rich console used for all status messages and rendered tables.
console = Console()
# Accumulates one dict per scraped row (keys: "Chart", "Rank", "Title",
# "Year", "IMDb Rating"); appended to by scrape_imdb_chart().
scraped_data = []


def fetch_html(url, max_retries=3, retry_delay=2.0, timeout=10):
    """Fetch the HTML body of *url*, retrying on failure.

    Args:
        url: Address to request; the module-level ``HEADERS`` are sent.
        max_retries: Maximum number of GET attempts.
        retry_delay: Seconds to wait between consecutive attempts. Nothing
            is slept after the final attempt; pass 0 to retry immediately.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        The response text of the first attempt answered with HTTP 200, or
        ``None`` when every attempt failed (non-200 status or a
        ``requests`` exception). Each failure is reported on ``console``.
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=HEADERS, timeout=timeout)
        except requests.exceptions.RequestException as e:
            console.print(f"[bold red]Request failed (Attempt {attempt}/{max_retries}): {e}[/bold red]")
        else:
            if response.status_code == 200:
                return response.text
            console.print(f"[bold red]Failed to retrieve data (Attempt {attempt}/{max_retries}): {response.status_code}[/bold red]")

        # Pause before the next attempt so a transient failure or rate limit
        # has a chance to clear; waiting after the last attempt is pointless.
        if attempt < max_retries and retry_delay > 0:
            time.sleep(retry_delay)
    return None

def scrape_imdb_chart(chart_name, url, limit=10):
    """Scrape one IMDb chart page, print it as a table and record its rows.

    Args:
        chart_name: Label used in the table title, in status messages and
            as the "Chart" value of every recorded row.
        url: Chart page to download via ``fetch_html``.
        limit: Maximum number of list entries to process (default 10, the
            previously hard-coded value). ``None`` processes every entry.

    Returns:
        None. On success a rich table is printed to ``console`` and one dict
        per entry is appended to the module-level ``scraped_data`` list. If
        the page cannot be fetched or contains no matching list items, an
        error message is printed and nothing is recorded.
    """
    # rich is already a dependency of this file; imported here so this
    # function stays self-contained.
    from rich.markup import escape

    html = fetch_html(url)
    if not html:
        console.print(f"[bold red]Failed to retrieve {chart_name} after multiple attempts.[/bold red]")
        return

    soup = BeautifulSoup(html, "html.parser")
    movies = soup.find_all("li", class_="ipc-metadata-list-summary-item")

    if not movies:
        console.print(f"[bold red]No movies found for {chart_name}. IMDb might have changed its structure.[/bold red]")
        return

    table = Table(title=f"🎬 {chart_name}", header_style="bold magenta")
    table.add_column("Rank", justify="center", style="cyan")
    table.add_column("Title", justify="left", style="yellow")
    table.add_column("Year", justify="center", style="green")
    table.add_column("IMDb Rating", justify="center", style="blue")

    for rank, movie in enumerate(movies[:limit], start=1):
        title = movie.find("h3", class_="ipc-title__text")
        year = movie.find("span", class_="cli-title-metadata-item")
        rating = movie.find("span", class_="ipc-rating-star--rating")

        # Any element missing from the entry is reported as "N/A".
        title_text = title.text if title else "N/A"
        year_text = year.text if year else "N/A"
        rating_text = rating.text if rating else "N/A"

        # Plain-string cells are parsed as rich console markup, so scraped
        # text is escaped for display; the stored row keeps the raw text.
        table.add_row(
            f"[bold]{rank}[/bold]",
            escape(title_text),
            escape(year_text),
            escape(rating_text),
        )
        scraped_data.append({"Chart": chart_name, "Rank": rank, "Title": title_text, "Year": year_text, "IMDb Rating": rating_text})

    console.print(table)


# Scrape and display every configured chart, in the dict's insertion order.
for chart_name in imdb_charts:
    url = imdb_charts[chart_name]
    scrape_imdb_chart(chart_name, url)
