# Top 250 IMDB Movies

# In [13]:
import pandas as pd 
import requests
from bs4 import BeautifulSoup

url = 'https://www.imdb.com/chart/top/'
domain = 'https://www.imdb.com'

# The server refuses requests that lack a browser-like User-Agent, so send one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# Seconds to wait for the server; without a timeout requests.get() can block
# indefinitely.
REQUEST_TIMEOUT = 30

# Multipliers for abbreviated vote counts such as "(2.9M)" or "(950K)".
VOTE_MULTIPLIERS = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

# Column order of the generated CSV file.
CSV_COLUMNS = (
    "movie_title",
    "movie_stars",
    "movie_ratings",
    "release_year",
    "movie_duration",
    "movie_url",
)


def node_text(node):
    """Return the text of a BeautifulSoup node, or '' when the lookup found nothing."""
    return node.text if node is not None else ''


def parse_title(heading):
    """Return the movie title from a ranked heading such as '1. The Godfather'.

    Only the leading "<rank>." prefix is removed, so titles that contain
    periods themselves ("Mr. Smith Goes to Washington", "Monsters, Inc.")
    are kept intact. A heading without a numeric rank prefix is returned
    unchanged (apart from surrounding whitespace).
    """
    heading = heading.strip()
    rank, separator, title = heading.partition('.')
    if separator and rank.strip().isdigit():
        return title.strip()
    return heading


def parse_rating(text):
    """Return the leading numeric part of *text*, e.g. '9.3' from '9.3 (2.9M)'.

    Returns '' when the text does not start with a number.
    """
    text = text.strip()
    end = 0
    while end < len(text) and (text[end].isdigit() or text[end] == '.'):
        end += 1
    return text[:end]


def parse_vote_count(text):
    """Expand an abbreviated vote count into a plain digit string.

    '(2.9M)' -> '2900000', '(3M)' -> '3000000', '(1.2K)' -> '1200',
    '(812)' -> '812'. Text that cannot be read as a number is returned
    cleaned of parentheses, commas and surrounding whitespace.
    """
    cleaned = text.replace('(', '').replace(')', '').replace(',', '').strip()
    suffix = cleaned[-1:].upper()
    if suffix in VOTE_MULTIPLIERS:
        number, multiplier = cleaned[:-1], VOTE_MULTIPLIERS[suffix]
    else:
        number, multiplier = cleaned, 1
    try:
        # round() absorbs binary floating-point noise, e.g. 2.9 * 1_000_000.
        return str(round(float(number) * multiplier))
    except ValueError:
        return cleaned


def extract_movie(container):
    """Build one CSV row (a dict keyed by CSV_COLUMNS) from a chart list item.

    *container* is a BeautifulSoup tag for one chart entry. Any element that
    cannot be found yields an empty string for its column instead of raising.
    """
    title_tag = container.find('h3', class_='ipc-title__text')
    # NOTE(review): these class names look build-generated; presumably this
    # span wraps the star rating. Update the selector if the markup changes.
    rating_tag = container.find('span', class_='sc-479faa3c-1 iMRvgp')
    votes_tag = container.find('span', class_='ipc-rating-star--voteCount')
    # Searching for the single class matches the span whatever other classes
    # it carries; the first item is used as the year, the second as duration.
    metadata = container.find_all('span', class_='cli-title-metadata-item')
    link_tag = container.find('a', class_='ipc-title-link-wrapper')
    href = link_tag.get('href') if link_tag is not None else None

    return {
        "movie_title": parse_title(node_text(title_tag)),
        "movie_stars": parse_rating(node_text(rating_tag)),
        "movie_ratings": parse_vote_count(node_text(votes_tag)),
        "release_year": node_text(metadata[0]).strip() if metadata else '',
        "movie_duration": node_text(metadata[1]).strip() if len(metadata) > 1 else '',
        "movie_url": domain + href if href else '',
    }


# Run the scrape only when executed as a script or notebook cell, so the
# parsing helpers above can be imported without touching the network.
if __name__ == "__main__":
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the movie containers
        containers = soup.find_all('li', class_='ipc-metadata-list-summary-item')

        records = [extract_movie(container) for container in containers]

        # Column-oriented view of the records: one list per CSV column.
        df = {column: [record[column] for record in records] for column in CSV_COLUMNS}

        data = pd.DataFrame(df)
        data.to_csv("IMBD_Data.csv", index=False)
    else:
        print(f"Error: {response.status_code}")