In [1]:
# import libraries
from bs4 import BeautifulSoup
import requests
import openpyxl

In [2]:
# Build a new workbook that will receive the scraped chart.
excel = openpyxl.Workbook()

# Use the workbook's active worksheet and give it a descriptive name.
sheet = excel.active
sheet.title = "Top 250 Rated Movies"

# The first row is the column header for the data rows appended afterwards.
header_row = ["Rank", "Title", "Release Year", "IMDB Rating"]
sheet.append(header_row)

In [3]:
# Download the IMDb Top 250 chart, parse one (rank, title, year, rating) row per
# movie, and append the rows to `sheet`.
#
# Rows are collected in a list first and written to the sheet only after the
# whole page has parsed, so a failure part-way through never leaves a
# half-filled sheet behind for the save cell to write out.
try:
    # Fetch the page. Without a timeout, requests waits indefinitely on a
    # stalled connection and the cell would hang.
    source = requests.get('https://www.imdb.com/chart/top/', timeout=30)

    # Turn HTTP error statuses (4xx/5xx) into an exception.
    source.raise_for_status()

    # Parse the HTML text of the response.
    soup = BeautifulSoup(source.text, 'html.parser')

    # The chart rows are expected inside <tbody class="lister-list">. find()
    # returns None when nothing matches, so check before chaining find_all().
    chart = soup.find('tbody', class_='lister-list')
    if chart is None:
        raise ValueError("no <tbody class='lister-list'> found in the page")

    # One <tr> per movie.
    movies = chart.find_all('tr')

    rows = []
    for movie in movies:
        # The title cell holds the rank, the title link and the year span,
        # so look it up once and reuse it.
        title_cell = movie.find('td', class_='titleColumn')

        # Movie title: text of the <a> tag.
        title = title_cell.a.text

        # Rank: everything before the first full stop in the cell text.
        rank = int(title_cell.get_text(strip=True).split('.')[0])

        # Release year: text of the <span>, without the surrounding parentheses.
        year = int(title_cell.span.text.strip('()'))

        # Rating: text of the <strong> tag in the rating cell.
        rating = float(movie.find('td', class_='ratingColumn imdbRating').strong.text)

        # Numbers are stored as int/float so Excel can sort and aggregate them
        # instead of treating them as text.
        rows.append([rank, title, year, rating])

    # Every row parsed cleanly; now write them to the sheet.
    for row in rows:
        sheet.append(row)

except requests.RequestException as e:
    # Connection problems, timeouts and HTTP error statuses.
    print(f'Could not download the IMDb chart: {e}')
except (AttributeError, ValueError) as e:
    # A missing tag (None dereference) or a non-numeric rank/year/rating.
    print(f'Could not parse the IMDb chart (the page layout may have changed): {e}')

In [4]:
# Write the workbook to disk under a fixed file name (relative path).
output_path = "IMDB Top 250 Movies.xlsx"
excel.save(output_path)