In [1]:
# For this to work, you need to have all libraries below installed and have chromedriver.exe in the same folder as this file.
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By

In [2]:
# Load the dynamic IMDB review page and expand it until every review is present.
# Produces `page_title`, `nr_of_reviews` and `soup` (the fully expanded page).


def _extract_title_id(listing_url):
    """Return the IMDB title id (``tt`` followed by digits) found in ``listing_url``.

    Scanning every path segment (instead of taking a fixed index) also accepts
    URLs without a scheme, with a trailing query string, or with extra segments.

    Raises:
        ValueError: if no path segment looks like a title id.
    """
    for segment in listing_url.strip().split("/"):
        candidate = segment.split("?")[0].split("#")[0]
        if candidate.startswith("tt") and candidate[2:].isdigit():
            return candidate
    raise ValueError(f"No IMDB title id (e.g. tt1677720) found in: {listing_url!r}")


# Ask for the listing first so a bad URL fails before a browser is launched.
imdb_page = _extract_title_id(input("Put your IMDB listing here: "))

reviews_per_chunk = 25
review_selector = ".lister-item-content"  # selector used to count loaded review blocks
load_more_xpath = '//*[@id="load-more-trigger"]'

# NOTE(review): passing the driver path positionally is only accepted by older
# Selenium releases — confirm against the installed version.
driver = webdriver.Chrome("chromedriver.exe")
try:
    driver.get(f'https://www.imdb.com/title/{imdb_page}/reviews?ref_=tt_urv')
    html = driver.page_source

    # Checking the amount of reviews so we know how many times to press the 'load more reviews' button
    soup = BeautifulSoup(html, "lxml")
    page_title = soup.title.string.split(" -")[0]
    nr_of_reviews = int(soup.select(".lister")[0].find("span").get_text().split(" ")[0].replace(",", ""))

    # Only the reviews that are not on the page yet need a button press; use
    # ceiling division so a partially filled last chunk is still fetched.
    already_loaded = len(soup.select(review_selector))
    remaining = max(nr_of_reviews - already_loaded, 0)
    total_button_presses = (remaining + reviews_per_chunk - 1) // reviews_per_chunk

    print(f"Listing: {page_title}")
    if nr_of_reviews > 0:
        print(f"{nr_of_reviews} total reviews in {total_button_presses} chunks of {reviews_per_chunk} reviews.\n")
    else:
        print("No reviews yet...\n")

    # Clicking the button
    wait = WebDriverWait(driver, 10)
    for press in range(1, total_button_presses + 1):
        loaded_before = len(driver.find_elements(By.CSS_SELECTOR, review_selector))
        load_more = wait.until(ec.visibility_of_element_located((By.XPATH, load_more_xpath)))
        load_more.click()
        # Block until the click has actually added reviews; otherwise the final
        # page snapshot below could be taken before the last chunk arrives.
        wait.until(lambda d: len(d.find_elements(By.CSS_SELECTOR, review_selector)) > loaded_before)
        print(f"Loading all reviews - chunk {press} out of {total_button_presses}")

    # Get all page content after all reviews are loaded
    html = driver.page_source
finally:
    # Always close the browser, even when a wait times out.
    driver.quit()

soup = BeautifulSoup(html, 'html5lib')

Put your IMDB listing here: https://www.imdb.com/title/tt1677720/
Listing: Ready Player One (2018)
1777 total reviews in 71 chunks of 25 reviews.

Loading all reviews - chunk 1 out of 71
Loading all reviews - chunk 2 out of 71
Loading all reviews - chunk 3 out of 71
Loading all reviews - chunk 4 out of 71
Loading all reviews - chunk 5 out of 71
Loading all reviews - chunk 6 out of 71
Loading all reviews - chunk 7 out of 71
Loading all reviews - chunk 8 out of 71
Loading all reviews - chunk 9 out of 71
Loading all reviews - chunk 10 out of 71
Loading all reviews - chunk 11 out of 71
Loading all reviews - chunk 12 out of 71
Loading all reviews - chunk 13 out of 71
Loading all reviews - chunk 14 out of 71
Loading all reviews - chunk 15 out of 71
Loading all reviews - chunk 16 out of 71
Loading all reviews - chunk 17 out of 71
Loading all reviews - chunk 18 out of 71
Loading all reviews - chunk 19 out of 71
Loading all reviews - chunk 20 out of 71
Loading all reviews - chunk 21 out of 71
L

In [6]:
# Turn every review block of the expanded page into a dict and collect them in
# `all_reviews`. A field whose element is missing becomes "NA" (the same marker
# already used for reviews without a score) instead of aborting the whole run
# with an IndexError.


def _first_match(block, css):
    """Return the first element under ``block`` matching ``css``, or None."""
    found = block.select(css)
    return found[0] if found else None


def _text_or_na(node, strip=False):
    """Return the text of ``node`` (optionally stripped), or "NA" when it is None."""
    if node is None:
        return "NA"
    text = node.get_text()
    return text.strip() if strip else text


all_reviews = []

review_blocks = soup.select(".lister-item-content")
for review_block in review_blocks:
    # Review content: the class selector matches the div with or without the
    # extra "clickable" class, so one lookup covers both variants.
    content = _text_or_na(_first_match(review_block, "div.text.show-more__control"), strip=True)

    # Review score: the first <span> nested inside the rating span.
    score = _text_or_na(_first_match(review_block, "span.rating-other-user-rating span"))

    # Review title
    title = _text_or_na(_first_match(review_block, "a.title"), strip=True)

    # Review author: the link inside the display-name span.
    author = _text_or_na(_first_match(review_block, "span.display-name-link a"))

    # Review date
    date = _text_or_na(_first_match(review_block, "span.review-date"))

    all_reviews.append({
        "author": author,
        "title": title,
        "date": date,
        "score": score,
        "content": content,
    })

In [7]:
# Build the review table with a fixed column order. Passing `columns=` to the
# constructor also works when `all_reviews` is empty (an empty frame with the
# expected columns), where selecting the columns afterwards raised a KeyError.
df = pd.DataFrame(all_reviews, columns=['author', 'date', 'title', 'score', 'content'])

df

Unnamed: 0,author,date,title,score,content
0,Platypuschow,5 September 2018,Ready Player One: Considerably better than I e...,9,There seem to be two camps as far as Ready Pla...
1,MnemonicDevice,16 April 2018,Vintage Spielberg,8,I've noticed quite a few reviews here from boo...
2,ArchStanton1862,20 March 2018,An Instant Classic,9,I honestly didn't think that Spielberg had ano...
3,mattgosling1987,20 March 2018,Great movie but different from the book,8,Pretty good movie visually and even though the...
4,DanielRobertRoss,21 March 2018,Welcome back Spielberg,7,Spielberg remains to this day one of the most ...
5,juanceballos1,1 April 2018,Don't listen to the book critics,10,I personally did not read the book prior to wa...
6,hilaryswank2011,24 April 2018,"Film's New Horizon, Unification of Online Game...",10,This film actually explored the new frontier o...
7,lukyboy1,30 August 2018,Entertaining,7,This movie surprised me. Wasn't expecting much...
8,fredemt,1 April 2018,How could Ernest Cline green light this?,3,I went to see this movie with my boyfriend. I ...
9,Pjtaylor-96-138044,2 April 2018,Relies heavily on empty nostalgia and pop-cult...,4,'Ready Player One (2018)' should have been cal...


In [8]:
# Write the reviews to disk. `to_csv` returns None when given a path, so the
# result is not bound to a name (the old `csv` variable was always None and
# shared its name with the standard-library `csv` module).
df.to_csv("rpo.csv", index=False)