In [1]:
# load packages
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# URL of the Rotten Tomatoes editorial guide page that this notebook scrapes
base_site = "https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/"

In [3]:
# Send a GET request to the webpage.
# A timeout keeps the cell from hanging forever if the server never answers,
# and raise_for_status() stops the notebook on an HTTP error instead of letting
# an error page be parsed by the cells below.
response = requests.get(base_site, timeout=30)
response.raise_for_status()
response.status_code

200

In [4]:
# Take the raw body of the HTTP response as the HTML to parse
html = response.content

In [5]:
# Parse the HTML into a BeautifulSoup object using the lxml parser
soup = BeautifulSoup(html,'lxml')

In [6]:
# Save a pretty-printed, UTF-8 encoded copy of the parsed HTML to disk
with open('140-essential-action-movies-to-watch-no.html', mode='wb') as html_file:
    html_file.write(soup.prettify(encoding='utf-8'))

Each movie is contained in a div with the class "col-sm-18 col-full-xs countdown-item-content". Inside it, the div with the class "article_movie_title" holds the movie title, year and tMeterScore.

In [7]:
# Collect one <div> per movie; each carries this exact class attribute
divs_info = soup.find_all("div", class_="col-sm-18 col-full-xs countdown-item-content")
len(divs_info)

140

In [8]:
# Inspect the markup of the first movie block to see which tags and classes hold each field
print(divs_info[0])

<div class="col-sm-18 col-full-xs countdown-item-content">
<div class="row countdown-item-title-bar">
<div class="col-sm-20 col-full-xs" style="height: 100%;">
<div class="article_movie_title" style="float: left;">
<div><h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny fresh" title="Fresh"></span> <span class="tMeterScore">60%</span></h2></div>
</div>
</div>
<div class="col-sm-4 col-full-xs" style="height: 100%;">
<div class="countdown-index">#140</div>
</div>
</div>
<div class="row countdown-item-details">
<div class="col-sm-24">
<div class="info countdown-adjusted-score"><span class="descriptor">Adjusted Score: </span>61.188% <span class="glyphicon glyphicon-question-sign" data-html="true" data-original-title="The Adjusted Score comes from a weighted formula (Bayesian) that we use that accounts for variation in the number of reviews per movie." data-placement="top" data-toggle

### The title, year and tMeterScore are contained in the "h2" tag of each of these divs

In [9]:
# Pull the <h2> heading out of every movie block
headings = list(map(lambda movie_div: movie_div.find("h2"), divs_info))
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny fresh" title="Fresh"></span> <span class="tMeterScore">60%</span></h2>

In [10]:
# The movie title is the text of the link inside each heading
title_list = [
    anchor.get_text().strip()
    for anchor in (h2.find('a') for h2 in headings)
]
title_list[0]

'Running Scared'

In [11]:
# The year is rendered as "(YYYY)": drop the parentheses and convert to int
year_list = [
    int(year_span.get_text().strip('()'))
    for year_span in (h2.find('span', class_="subtle start-year") for h2 in headings)
]
year_list[0]

1986

In [12]:
# The Tomatometer score is rendered as "NN%": drop the percent sign and convert to float
tMeterScore = [
    float(score_span.get_text().strip('()').strip("%"))
    for score_span in (h2.find('span', class_="tMeterScore") for h2 in headings)
]
tMeterScore[0]

60.0

### Fetching countdown-index, Adjusted Score, Critics Consensus, Synopsis, Starring, Directed By

In [13]:
# The rank is rendered as "#NNN": drop the hash sign and convert to int
countdown_index_list = [
    int(rank_div.get_text().strip("#"))
    for rank_div in (div.find('div', class_="countdown-index") for div in divs_info)
]
countdown_index_list[0]

140

In [14]:
# In the adjusted-score div, contents[0] is the "Adjusted Score: " label span and
# contents[1] is the text node with the number (e.g. "61.188% "); strip it and convert to float
adjusted_score_list = [
    float(score_div.contents[1].strip().strip("%"))
    for score_div in (
        div.find("div", class_="info countdown-adjusted-score") for div in divs_info
    )
]
adjusted_score_list[0]

61.188

In [15]:
# Take the second child of the critics-consensus div as the consensus text
# (presumably contents[0] is the label span, as in the adjusted-score div - verify against the page)
critics_consensus_list = [
    consensus_div.contents[1].strip()
    for consensus_div in (
        div.find("div", class_="info critics-consensus") for div in divs_info
    )
]
critics_consensus_list[0]

'Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.'

In [16]:
# Take the second child of the synopsis div as the synopsis text
# (presumably contents[0] is the label span - verify against the page)
synopsis_list = [
    synopsis_div.contents[1].strip()
    for synopsis_div in (div.find("div", class_="info synopsis") for div in divs_info)
]
synopsis_list[0]

'Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this...'

In [17]:
# The first link inside the synopsis div points to the movie's page; keep its href
synopsis_link_list = [
    synopsis_div.find("a").get('href')
    for synopsis_div in (div.find("div", class_="info synopsis") for div in divs_info)
]
synopsis_link_list[0]

'https://www.rottentomatoes.com/m/1018009-running_scared/'

In [18]:
# Director name per movie, or None when it cannot be found.
# Each lookup is done once (the original repeated the same find() chain twice),
# and a missing "info director" div now yields None instead of raising
# AttributeError on None.find("a").
director_sections = [div.find("div", class_="info director") for div in divs_info]
director_links = [
    None if section is None else section.find("a")
    for section in director_sections
]
director_list = [
    None if link is None else link.text.strip()
    for link in director_links
]
director_list[0]

'Peter Hyams'

In [19]:
# The "info cast" section of every movie block (None when the block has none)
cast_info = [div.find("div",class_="info cast") for div in divs_info]
# Join the linked actor names into one comma-separated string per movie.
# get_text() replaces .string because .string is None for an <a> that contains
# nested markup, which would make str.join raise TypeError. A missing cast
# section gives None, matching how a missing director is represented.
cast_list = [
    None if c is None else ", ".join(link.get_text().strip() for link in c.find_all("a"))
    for c in cast_info
]
cast_list[0]

'Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer'

In [20]:
# Assemble the scraped fields into one table; dict order fixes the column order
movies_info = pd.DataFrame(
    {
        'countdown_index': countdown_index_list,
        'title': title_list,
        'year': year_list,
        'tMeterScore': tMeterScore,
        'adjusted_score': adjusted_score_list,
        'critics_consensus': critics_consensus_list,
        'synopsis': synopsis_list,
        'synopsis_link': synopsis_link_list,
        'director': director_list,
        'cast': cast_list,
    }
)


In [21]:
# Preview the first rows of the assembled table
movies_info.head()

Unnamed: 0,countdown_index,title,year,tMeterScore,adjusted_score,critics_consensus,synopsis,synopsis_link,director,cast
0,140,Running Scared,1986,60.0,61.188,Running Scared struggles to strike a consisten...,"Distinguished by a sharp, witty dialogue betwe...",https://www.rottentomatoes.com/m/1018009-runni...,Peter Hyams,"Gregory Hines, Billy Crystal, Jimmy Smits, Ste..."
1,139,Equilibrium,2002,40.0,41.991,Equilibrium is a reheated mishmash of other sc...,"In the nation of Libria, there is always peace...",https://www.rottentomatoes.com/m/equilibrium/,Kurt Wimmer,"Christian Bale, Emily Watson, Taye Diggs, Angu..."
2,138,Hero,2004,95.0,100.759,With death-defying action sequences and epic h...,Hero is two-time Academy Award nominee Zhang Y...,https://www.rottentomatoes.com/m/hero/,Zhang Yimou,"Jet Li, Tony Leung Chiu Wai, Maggie Cheung, Da..."
3,137,Road House,1989,39.0,41.997,Whether Road House is simply bad or so bad it'...,Dalton (Swayze) is a true gentleman with a deg...,https://www.rottentomatoes.com/m/1017666-road_...,Rowdy Herrington,"Patrick Swayze, Kelly Lynch, Sam Elliott, Ben ..."
4,136,Unstoppable,2010,86.0,91.465,"As fast, loud, and relentless as the train at ...",In this action thriller from director Tony Sco...,https://www.rottentomatoes.com/m/unstoppable-2...,Tony Scott,"Denzel Washington, Chris Pine, Rosario Dawson,..."


In [22]:
# Save the table as CSV; index=False leaves the DataFrame's row index out of the file
movies_info.to_csv("140-essential-action-movies-to-watch-now.csv",index=False)