## Set up

In [9]:
# load the packages
import requests
from bs4 import BeautifulSoup

In [10]:
# URL of the Rotten Tomatoes editorial guide page that this notebook scrapes
base_site = "https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/"

In [11]:
# Send a GET request to the webpage.
# - timeout: requests waits indefinitely by default, so without it an
#   unresponsive server would hang this cell forever.
# - raise_for_status(): stop here on a 4xx/5xx reply instead of letting an
#   error page flow into the parsing cells below.
response = requests.get(base_site, timeout=30)
response.raise_for_status()
response.status_code

200

In [12]:
# Keep the raw response body (the page's HTML) for parsing
html = response.content

In [13]:
# Parse the HTML into a BeautifulSoup object using the lxml parser
soup = BeautifulSoup(html, 'lxml')

In [14]:
# Collect every <div> on the page that holds the information we want to scrape.
# NOTE: a multi-word class string is compared against the exact value of the
# class attribute, so this relies on the page listing these classes in this order.
divs = soup.find_all(
    "div",
    class_="col-sm-18 col-full-xs countdown-item-content",
)

In [15]:
# Take the first <h2> found inside each div (find() gives None if there is none)
headings = [
    container.find("h2")
    for container in divs
]
headings

[<h2>
 <a href="https://www.rottentomatoes.com/m/1018009-running_scared">Running Scared</a>
 <span class="subtle start-year">(1986)</span>
 <br/>
 <img alt="Tomatometer icon" class="icon tiny" decoding="async" src="https://images.fandango.com/cms/assets/bffe0ff0-d359-11ea-b2c2-d92a57d89c3a--fresh.png"/> <span class="tMeterScore" style="margin-right: 10px;">63%</span>
 </h2>,
 <h2>
 <a href="https://www.rottentomatoes.com/m/equilibrium">Equilibrium</a>
 <span class="subtle start-year">(2002)</span>
 <br/>
 <img alt="Tomatometer icon" class="icon tiny" decoding="async" src="https://images.fandango.com/cms/assets/b3909ad0-d359-11ea-b2c2-d92a57d89c3a--rotten.png"/> <span class="tMeterScore" style="margin-right: 10px;">39%</span>
 </h2>,
 <h2>
 <a href="https://www.rottentomatoes.com/m/hero">Hero</a>
 <span class="subtle start-year">(2002)</span>
 <br/>
 <img alt="Tomatometer icon" class="icon tiny" decoding="async" src="https://images.fandango.com/cms/assets/c6672520-d359-11ea-a15f-bdf29fa

## Extracting the scores

In [21]:
# Preview the score text held in each heading's 'tMeterScore' <span>.
# find() returns None when a heading has no such span, so those headings are
# skipped instead of raising AttributeError on `None.string`.
[
    span.string
    for span in (heading.find("span", class_='tMeterScore') for heading in headings)
    if span is not None
]

['63%',
 '39%',
 '94%',
 '44%',
 '87%',
 '88%',
 '85%',
 '69%',
 '69%',
 '47%',
 '53%',
 '92%',
 '91%',
 '97%',
 '59%',
 '58%',
 '68%',
 '62%',
 '60%',
 '61%',
 '61%',
 '90%',
 '79%',
 '50%',
 '59%',
 '44%',
 '60%',
 '67%',
 '68%',
 '67%',
 '62%',
 '70%',
 '93%',
 '77%',
 '83%',
 '68%',
 '98%',
 '71%',
 '88%',
 '70%',
 '87%',
 '68%',
 '91%',
 '91%',
 '88%',
 '72%',
 '91%',
 '70%',
 '68%',
 '92%',
 '60%',
 '61%',
 '70%',
 '64%',
 '51%',
 '93%',
 '74%',
 '75%',
 '72%',
 '73%',
 '79%',
 '81%',
 '81%',
 '82%',
 '85%',
 '86%',
 '91%',
 '86%',
 '90%',
 '93%',
 '95%',
 '89%',
 '84%',
 '90%',
 '93%',
 '94%',
 '92%',
 '93%',
 '99%',
 '96%',
 '93%',
 '83%',
 '90%',
 '82%',
 '97%',
 '82%',
 '89%',
 '93%',
 '89%',
 '91%',
 '85%',
 '96%',
 '96%',
 '87%',
 '76%',
 '89%',
 '94%',
 '80%',
 '84%',
 '86%',
 '93%',
 '88%',
 '94%',
 '94%',
 '77%',
 '81%',
 '67%',
 '92%',
 '89%',
 '94%',
 '92%',
 '100%',
 '95%',
 '83%',
 '88%',
 '68%',
 '86%',
 '94%',
 '100%',
 '80%',
 '85%',
 '75%',
 '95%',
 '85%',
 '86%'

In [23]:
# Build the list of score strings, one per heading that has a score span
score = list()
for heading in headings:
    span_tag = heading.find("span", class_='tMeterScore')
    if not span_tag:
        # No 'tMeterScore' span in this heading: nothing is appended, so
        # `score` can end up shorter than `headings`.
        continue
    score.append(span_tag.string)

score

['63%',
 '39%',
 '94%',
 '44%',
 '87%',
 '88%',
 '85%',
 '69%',
 '69%',
 '47%',
 '53%',
 '92%',
 '91%',
 '97%',
 '59%',
 '58%',
 '68%',
 '62%',
 '60%',
 '61%',
 '61%',
 '90%',
 '79%',
 '50%',
 '59%',
 '44%',
 '60%',
 '67%',
 '68%',
 '67%',
 '62%',
 '70%',
 '93%',
 '77%',
 '83%',
 '68%',
 '98%',
 '71%',
 '88%',
 '70%',
 '87%',
 '68%',
 '91%',
 '91%',
 '88%',
 '72%',
 '91%',
 '70%',
 '68%',
 '92%',
 '60%',
 '61%',
 '70%',
 '64%',
 '51%',
 '93%',
 '74%',
 '75%',
 '72%',
 '73%',
 '79%',
 '81%',
 '81%',
 '82%',
 '85%',
 '86%',
 '91%',
 '86%',
 '90%',
 '93%',
 '95%',
 '89%',
 '84%',
 '90%',
 '93%',
 '94%',
 '92%',
 '93%',
 '99%',
 '96%',
 '93%',
 '83%',
 '90%',
 '82%',
 '97%',
 '82%',
 '89%',
 '93%',
 '89%',
 '91%',
 '85%',
 '96%',
 '96%',
 '87%',
 '76%',
 '89%',
 '94%',
 '80%',
 '84%',
 '86%',
 '93%',
 '88%',
 '94%',
 '94%',
 '77%',
 '81%',
 '67%',
 '92%',
 '89%',
 '94%',
 '92%',
 '100%',
 '95%',
 '83%',
 '88%',
 '68%',
 '86%',
 '94%',
 '100%',
 '80%',
 '85%',
 '75%',
 '95%',
 '85%',
 '86%'

In [31]:
# Strip the '%' characters from both ends of every score string
score = [text.strip('%') for text in score]

In [32]:
# Cast every score string to an int, then display the resulting list
score = list(map(int, score))
score

[63,
 39,
 94,
 44,
 87,
 88,
 85,
 69,
 69,
 47,
 53,
 92,
 91,
 97,
 59,
 58,
 68,
 62,
 60,
 61,
 61,
 90,
 79,
 50,
 59,
 44,
 60,
 67,
 68,
 67,
 62,
 70,
 93,
 77,
 83,
 68,
 98,
 71,
 88,
 70,
 87,
 68,
 91,
 91,
 88,
 72,
 91,
 70,
 68,
 92,
 60,
 61,
 70,
 64,
 51,
 93,
 74,
 75,
 72,
 73,
 79,
 81,
 81,
 82,
 85,
 86,
 91,
 86,
 90,
 93,
 95,
 89,
 84,
 90,
 93,
 94,
 92,
 93,
 99,
 96,
 93,
 83,
 90,
 82,
 97,
 82,
 89,
 93,
 89,
 91,
 85,
 96,
 96,
 87,
 76,
 89,
 94,
 80,
 84,
 86,
 93,
 88,
 94,
 94,
 77,
 81,
 67,
 92,
 89,
 94,
 92,
 100,
 95,
 83,
 88,
 68,
 86,
 94,
 100,
 80,
 85,
 75,
 95,
 85,
 86,
 98,
 80,
 92,
 81,
 93,
 84,
 87,
 98,
 94,
 94,
 92,
 83,
 91,
 94,
 97]