# Query Movies

The purpose is to create a mechanism that queries RT and returns the top movie that has a tomatometer score.

In [29]:
import requests
import re

BAD_BOYS = "https://www.rottentomatoes.com/search?search=Bad%20boys"
content = requests.get(BAD_BOYS).content
content = str(content)

In [30]:
content = content[2:]
content = content[:-1]
content[:5], content[-3:]

('<!DOC', '>\\n')

In [31]:
content.find("search-page-media-row")

96343

In [32]:
content[96374:97974]

'                            skeleton="panel"\\n                                    cast="Will Smith,Martin Lawrence"\\n                                    data-qa="data-row"\\n                                    endyear=""\\n                                    releaseyear="2024"\\n                                    startyear=""\\n                                    tomatometerscore=""\\n                                    tomatometerstate=""\\n                                >\\n                                    <a href="https://www.rottentomatoes.com/m/bad_boys_4" class="unset" data-qa="thumbnail-link" slot="thumbnail">\\n                                        <img\\n                                            alt="Bad Boys 4"\\n                                            loading="lazy"\\n                                            src="https://images.fandango.com/cms/assets/5d84d010-59b1-11ea-b175-791e911be53d--rt-poster-defaultgif.gif">\\n                                    </a

In [33]:
res = re.findall(r"<search-page-media-row(.*?)</search-page-media-row>", content)
res[0]

'\\n                                    skeleton="panel"\\n                                    cast="Will Smith,Martin Lawrence"\\n                                    data-qa="data-row"\\n                                    endyear=""\\n                                    releaseyear="2024"\\n                                    startyear=""\\n                                    tomatometerscore=""\\n                                    tomatometerstate=""\\n                                >\\n                                    <a href="https://www.rottentomatoes.com/m/bad_boys_4" class="unset" data-qa="thumbnail-link" slot="thumbnail">\\n                                        <img\\n                                            alt="Bad Boys 4"\\n                                            loading="lazy"\\n                                            src="https://images.fandango.com/cms/assets/5d84d010-59b1-11ea-b175-791e911be53d--rt-poster-defaultgif.gif">\\n                            

In [34]:
# Finding tomatometer
snip = res[4]
tomato_qry = "tomatometerscore="
tomato_loc = snip.find(tomato_qry) + len(tomato_qry)
tomato_snip = snip[tomato_loc:tomato_loc+5]
meter = tomato_snip.split('"')[1]
has_tomatometer = bool(meter)
has_tomatometer

True

In [35]:
# Finding url
snip = res[0]
urls = re.findall(r'a href="(.*?)"', snip)
url = urls[0]
url

'https://www.rottentomatoes.com/m/bad_boys_4'

In [36]:
class SearchListing:
    """A search listing from the Rotten Tomatoes search page."""
    def __init__(self, has_tomatometer: bool, is_movie: bool, url: str) -> None:
        self.has_tomatometer = has_tomatometer
        self.is_movie = is_movie
        self.url = str(url)
        
    @classmethod
    def from_html(cls, html_snippet: str) -> "SearchListing":
        """
        Takes a snippet from the search page's HTML code.
        
        Use `re.findall(r"<search-page-media-row(.*?)</search-page-media-row>", content)`
        to separate the html into snippets, then feed each one to this method to create
        a `SearchListing` objects.
        """
        # Find the tomatometer
        tomato_qry = "tomatometerscore="
        tomato_loc = html_snippet.find(tomato_qry) + len(tomato_qry)
        tomato_snip = html_snippet[tomato_loc:tomato_loc+5]
        meter = tomato_snip.split('"')[1]
        has_tomatometer = bool(meter)
        
        # Find the url
        urls = re.findall(r'a href="(.*?)"', html_snippet)
        url = urls[0]
        
        # Determine if it's a movie
        is_movie = "/m/" in url
        
        return cls(has_tomatometer=has_tomatometer, is_movie=is_movie, url=url)
    
    def __str__(self) -> str:
        """Represent the SearchListing object."""
        return f"Tomatometer: {self.has_tomatometer}. URL: {self.url}. Is movie: {self.is_movie}."

In [37]:
snippets = re.findall(r"<search-page-media-row(.*?)</search-page-media-row>", content)
listings = [SearchListing.from_html(snippet) for snippet in snippets]
listings

[<__main__.SearchListing at 0x103f1ea40>,
 <__main__.SearchListing at 0x103f1fca0>,
 <__main__.SearchListing at 0x103f1f790>,
 <__main__.SearchListing at 0x103f1e1a0>,
 <__main__.SearchListing at 0x103f1d5a0>,
 <__main__.SearchListing at 0x103f1e500>,
 <__main__.SearchListing at 0x103f1f700>,
 <__main__.SearchListing at 0x103f1ecb0>,
 <__main__.SearchListing at 0x103f1ddb0>,
 <__main__.SearchListing at 0x103f1fd60>,
 <__main__.SearchListing at 0x103f1de10>,
 <__main__.SearchListing at 0x103f1faf0>,
 <__main__.SearchListing at 0x103f1e0b0>,
 <__main__.SearchListing at 0x103f1e380>,
 <__main__.SearchListing at 0x103f1cb80>,
 <__main__.SearchListing at 0x103f1f820>,
 <__main__.SearchListing at 0x103f1e6b0>]

In [38]:
for listing in listings:
    print(listing)

Tomatometer: False. URL: https://www.rottentomatoes.com/m/bad_boys_4. Is movie: True.
Tomatometer: True. URL: https://www.rottentomatoes.com/m/bad_boys_for_life. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/m/bad_boys_of_summer. Is movie: True.
Tomatometer: True. URL: https://www.rottentomatoes.com/m/bad_boys_ii. Is movie: True.
Tomatometer: True. URL: https://www.rottentomatoes.com/m/1062483-bad_boys. Is movie: True.
Tomatometer: True. URL: https://www.rottentomatoes.com/m/bad_boys_1982. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/m/bad_boys. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/m/for_bad_boys_only. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/m/good_bad_boys. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/m/jaali_baaru_mattu_poli_hudugaru. Is movie: True.
Tomatometer: False. URL: https://www.rottentomatoes.com/tv/p_diddy_presents_the_bad_boys_o

In [39]:
snippets = re.findall(r"<search-page-media-row(.*?)</search-page-media-row>", content)
listing = SearchListing.from_html(snippets[4])
print(listing)

Tomatometer: True. URL: https://www.rottentomatoes.com/m/1062483-bad_boys. Is movie: True.
