In [128]:
import urllib3, certifi
from urllib3.util.retry import Retry

# Page to scrape: the Metacritic movie browse listing addressed by this URL
# (2024, ordered by metascore according to the path, results page 2).
link = "https://www.metacritic.com/browse/movie/all/all/2024/metascore/?page=2"

# HTTP headers sent with every request made through the pool below.
# They present the client as a desktop Chrome browser arriving from the
# Metacritic home page.
headers = {
    "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/127.0.0.0 Safari/537.36"),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.metacritic.com/",
}

# Retry strategy: at most 3 retries with a backoff between attempts, applied
# only to GET requests, and also triggered by the listed throttling /
# server-error status codes.
retries = Retry(
    total=3,
    backoff_factor=0.5,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["GET"]
)

# Explicit timeouts (seconds) so a stalled connection cannot block the cell
# forever; no timeout is configured otherwise.
timeout = urllib3.Timeout(connect=5.0, read=30.0)

# Connection pool that verifies TLS certificates against certifi's CA bundle
# and applies the retry policy, timeout and headers to every request.
http = urllib3.PoolManager(
    ca_certs=certifi.where(),
    retries=retries,
    headers=headers,
    timeout=timeout,
)

# Fetch the page
r = http.request("GET", link)

# Debug output: HTTP status code
print(r.status)

# Stop here on anything but 200. Statuses outside `status_forcelist`
# (e.g. 403/404) are returned as normal responses, and parsing such a page
# would silently produce empty results in the cells that follow.
if r.status != 200:
    raise RuntimeError(f"Unexpected HTTP status {r.status} for {link}")

# Bytes to text
datastring = r.data.decode("utf-8")

# Debug output: number of characters in the decoded page
print(len(datastring))

200
316598


In [129]:
import html
import re

# Search for titles: the value of the `data-title` attribute on every
# <div ... class="c-finderProductCard_title"> in the downloaded page.
# The captured text is raw HTML source, so it may contain character references
# (e.g. "&amp;" for "&"); html.unescape turns them back into plain characters.
titles = [
    html.unescape(raw_title)
    for raw_title in re.findall(
        r'<div data-title="(.*?)" class="c-finderProductCard_title">',
        datastring,
    )
]
print(f"Retrieved {len(titles)} movie titles")

Retrieved 24 movie titles


In [130]:
import re

# Release dates: the text inside every <span class="u-text-uppercase">,
# captured without the whitespace that surrounds it in the markup.
release_dates = re.compile(
    r'<span class="u-text-uppercase">\s*(.*?)\s*</span>'
).findall(datastring)
print(f"Retrieved {len(release_dates)} movie release dates")

Retrieved 24 movie release dates


In [131]:
import re

# Meta scores: the text inside every <span data-v-e408cafe> element.
# The values are kept as the strings found in the page (no numeric conversion).
# NOTE(review): `data-v-e408cafe` looks like a build-generated attribute; if the
# site changes it, this pattern would simply match nothing - confirm.
meta_score = [
    score_match.group(1)
    for score_match in re.finditer(r'<span data-v-e408cafe>(.*?)</span>', datastring)
]
print(f"Retrieved {len(meta_score)} movie meta scores")

Retrieved 24 movie meta scores


In [132]:
import html
import re

# Search for descriptions: the text of the <span> that directly follows each
# <div class="c-finderProductCard_description">, without trailing whitespace.
# The captured text is raw HTML source, so it may contain character references
# (e.g. "&#39;" or "&amp;"); html.unescape turns them back into plain characters.
description = [
    html.unescape(raw_text)
    for raw_text in re.findall(
        r'<div class="c-finderProductCard_description"><span>(.*?)\s*<\/span>',
        datastring,
    )
]
print(f"Retrieved {len(description)} movie descriptions")

Retrieved 24 movie descriptions


In [133]:
import pandas as pd
import tabulate as tab

# Combine the four scraped lists into a single table, one column per field.
# NOTE(review): rows are paired purely by list position, so this assumes the
# four lists are equally long and in the same card order - confirm.
df = pd.DataFrame(
    data={
        "Title": titles,
        "Description": description,
        "Release Date": release_dates,
        "Meta Score": meta_score,
    }
)

# Print the table as plain text, using the column names as headers and
# leaving out the row index.
print(tab.tabulate(df, headers=df.columns, showindex=False))

Title                                                 Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Release Date      Meta Score
----------------------------------------------------  --------------------------------------------------------------------------------------------------------------------------------------------------------------------------