In [73]:
# imports
import re
import unicodedata
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

pd.set_option('display.max_colwidth', None)


In [74]:
# Source page: Wikipedia's list of best-selling Nintendo Switch games.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_best-selling_Nintendo_Switch_video_games'
# Browser-like User-Agent header; reused by the other requests made in this notebook.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'}

# requests waits forever by default; a timeout keeps the cell from hanging on a stalled connection.
wiki_response = requests.get(wiki_url, headers=headers, timeout=30)
wiki_response.status_code  # a status code of 200 confirms the page was fetched properly


200

In [75]:
# Parse the downloaded HTML and keep the first <table> carrying the "wikitable" class.
soup = BeautifulSoup(wiki_response.text, 'html.parser')
table = soup.find('table', class_='wikitable')

In [76]:
# read_html returns a list of DataFrames; the single table passed in is element 0.
switch_sales_df = pd.read_html(
    StringIO(str(table))  # wrap the HTML in StringIO to avoid deprecation warnings
)[0]
switch_sales_df

Unnamed: 0,Title,Copies sold,As of,Release date[a],Genre(s),Developer(s),Publisher(s)
0,Mario Kart 8 Deluxe,68.86 million[4],"June 30, 2025","April 28, 2017",Kart racing,Nintendo EPD,Nintendo
1,Animal Crossing: New Horizons,48.19 million[4],"June 30, 2025","March 20, 2020",Social simulation,Nintendo EPD,Nintendo
2,Super Smash Bros. Ultimate,36.55 million[4],"June 30, 2025","December 7, 2018",Fighting,Bandai Namco StudiosSora Ltd.,Nintendo
3,The Legend of Zelda: Breath of the Wild,33.04 million[4],"June 30, 2025","March 3, 2017",Action-adventure,Nintendo EPD,Nintendo
4,Super Mario Odyssey,29.50 million[4],"June 30, 2025","October 27, 2017",Platformer,Nintendo EPD,Nintendo
...,...,...,...,...,...,...,...
97,Fitness Boxing,1 million[46],"September 8, 2020","December 20, 2018",Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo
98,Fitness Boxing 2: Rhythm and Exercise,1 million[47],"December 9, 2021","December 4, 2020",Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo
99,Resident Evil 4,1 million[37],"March 31, 2025","May 21, 2019",Survival horror,Capcom,Capcom
100,Story of Seasons: Pioneers of Olive Town,1 million[48],"November 18, 2021","February 25, 2021",Simulationrole-playing,Marvelous,Xseed Games


In [77]:
# functions made to clean the columns/extract data
def turn_copies_sold_to_num(val):
    """Convert a "Copies sold" cell such as ``'68.86\\xa0million[4]'`` to a number of copies.

    The leading figure is read as a count of millions; everything after the
    first whitespace (the unit word and any footnote marker) is ignored.

    Args:
        val: Cell text whose first whitespace-separated token is the figure.

    Returns:
        The number of copies as a ``float`` (e.g. ``68860000.0``).

    Raises:
        ValueError: If ``val`` is blank or its first token is not a number.
    """
    # A no-argument split breaks on any whitespace, so both the non-breaking
    # space ('\xa0') and an ordinary space between figure and unit are handled.
    tokens = val.split()
    if not tokens:
        raise ValueError(f'no sales figure found in {val!r}')
    millions = float(tokens[0])
    # Round to whole copies so binary floating-point error cannot leave a
    # stray fractional part; the result stays a float as before.
    return float(round(millions * 1_000_000))

def get_num_from_str(text):
    """Return the first whole number found in ``text`` as an ``int``.

    Digit groups joined by thousands separators (``'1,234'``) are read as one
    number rather than being cut off at the first comma.

    Args:
        text: String to search.

    Returns:
        The first number in ``text``, or ``None`` when it contains no digits.
    """
    # First alternative: properly grouped thousands ("1,234", "2,000,000").
    # Second alternative: a plain run of digits, as before.
    match = re.search(r'\d{1,3}(?:,\d{3})+(?!\d)|\d+', text)
    return int(match.group().replace(',', '')) if match else None

def create_metacritic_url(game_name):
    """Build a guessed Metacritic game-page URL from a game title.

    The title is folded to ASCII (so ``'é'`` becomes ``'e'``), stripped of
    punctuation, lower-cased, and its words are joined with single dashes.

    Args:
        game_name: Game title, e.g. ``'Animal Crossing: New Horizons'``.

    Returns:
        ``'https://www.metacritic.com/game/'`` followed by the slug, e.g.
        ``'https://www.metacritic.com/game/animal-crossing-new-horizons'``.
    """
    # Decompose accented letters and drop the non-ASCII remainder so the slug
    # is plain ASCII ('Pokémon' -> 'Pokemon').
    ascii_name = (
        unicodedata.normalize('NFKD', game_name)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    # Remove punctuation but keep hyphens: they separate words in the slug.
    slug = re.sub(r'[^\w\s-]', '', ascii_name)
    # Collapse runs of whitespace/hyphens to one dash; trim dashes left by
    # leading or trailing whitespace in the title.
    slug = re.sub(r'[\s-]+', '-', slug).strip('-').lower()
    return 'https://www.metacritic.com/game/' + slug
 


In [78]:

# Build the cleaned table in one chain:
#   * copies_sold    - numeric version of 'Copies sold', easier to compare
#   * as_of / release_date - dates parsed, then rendered as MM/DD/YYYY strings
#   * metacritic_url - guessed Metacritic page for each title
# The original text columns are dropped once their replacements exist.
switch_sales_df_formatted = (
    switch_sales_df
    .assign(
        copies_sold=lambda df: df['Copies sold'].apply(turn_copies_sold_to_num),
        as_of=lambda df: pd.to_datetime(df['As of']).dt.strftime('%m/%d/%Y'),
        release_date=lambda df: pd.to_datetime(df['Release date[a]']).dt.strftime('%m/%d/%Y'),
        metacritic_url=lambda df: df['Title'].apply(create_metacritic_url),
    )
    .drop(columns=['Copies sold', 'As of', 'Release date[a]'])
)

switch_sales_df_formatted


Unnamed: 0,Title,Genre(s),Developer(s),Publisher(s),copies_sold,as_of,release_date,metacritic_url
0,Mario Kart 8 Deluxe,Kart racing,Nintendo EPD,Nintendo,68860000.0,06/30/2025,04/28/2017,https://www.metacritic.com/game/mario-kart-8-deluxe
1,Animal Crossing: New Horizons,Social simulation,Nintendo EPD,Nintendo,48190000.0,06/30/2025,03/20/2020,https://www.metacritic.com/game/animal-crossing-new-horizons
2,Super Smash Bros. Ultimate,Fighting,Bandai Namco StudiosSora Ltd.,Nintendo,36550000.0,06/30/2025,12/07/2018,https://www.metacritic.com/game/super-smash-bros-ultimate
3,The Legend of Zelda: Breath of the Wild,Action-adventure,Nintendo EPD,Nintendo,33040000.0,06/30/2025,03/03/2017,https://www.metacritic.com/game/the-legend-of-zelda-breath-of-the-wild
4,Super Mario Odyssey,Platformer,Nintendo EPD,Nintendo,29500000.0,06/30/2025,10/27/2017,https://www.metacritic.com/game/super-mario-odyssey
...,...,...,...,...,...,...,...,...
97,Fitness Boxing,Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo,1000000.0,09/08/2020,12/20/2018,https://www.metacritic.com/game/fitness-boxing
98,Fitness Boxing 2: Rhythm and Exercise,Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo,1000000.0,12/09/2021,12/04/2020,https://www.metacritic.com/game/fitness-boxing-2-rhythm-and-exercise
99,Resident Evil 4,Survival horror,Capcom,Capcom,1000000.0,03/31/2025,05/21/2019,https://www.metacritic.com/game/resident-evil-4
100,Story of Seasons: Pioneers of Olive Town,Simulationrole-playing,Marvelous,Xseed Games,1000000.0,11/18/2021,02/25/2021,https://www.metacritic.com/game/story-of-seasons-pioneers-of-olive-town


In [79]:
def get_metacritic_stats(url, timeout=30):
    """Fetch a Metacritic game page and return the number shown on its critic-reviews link.

    Args:
        url: Address of the Metacritic game page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The first integer in the text of the ``<span>`` inside the link marked
        ``data-testid="critic-path"``, or ``None`` when the page has no such
        element or its text contains no digits.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface a failed request as an HTTP error instead of an AttributeError
    # raised later while searching an error page for tags it does not contain.
    response.raise_for_status()

    page = BeautifulSoup(response.text, 'html.parser')
    critic_link = page.find('a', {'data-testid': 'critic-path'})
    count_span = critic_link.find('span') if critic_link is not None else None
    if count_span is None:
        return None

    return get_num_from_str(count_span.text)

# Smoke test against a known page (performs a live network request).
print(get_metacritic_stats('https://www.metacritic.com/game/animal-crossing-new-horizons'))


111
