In [None]:
# imports
import pandas as pd
import requests
from io import StringIO
from bs4 import BeautifulSoup


In [None]:
# Page to scrape: Wikipedia's list of best-selling Nintendo Switch video games.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_best-selling_Nintendo_Switch_video_games'
# Send a browser-like User-Agent with the request
# (presumably so the request is not rejected as an automated client - TODO confirm).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'}

# requests applies no timeout by default, so without one this cell could hang
# indefinitely if the server stops responding.
wiki_response = requests.get(wiki_url, headers=headers, timeout=30)
# Fail fast on a 4xx/5xx answer instead of letting later cells parse an error page.
wiki_response.raise_for_status()
wiki_response.status_code # Status code of 200 to confirm we got the information properly


200

In [21]:
# Parse the downloaded HTML with Python's built-in parser, then keep the first
# <table> element that carries the "wikitable" CSS class.
soup = BeautifulSoup(wiki_response.text, features='html.parser')
table = soup.find('table', class_='wikitable')

In [22]:
# pd.read_html returns a list of DataFrames, so take the first one.
# The markup is wrapped in StringIO to avoid the deprecation warning pandas
# emits when it is handed a literal HTML string.
switch_sales_df = pd.read_html(
    StringIO(str(table)),
)[0]
switch_sales_df

Unnamed: 0,Title,Copies sold,As of,Release date[a],Genre(s),Developer(s),Publisher(s)
0,Mario Kart 8 Deluxe,68.20 million[4],"March 31, 2025","April 28, 2017",Kart racing,Nintendo EPD,Nintendo
1,Animal Crossing: New Horizons,47.82 million[4],"March 31, 2025","March 20, 2020",Social simulation,Nintendo EPD,Nintendo
2,Super Smash Bros. Ultimate,36.24 million[4],"March 31, 2025","December 7, 2018",Fighting,Bandai Namco StudiosSora Ltd.,Nintendo
3,The Legend of Zelda: Breath of the Wild,32.81 million[4],"March 31, 2025","March 3, 2017",Action-adventure,Nintendo EPD,Nintendo
4,Super Mario Odyssey,29.28 million[4],"March 31, 2025","October 27, 2017",Platformer,Nintendo EPD,Nintendo
...,...,...,...,...,...,...,...
97,Fitness Boxing,1 million[46],"September 8, 2020","December 20, 2018",Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo
98,Fitness Boxing 2: Rhythm and Exercise,1 million[47],"December 9, 2021","December 4, 2020",Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo
99,Resident Evil 4,1 million[37],"March 31, 2025","May 21, 2019",Survival horror,Capcom,Capcom
100,Story of Seasons: Pioneers of Olive Town,1 million[48],"November 18, 2021","February 25, 2021",Simulationrole-playing,Marvelous,Xseed Games


In [None]:
# functions made to clean the columns/extract data
def turn_copies_sold_to_num(val):
    """Convert a "Copies sold" cell into an absolute number of copies.

    The cell text is expected to start with a figure expressed in millions,
    followed by whitespace and trailing text such as the word "million" and a
    footnote marker (for example "68.20 million[4]"). Only the leading figure
    is used; it is always scaled by one million.

    Args:
        val (str): Raw cell text. The separator after the figure may be a
            regular space or a non-breaking space.

    Returns:
        float: The figure multiplied by 1,000,000, rounded to a whole number
        of copies.

    Raises:
        ValueError: If ``val`` is empty/blank or its first token is not a number.
    """
    # str.split() with no argument splits on any run of whitespace, which covers
    # both the non-breaking space (\xa0) and an ordinary space.
    tokens = val.split()
    if not tokens:
        raise ValueError(f'cannot parse a copies-sold figure from {val!r}')

    # Drop thousands separators so figures such as "1,000" still parse.
    millions = float(tokens[0].replace(',', ''))

    # Copies are whole units: rounding removes binary floating-point residue
    # (e.g. 4.1 * 1_000_000 does not come out as exactly 4100000.0).
    return float(round(millions * 1_000_000))



In [None]:

# Build the cleaned frame in one chain, starting from the raw scrape each time:
#   * copies_sold - numeric version of 'Copies sold' (appended as the last column),
#                   which makes the sales figures easy to compare; the original
#                   text column is then dropped.
#   * 'As of' / 'Release date[a]' - parsed with pd.to_datetime and re-rendered
#                   in MM/DD/YYYY form.
# NOTE(review): because of the strftime step both date columns end up holding
# strings, not datetime values - parse them again before doing date arithmetic.
switch_sales_df_formatted = (
    switch_sales_df
    .assign(
        copies_sold=switch_sales_df['Copies sold'].apply(turn_copies_sold_to_num),
        **{
            date_col: pd.to_datetime(switch_sales_df[date_col]).dt.strftime('%m/%d/%Y')
            for date_col in ('As of', 'Release date[a]')
        },
    )
    .drop(columns=['Copies sold'])
)

switch_sales_df_formatted


Unnamed: 0,Title,As of,Release date[a],Genre(s),Developer(s),Publisher(s),copies_sold
0,Mario Kart 8 Deluxe,03/31/2025,04/28/2017,Kart racing,Nintendo EPD,Nintendo,68200000.0
1,Animal Crossing: New Horizons,03/31/2025,03/20/2020,Social simulation,Nintendo EPD,Nintendo,47820000.0
2,Super Smash Bros. Ultimate,03/31/2025,12/07/2018,Fighting,Bandai Namco StudiosSora Ltd.,Nintendo,36240000.0
3,The Legend of Zelda: Breath of the Wild,03/31/2025,03/03/2017,Action-adventure,Nintendo EPD,Nintendo,32810000.0
4,Super Mario Odyssey,03/31/2025,10/27/2017,Platformer,Nintendo EPD,Nintendo,29280000.0
...,...,...,...,...,...,...,...
97,Fitness Boxing,09/08/2020,12/20/2018,Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo,1000000.0
98,Fitness Boxing 2: Rhythm and Exercise,12/09/2021,12/04/2020,Exergamerhythm,Imagineer,JP: ImagineerNA/PAL: Nintendo,1000000.0
99,Resident Evil 4,03/31/2025,05/21/2019,Survival horror,Capcom,Capcom,1000000.0
100,Story of Seasons: Pioneers of Olive Town,11/18/2021,02/25/2021,Simulationrole-playing,Marvelous,Xseed Games,1000000.0
