In [1]:
import requests, csv, datetime
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [2]:
# Scrape (name, price) pairs for every product listed on each page of the Switch category.
REQUEST_TIMEOUT = 30 # Seconds to wait on the server before giving up instead of hanging the kernel
data = [] # Rebuilt from scratch on every run, so re-executing this cell never duplicates rows
catUrls = ['https://www.cdkeys.com/nintendo/switch/1', 'https://www.cdkeys.com/nintendo/switch/2'] # Only 2 pages in category
for url in catUrls: # Iterate through category
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status() # Fail loudly on 4xx/5xx rather than parsing an error page into zero items
    soup = BeautifulSoup(response.content, 'html.parser') # Parse each page to BS
    for item in soup.find_all('div', class_='custom_category-list'): # Iterate through list of separate product items
        name = item.find('div', class_='game-name').h2.text.strip() # Get clean string for name
        # Strip surrounding whitespace first so the character dropped by [1:] is the currency symbol
        price = item.find('span', class_='price').text.strip()[1:].strip()
        data.append((name, price)) # Attach a tuple of name, price

In [3]:
# Write the scraped rows out into a new CSV, stamping every row with the scrape time.
scraped_at = datetime.datetime.now() # Taken once so all rows written by one run share the same timestamp
# newline='' is necessary to avoid \r\r\n double row breaks.
# encoding='utf-8' makes the file independent of the platform's default encoding and matches
# the default encoding pandas.read_csv uses when the file is loaded again.
with open('cdkeys-switch-prices.csv', 'w', newline='', encoding='utf-8') as file:
    file_writer = csv.writer(file)
    file_writer.writerows([name, price, scraped_at] for (name, price) in data)

In [16]:
# Load the saved CSV into a pandas dataframe, supplying the column names explicitly,
# then report the mean of the price column
column_names = ['name', 'price', 'date']
game_prices = pd.read_csv('cdkeys-switch-prices.csv', names=column_names, parse_dates=[2])
average_price = np.mean(game_prices['price'])
average_price_text = "£{:.2f}".format(average_price) # render the float mean as a two-decimal currency string
print("Average price for a Switch game: ", average_price_text)

Average price for a Switch game:  £26.41


In [24]:
# Find the row of the most expensive item.
# idxmax() returns the index *label* of the maximum value, so the row is fetched with
# label-based .loc rather than positional .iloc. A Series has only one axis, so no axis
# argument is passed (axis=1 is not valid for a Series).
most_expensive = game_prices.loc[game_prices['price'].idxmax()]
print("The most expensive game is", most_expensive['name'], "at", "£{:0.2f}".format(most_expensive['price']))

The most expensive game is The Legend of Zelda - Breath of the Wild Switch at £55.99


In [23]:
# Find the row of the least expensive item.
# idxmin() returns the index *label* of the minimum value, so the row is fetched with
# label-based .loc rather than positional .iloc. A Series has only one axis, so no axis
# argument is passed (axis=1 is not valid for a Series).
least_expensive = game_prices.loc[game_prices['price'].idxmin()]
print("The least expensive game is", least_expensive['name'], "at", "£{:0.2f}".format(least_expensive['price']))

The least expensive game is Pokemon Quest - Stay Strong Stone Switch at £2.39
