# Patches
Age of Empires receives patches that include balance changes, bug fixes, new civilizations, and more. Here, we attempt to scrape that patch information.

Unfortunately, steamdb.info appears to require JavaScript to be enabled. So instead of scraping the live site, we download the HTML pages locally and scrape those files to get the patch information.

In [99]:
from bs4 import BeautifulSoup as Soup

# Download the pages below and save them locally as de_patches.html and hd_patches.html:
# de_steam_id = 813780
# hd_steam_id = 221380
# de_patch_url = f'https://steamdb.info/app/{de_steam_id}/patchnotes/'
# hd_patch_url = f'https://steamdb.info/app/{hd_steam_id}/patchnotes/'

In [100]:
# Parse the locally saved steamdb.info patch-note pages.
# The files are opened in binary mode so BeautifulSoup detects the document
# encoding itself; text mode would decode with the platform's default
# encoding, which is not guaranteed to match the saved HTML.
with open('de_patches.html', 'rb') as de:
    de_patches = Soup(de, 'html.parser')
with open('hd_patches.html', 'rb') as hd:
    hd_patches = Soup(hd, 'html.parser')



In [101]:
def get_patches(soup: Soup, game_name: str) -> list:
    """
    Gets the patch notes for a game as a list
    :param soup: the BeautifulSoup object for the steamdb.info game website at 'https://steamdb.info/app/{game_steam_id}/patchnotes/'
    :param game_name: `de` or `hd` for the version of age of empires (DE or HD); stored as the first field of every returned row
    :return: list of patches made to the game, one `[game_name, date, title, has_patch_notes, build_id]` list per table row, in page order
    :raises ValueError: if the page contains no `<tbody id="js-builds">` element
    """
    from datetime import datetime

    # Positions of the cells we read inside each row of the builds table.
    date_col, title_col, notes_col, build_col = 0, 3, 5, 6

    table = soup.find('tbody', attrs={'id': 'js-builds'})
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"no <tbody id='js-builds'> found in the page for {game_name!r}; "
            "was the patch notes page saved correctly?"
        )

    patches = []
    for tr in table.find_all('tr'):
        td_list = tr.find_all('td')
        if len(td_list) <= build_col:
            # Not a full patch row (too few <td> cells): nothing to extract.
            continue
        # The date link text is expected to look like "15 November 2019".
        date = datetime.strptime(td_list[date_col].find('a').string, '%d %B %Y')
        title = td_list[title_col].find('a').string
        # `.string` is None when the cell is not a single piece of text.
        # NOTE(review): presumably that means the cell holds patch-note
        # markup rather than a plain placeholder -- confirm against the page.
        has_patch_notes = td_list[notes_col].string is None
        build_id = td_list[build_col].string
        patches.append([game_name, date, title, has_patch_notes, build_id])
    return patches



In [103]:
import csv

# Write the HD rows followed by the DE rows into a single CSV file.
# newline='' hands line-ending control to the csv module (otherwise blank
# lines appear between rows on Windows); the explicit encoding keeps
# non-ASCII patch titles from failing under a legacy default code page.
with open('hd_de_patches.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Game', 'DateTime', 'Title', 'Has Patch Notes', 'Build ID'])
    writer.writerows(get_patches(hd_patches, 'hd'))
    writer.writerows(get_patches(de_patches, 'de'))


In [None]:
!cat hd_de_patches.csv