# Scraping Data from BGG

In [2]:
import requests
import xml.etree.ElementTree as ET

In [58]:
# Fetch the BGG XML record for board game 5332. requests applies no timeout
# by default, so cap the wait to keep a stalled connection from hanging the cell.
x = requests.get('https://boardgamegeek.com/xmlapi/boardgame/5332', timeout=30)
x.status_code

200

### Writing functions to parse the XML response

In [46]:
def num_players(xml):
    """
    Return the player counts that the community poll recommends.

    Parameters:
        xml: Parsed XML tree to search; it must support ElementTree-style
            ``findall`` (for example the element returned by
            ``xml.etree.ElementTree.fromstring``).

    Returns:
        list: The ``numplayers`` labels (strings) for which either of the
            first two vote options holds at least half of all votes cast
            for that player count. Player counts with no votes are skipped.
    """
    tallies = {}
    for poll in xml.findall(".//poll[@name='suggested_numplayers']"):
        for results in poll.findall('results'):
            tallies[results.attrib['numplayers']] = [
                int(result.attrib['numvotes'])
                for result in results.findall('result')
            ]

    recommended = []
    for count, votes in tallies.items():
        total = sum(votes)
        if total == 0:
            # Nobody voted on this player count, so there is no share to compare.
            continue
        # Only the first two options count towards a recommendation
        # (presumably "Best" and "Recommended" in BGG's poll -- confirm).
        if any(tally / total >= 0.5 for tally in votes[:2]):
            recommended.append(count)

    return recommended

In [56]:
def player_age(xml):
    """
    Return the suggested player age that received the most community votes.

    Reads the first ``suggested_playerage`` poll found in the document and
    picks the ``value`` of the ``result`` entry with the highest ``numvotes``.

    Parameters:
        xml: Parsed XML tree to search; it must support ElementTree-style
            ``find``/``findall`` (for example the element returned by
            ``xml.etree.ElementTree.fromstring``).

    Returns:
        str or None: The ``value`` attribute of the top-voted option, exactly
            as it appears in the XML (ties go to the option listed first).
            ``None`` when the document has no such poll or the poll has no
            result entries.
    """
    poll = xml.find(".//poll[@name='suggested_playerage']")
    results = poll.find('results') if poll is not None else None
    if results is None:
        return None

    # numvotes arrives as text; convert it so that e.g. "12" outranks "9"
    # instead of being compared character by character.
    votes_by_age = {
        result.attrib['value']: int(result.attrib['numvotes'])
        for result in results.findall('result')
    }
    if not votes_by_age:
        return None

    return max(votes_by_age, key=votes_by_age.get)

In [60]:
# Smoke test: parse the downloaded response body and run both poll helpers on it.

root = ET.fromstring(x.content)
print(num_players(root))
print(player_age(root))

['2', '3']
10
