In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

In [41]:
def get_category_fans(link, timeout=30):
    """
    Given link to boardgame category page on bgg, returns the number of fans of that category

    Parameters
    ----------
    link : str
        Site-relative path of the category page; it is appended to
        'https://boardgamegeek.com'.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned
        (default 30). Without a timeout a stalled connection blocks forever.

    Returns
    -------
    int
        The digits captured after "numfans": in the page's second <script> element.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the second <script> element or the fan count cannot be found.
    """
    url = 'https://boardgamegeek.com' + link
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html5lib')

    # The fan count is read from the second <script> element of the page.
    scripts = soup.find_all('script')
    if len(scripts) < 2 or not scripts[1].contents:
        raise ValueError('No category data script found at ' + url)
    cat_info = scripts[1].contents[0]

    # Raw string so that \d reaches the regex engine untouched; \d+ (not \d*)
    # so an empty capture is reported as "not found" instead of failing in int('').
    match = re.search(r'"numfans":(\d+)}', cat_info)
    if match is None:
        raise ValueError('Could not find "numfans" at ' + url)
    return int(match.group(1))

def make_category_dict(num_top):
    """
    Build a lookup of the num_top game categories with the most fans.

    Every link in the 'forum_table' table of the bgg category browse page is
    visited once to read its fan count, so this issues one request per category.

    Returns a dict whose keys are the integer category ids parsed from each
    link and whose values are the category names.
    """
    browse_url = 'https://boardgamegeek.com/browse/boardgamecategory'
    page = BeautifulSoup(requests.get(browse_url).text, 'html5lib')
    anchors = page.find('table', class_='forum_table').find_all('a')

    rows = []
    for anchor in anchors:
        href = anchor['href']
        # The id is the second-to-last path segment of the link.
        rows.append([int(href.split('/')[-2]), anchor.text, get_category_fans(href)])

    ranked = pd.DataFrame(
        rows, columns=['category_id', 'category_name', 'category_fans']
    ).sort_values('category_fans', ascending=False)
    leaders = ranked[:num_top]
    return leaders.set_index('category_id')['category_name'].to_dict()

def code_to_name(categories_dict, code_list):
    """
    Translate category codes into category names.

    Codes that are not keys of categories_dict are skipped; order and
    duplicates of the remaining codes are preserved. If nothing matches,
    the single-element list ['No top categories'] is returned instead.
    """
    matched = []
    for code in code_list:
        if code in categories_dict:
            matched.append(categories_dict[code])
    if matched:
        return matched
    return ['No top categories']

In [42]:
# Build the id -> name lookup for the 10 categories with the most fans
# (this fetches pages from boardgamegeek.com, one request per category).
cd=make_category_dict(10)


In [46]:
# Map a list of category ids to names; ids that are not keys of cd are dropped.
code_to_name(cd,[20123,1,2,1019])

['Wargame']

In [5]:
# Fetch the category browse page in this cell: `soup` is otherwise only a local
# variable inside the functions above, so the cell would fail on a fresh kernel.
response = requests.get('https://boardgamegeek.com/browse/boardgamecategory')
soup = BeautifulSoup(response.text, 'html5lib')

# Collect one [id, name, fans] record per category link in the table.
category_list = []
table = soup.find('table', class_='forum_table')
entries = table.find_all('a')
for entry in entries:
    category = entry.text
    link = entry['href']
    # The id is the second-to-last path segment of the link.
    id_ = int(link.split('/')[-2])
    num_fans = get_category_fans(link)
    category_list.append([id_,category,num_fans])



In [6]:
# Print every [id, name, fans] record collected above.
print(category_list)

[[1009, 'Abstract Strategy', 214], [1032, 'Action / Dexterity', 56], [1022, 'Adventure', 99], [2726, 'Age of Reason', 19], [1048, 'American Civil War', 26], [1108, 'American Indian Wars', 12], [1075, 'American Revolutionary War', 14], [1055, 'American West', 49], [1050, 'Ancient', 69], [1089, 'Animals', 41], [1052, 'Arabian', 6], [2650, 'Aviation / Flight', 20], [1023, 'Bluffing', 44], [1117, 'Book', 9], [1002, 'Card Game', 153], [1041, "Children's Game", 41], [1029, 'City Building', 80], [1102, 'Civil War', 3], [1015, 'Civilization', 103], [1044, 'Collectible Components', 6], [1116, 'Comic Book / Strip', 14], [1039, 'Deduction', 125], [1017, 'Dice', 54], [1021, 'Economic', 119], [1094, 'Educational', 23], [1072, 'Electronic', 6], [1084, 'Environmental', 27], [1042, 'Expansion for Base-game', 13], [1020, 'Exploration', 69], [2687, 'Fan Expansion', 6], [1010, 'Fantasy', 139], [1013, 'Farming', 21], [1046, 'Fighting', 28], [1119, 'Game System', 17], [1024, 'Horror', 120], [1079, 'Humor',

In [31]:
# Tabulate the scraped records, rank them by fan count (largest first),
# keep the first 20 rows and summarise the resulting frame.
column_names = ['category_id', 'category_name', 'category_fans']
category_df = pd.DataFrame(category_list, columns=column_names)
ranked_by_fans = category_df.sort_values(by='category_fans', ascending=False)
top_categories = ranked_by_fans.iloc[:20]
top_categories.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 62 to 73
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   category_id    20 non-null     int64 
 1   category_name  20 non-null     object
 2   category_fans  20 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 640.0+ bytes


In [32]:
# set_index('category_id') is not applied here, so the frame keeps its original
# row labels and category_id stays a regular column.
top_categories


Unnamed: 0,category_id,category_name,category_fans
62,1120,Print & Play,658
79,1019,Wargame,234
0,1009,Abstract Strategy,214
68,1016,Science Fiction,158
14,1002,Card Game,153
30,1010,Fantasy,139
21,1039,Deduction,125
34,1024,Horror,120
23,1021,Economic,119
18,1015,Civilization,103


In [23]:
# Array of the top category ids. Series.to_numpy() is used because numpy is
# not imported under the name `np` anywhere in this notebook.
n = top_categories['category_id'].to_numpy()

In [30]:
test_list = [1120, 2013]

# top_categories keeps its default row labels (category_id is a regular column),
# so .loc[val, ...] on the frame would look up a row label, not a category id.
# Key a Series by category_id and look the names up there instead.
names_by_id = top_categories.set_index('category_id')['category_name']

for val in test_list:
    if val in names_by_id.index:
        print(names_by_id.loc[val])

Print & Play


In [34]:
# Build a {category_id: category_name} dict from the top_categories frame.
pd.Series(top_categories['category_name'].values, index=top_categories['category_id']).to_dict()

{1120: 'Print & Play',
 1019: 'Wargame',
 1009: 'Abstract Strategy',
 1016: 'Science Fiction',
 1002: 'Card Game',
 1010: 'Fantasy',
 1039: 'Deduction',
 1024: 'Horror',
 1021: 'Economic',
 1015: 'Civilization',
 1022: 'Adventure',
 1030: 'Party Game',
 1047: 'Miniatures',
 1049: 'World War II',
 1113: 'Space Exploration',
 1029: 'City Building',
 1050: 'Ancient',
 1020: 'Exploration',
 1035: 'Medieval',
 1034: 'Trains'}

In [None]:
de