# Notebook Objective and Setup

BGG02 involves the acquisition of user_id information from BoardGameGeek, using their XML API.

In this notebook, each user's rated collection is requested from the API one user at a time, and the raw (uncleaned) rating data is saved to pickle files.

## Package Imports

In [1]:
import pandas as pd
import numpy as np
import requests
import regex as re
import time
import os
import gc

# Silence all warnings (mainly to hide the pandas copy warnings)
import warnings
warnings.filterwarnings('ignore')

# Display settings: never truncate columns, show at most 30 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 30

# NLP tools
import spacy
nlp = spacy.load("en_core_web_sm")
import re
import nltk
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from nltk.tokenize import word_tokenize

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os

## Notebook Functions

In [2]:
def get_user_ratings(username, max_retries=None, timeout=30):
    '''
    Get every game rating recorded by one BGG user.
    
    Requests the user's rated collection from the BoardGameGeek XML API2 and
    re-requests it (pausing 1 second each time) until the response holds an
    <items> element with a readable 'totalitems' attribute.
    
    Inputs:
    username: must be a valid BGG username
    max_retries: optional cap on the number of re-requests after a response
        that is not ready; None (default) keeps retrying indefinitely
    timeout: seconds passed to requests.get, so a stalled connection raises
        instead of hanging the whole batch
    
    Outputs:
    user: dataframe built from {username: {BGGId: rating}}, i.e. one row for
        the user with one column per rated game
    user_ratings_dates: dataframe with one row per rating and the columns
        Username, BGGId, Rating, Date_rated
    
    If the response contains an <errors> element, "Invalid username" is
    printed and both outputs are returned without any ratings.
    
    Raises:
    RuntimeError: only when max_retries is set and has been exceeded
    '''
    
    # set the API call path (runs of whitespace in the username become "+")
    user_path = re.sub(r"\s+", "+", username)
    path = "https://www.boardgamegeek.com/xmlapi2/collection?username="+user_path+"&rated=1&stats=1"
    
    # print the path to confirm
    print(path)
    
    # number of responses received so far that were not ready
    failures = 0
    
    # keep requesting until the page is usable or reports an error
    while True:
        
        print("Retrieving page")
        
        # get the page and parse it with beautifulsoup
        page = requests.get(path, timeout=timeout)
        game_page = BeautifulSoup(page.content, "xml")
        
        # if the page returns errors, the username is invalid.
        # Leave the loop; no <item> elements will be found below,
        # so the returned frames carry no ratings.
        if game_page.find('errors') is not None:
            print("Invalid username")
            break
        
        # Try to print the total number of user items
        try:
            print(int(game_page.find('items')['totalitems']))
        # No <items> element (TypeError), no 'totalitems' attribute (KeyError)
        # or a non-numeric value (ValueError): the collection is not ready.
        # NOTE(review): presumably BGG has queued the export and answered
        # with a placeholder message - confirm against the API documentation.
        except (TypeError, KeyError, ValueError):
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise RuntimeError(
                    "Collection for " + repr(username) + " was not ready after "
                    + str(max_retries) + " retries"
                )
            print("failed, pausing")
            time.sleep(1)
        # the print was successful, so the page is ready to be read
        else:
            break
    
    # This section begins once a usable (or error) page has been received
    
    # ratings keyed by game id, and one [user, game, rating, date] row per rating
    all_ratings = {}
    modified_record = []
    
    # for each rated item on the page:
    for game in game_page.find_all("item"):
        # get BGG Id
        game_id = game["objectid"]
        # Get user's rating for game
        rating = float(game.find("rating")["value"])
        # Get date of rating
        date_rated = game.find("status")["lastmodified"]
        # set in dictionary rating for game_id
        all_ratings[game_id] = rating
        # record the dated rating
        modified_record.append([username, game_id, rating, date_rated])
    
    # make dataframe for user from raw ratings
    user = pd.DataFrame.from_dict({username: all_ratings}, orient="index")
    # make dataframe for user from modified record
    user_ratings_dates = pd.DataFrame(modified_record, columns=["Username", "BGGId", "Rating", "Date_rated"])
    
    # Wait .5 seconds before the caller issues the next request
    time.sleep(.5)
    
    return user, user_ratings_dates


In [3]:
def save_load_loop(users):
    '''
    Processes one batch of users. Saves and cleans up after batch.
    
    Loads the two pickles holding all ratings gathered so far, fetches the
    ratings of every user in the batch with get_user_ratings, adds them to
    the loaded frames and writes both pickles back to disk.
    
    Inputs:
    users: list of users to batch
    
    Outputs:
    None. 'userid/ratings_dates.pkl' and 'userid/user_ratings.pkl' are
    overwritten with the extended frames.
    '''
    # load files
    ratings_dates = pd.read_pickle('userid/ratings_dates.pkl')
    user_ratings = pd.read_pickle('userid/user_ratings.pkl')
    
    # print batch
    print(users)
    
    # Collect the pieces and concatenate once after the loop.
    # DataFrame.append was removed in pandas 2.0, and appending inside the
    # loop re-copied the whole accumulated frame for every user.
    dates_frames = [ratings_dates]
    rating_frames = [user_ratings]
    
    # for each user in batch:
    for username in users:
        
        # call get_user_ratings function
        user, user_ratings_dates = get_user_ratings(username)
        
        # frames without any ratings add nothing, so they are left out
        if not user_ratings_dates.empty:
            dates_frames.append(user_ratings_dates)
        if not user.empty:
            rating_frames.append(user)
    
    # combine existing and new records (same defaults DataFrame.append used)
    ratings_dates = pd.concat(dates_frames)
    # set user ratings as float
    user_ratings = pd.concat(rating_frames).astype('float')
    
    # save files
    ratings_dates.to_pickle('userid/ratings_dates.pkl')
    user_ratings.to_pickle('userid/user_ratings.pkl')
    
    # clean up
    del dates_frames, rating_frames
    del ratings_dates
    del user_ratings
    gc.collect()

## Loading Comments

In [7]:
# Load the cleaned comments table from disk and display it.
# Only the 'Username' column is used by the cells that follow.
comments = pd.read_pickle('data_cleaned/comments.pkl')
comments

Unnamed: 0,BGGId,Name,Rating,Username,cleaned
0,174430,Gloomhaven,,-Johnny-,con fiddliness time investment ridiculous mo...
1,174430,Gloomhaven,9,-mIDE-,kickstarter estimate delivery
2,174430,Gloomhaven,,0 1 1 2 3 5 8,half good forge war
3,174430,Gloomhaven,10,0stuart0,good addictive coop game ve play desperately p...
4,174430,Gloomhaven,5,1 Family Meeple,imghttpscfgeekdostaticcommbsmbpngimg gloomh...
...,...,...,...,...,...
4172603,339592,Sheep in Disguise,10,Pitersenpai,este es el primer proyecto que patrocino en ki...
4172604,339592,Sheep in Disguise,10,Radalict,game look lot fun m look forward
4172605,339592,Sheep in Disguise,10,Rigoberto123,nt wait play support kickstarter subscribe mr ...
4172606,339592,Sheep in Disguise,10,TAB4two,wait game hour hilarious sheepnanigan family b...


In [8]:
# Collect each distinct username found in the comments, then order the list
all_users = comments['Username'].unique().tolist()
all_users.sort()

# The comments table is not needed past this point; release its memory
del comments
gc.collect()

6363

In [9]:
# how many unique usernames are there to process?
len(all_users)

182354

In [90]:
# Position of a specific user in the sorted list.
# The cell under "Get Ratings" slices all_users from this same position.
all_users.index('zath')

181750

## Get Ratings

In [93]:
# Determine the start of the batch: resume from this username (inclusive).
# Looking the position up by name avoids a hand-copied index that would go
# stale whenever the list of users changes.
resume_from = 'zath'
sub_users = all_users[all_users.index(resume_from):]

In [94]:
# Batch configuration
batch_size = 50
start = 0

# Walk through sub_users one batch at a time until none are left
while start < len(sub_users):
    end = start + batch_size      # exclusive end of the current batch
    users = sub_users[start:end]  # the current batch of usernames
    save_load_loop(users)         # fetch, append and save this batch
    start = end                   # move on to the next batch
    time.sleep(3)                 # pause between batches

['zath', 'zatks', 'zatoichijuegos', 'zaubererer', 'zauberwurf', 'zauis', 'zautman', 'zavel8', 'zavesky', 'zavijahbeta', 'zavulon', 'zawimawi', 'zax', 'zaxa', 'zazasLT', 'zazenpanda', 'zazery', 'zazzman', 'zb1035', 'zb2112', 'zbajszek', 'zberkoff', 'zbgamer4545', 'zbizek1', 'zboat', 'zbone', 'zbrent716', 'zbruss', 'zbunc17', 'zbv500', 'zbyshekh', 'zbyszeklarwa', 'zc1993', 'zcantin', 'zcapr17', 'zcelloman', 'zchillman', 'zco2000', 'zd2046', 'zdalekacelny', 'zdan89', 'zdenda', 'zdepthcharge', 'zdkat', 'zdpierson', 'zdravstvouite', 'zdriesen', 'zdtorok', 'ze_dutch_kiwi', 'ze_stom']
https://www.boardgamegeek.com/xmlapi2/collection?username=zath&rated=1&stats=1
Retrieving page
80
https://www.boardgamegeek.com/xmlapi2/collection?username=zatks&rated=1&stats=1
Retrieving page
19
https://www.boardgamegeek.com/xmlapi2/collection?username=zatoichijuegos&rated=1&stats=1
Retrieving page
25
https://www.boardgamegeek.com/xmlapi2/collection?username=zaubererer&rated=1&stats=1
Retrieving page
1092
http

failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zeaklous&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
36
https://www.boardgamegeek.com/xmlapi2/collection?username=zeaksevers&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
5
https://www.boardgamegeek.com/xmlapi2/collection?username=zeal&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
48
https://www.boardgamegeek.com/xmlapi2/collection?username=zealisee&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
16
https://www.boardgamegeek.com/xmlapi2/collection?username=zeamadevarza&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
163
https://www.boardgamegeek.com/xmlapi2/collection?username=zeanarchy&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
13
https://www.boardgamegeek.com/xmlapi2/collection?username=zebani200&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving 

failed, pausing
Retrieving page
158
https://www.boardgamegeek.com/xmlapi2/collection?username=zeldaar&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
35
https://www.boardgamegeek.com/xmlapi2/collection?username=zeldafire&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
78
https://www.boardgamegeek.com/xmlapi2/collection?username=zelekangela&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
64
https://www.boardgamegeek.com/xmlapi2/collection?username=zelekar&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
32
https://www.boardgamegeek.com/xmlapi2/collection?username=zelig2&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
35
https://www.boardgamegeek.com/xmlapi2/collection?username=zelitrex36&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
15
https://www.boardgamegeek.com/xmlapi2/collection?username=zeljkov020&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
46
https://www.boardgameg

failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zenwired&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
86
https://www.boardgamegeek.com/xmlapi2/collection?username=zenxacred&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
126
https://www.boardgamegeek.com/xmlapi2/collection?username=zenz&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
62
https://www.boardgamegeek.com/xmlapi2/collection?username=zeotter&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retr

failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zeropointfield&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
5
https://www.boardgamegeek.com/xmlapi2/collection?username=zerorez&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
30
https://www.boardgamegeek.com/xmlapi2/collection?username=zerosatanmine&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
47
https://www.boardgamegeek.com/xmlapi2/collection?username=zerostasis&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
8
https://www.boardgamegeek.com/xmlapi2/collection?username=zeroth+hour&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
125
https://www.boardgamegeek.com/xmlapi2/collection?username=zerothefool&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
236
https://www.boardgamegeek.com/xmlapi2/collection?username=zerstorer88&rated=1&stats=1
Retrieving page
failed, pa

failed, pausing
Retrieving page
25
https://www.boardgamegeek.com/xmlapi2/collection?username=zhpaka&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
11
https://www.boardgamegeek.com/xmlapi2/collection?username=zhredder&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zhstalt&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
171
https://www.boardgamegeek.com/xmlapi2/collection?username=zhuliang88bgg&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
13
https://www.boardgamegeek.com/xmlapi2/collection?username=zhuyirenaa&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
306
https://www.boardgamegeek.com/xmlapi2/collection?username=zhuzhuzhu2002&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
18
https://www.boardgamegeek.com/xmlapi2/collection?username=zhynn&rated=1&stats=1
Retriev

failed, pausing
Retrieving page
21
https://www.boardgamegeek.com/xmlapi2/collection?username=zingor1066&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
2
https://www.boardgamegeek.com/xmlapi2/collection?username=zinniye&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
55
https://www.boardgamegeek.com/xmlapi2/collection?username=zinnober&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zinoblub&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zinolau&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
101
https://www.boardgamegeek.com/xmlapi2/collection?username=zinovik&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zinschj&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
12
https://www.boardgamegeek.com/xm

Retrieving page
failed, pausing
Retrieving page
222
https://www.boardgamegeek.com/xmlapi2/collection?username=zkl2003&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
31
https://www.boardgamegeek.com/xmlapi2/collection?username=zkolodin&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
9
https://www.boardgamegeek.com/xmlapi2/collection?username=zkovac65&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
56
https://www.boardgamegeek.com/xmlapi2/collection?username=zkress&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zlandine&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
36
https://www.boardgamegeek.com/xmlapi2/collection?username=zlnemeth&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
117
https://www.boardgamegeek.com/xmlapi2/collection?username=zloki83&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
18
https://www.boa

failed, pausing
Retrieving page
16
https://www.boardgamegeek.com/xmlapi2/collection?username=zokvicds&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zol72&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
44
https://www.boardgamegeek.com/xmlapi2/collection?username=zolarasec&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
128
https://www.boardgamegeek.com/xmlapi2/collection?username=zolarzardoz&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zoldiz&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
136
https://www.boardgamegeek.com/xmlapi2/collection?username=zoliak&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
55
https://www.boardgamegeek.com/xmlapi2/collection?username=zolle001&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
2

failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zoojar&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
17
https://www.boardgamegeek.com/xmlapi2/collection?username=zooko&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
10
https://www.boardgamegeek.com/xmlapi2/collection?username=zoomom&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zooney&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
22
https://www.boardgamegeek.com/xmlapi2/collection?username=zoooch25&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
152
https://www.boardgamegeek.com/xmlapi2/collection?username=zoop&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
272
https://www.boardgamegeek.com/xmlapi2/collection?username=zooradio&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
3
https://w

failed, pausing
Retrieving page
1
https://www.boardgamegeek.com/xmlapi2/collection?username=zsonody&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zstoltz&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
61
https://www.boardgamegeek.com/xmlapi2/collection?username=zsusyt&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
51
https://www.boardgamegeek.com/xmlapi2/collection?username=zsuzs&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
17
https://www.boardgamegeek.com/xmlapi2/collection?username=ztfdrake61&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
13
https://www.boardgamegeek.com/xmlapi2/collection?username=ztilleto&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
109
https://www.boardgamegeek.com/xmlapi2/collection?username=ztimmons&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
39
https://www.boardgamegeek.com/xml

Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
1057
https://www.boardgamegeek.com/xmlapi2/collection?username=zvsmith&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
2
https://www.boardgamegeek.com/xmlapi2/collection?username=zwafke&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
0
https://www.boardgamegeek.com/xmlapi2/collection?username=zwalex&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
failed, pausing
Retrieving page
217
https://www.boardgamegeek.com/xmlapi2/collection?username=zwarbo&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
66
https://www.boardgamegeek.com/xmlapi2/collection?username=zwbsch&rated=1&stats=1
Retrieving page
failed, pausing
Retrieving page
26
https://www.boardgamegeek.com/x

## Check pickle

In [10]:
# Load a saved ratings pickle for inspection.
# NOTE(review): this reads 'user_ratings31.pkl', not the 'user_ratings.pkl'
# written by save_load_loop - presumably a renamed snapshot; confirm.
#ratings_dates = pd.read_pickle('userid/ratings_dates40.pkl')
user_ratings = pd.read_pickle('userid/user_ratings31.pkl')

In [20]:
# Build a BGGId -> game name lookup from the cleaned games table
games = pd.read_pickle('data_cleaned/games.pkl')

# parallel lists of ids and names, in table order
game_ids = games['BGGId'].tolist()
game_names = games['Name'].tolist()

# pair every id with its name
game_id_lookup = dict(zip(game_ids, game_names))

# the games table itself is no longer needed
del games
gc.collect()

game_id_lookup

{174430: 'Gloomhaven',
 161936: 'Pandemic Legacy: Season 1',
 224517: 'Brass: Birmingham',
 167791: 'Terraforming Mars',
 291457: 'Gloomhaven: Jaws of the Lion',
 233078: 'Twilight Imperium: Fourth Edition',
 220308: 'Gaia Project',
 187645: 'Star Wars: Rebellion',
 182028: 'Through the Ages: A New Story of Civilization',
 115746: 'War of the Ring: Second Edition',
 162886: 'Spirit Island',
 193738: 'Great Western Trail',
 12333: 'Twilight Struggle',
 169786: 'Scythe',
 84876: 'The Castles of Burgundy',
 173346: '7 Wonders Duel',
 120677: 'Terra Mystica',
 124361: 'Concordia',
 28720: 'Brass: Lancashire',
 167355: 'Nemesis',
 266192: 'Wingspan',
 177736: 'A Feast for Odin',
 205637: 'Arkham Horror: The Card Game',
 183394: 'Viticulture Essential Edition',
 164928: 'Orléans',
 237182: 'Root',
 96848: 'Mage Knight Board Game',
 316554: 'Dune: Imperium',
 199792: 'Everdell',
 3076: 'Puerto Rico',
 102794: 'Caverna: The Cave Farmers',
 175914: 'Food Chain Magnate',
 170216: 'Blood Rage',
 

In [28]:
# Ratings of a single user: one row per rated game, with the game's name
this_user = (
    pd.DataFrame(user_ratings.T['Threnody'].dropna(axis=0))
    .rename(columns={'Threnody': 'Rating'})
    .reset_index()
    # the former index holds the game ids; translate them into names
    .assign(Game=lambda frame: frame['index'].astype('int32').map(game_id_lookup))
    .drop('index', axis=1)
)

# show the 30 highest-rated games
this_user.sort_values('Rating', ascending=False).head(30)

Unnamed: 0,Rating,Game
0,10.0,Gloomhaven
127,10.0,Roll Player
18,10.0,One Night Ultimate Werewolf
56,10.0,Magic: The Gathering
119,10.0,One Night Ultimate Werewolf: Daybreak
32,10.0,The Castles of Burgundy
124,10.0,Pandemic Legacy: Season 2
120,10.0,Pandemic Legacy: Season 1
157,10.0,Between Two Cities
170,10.0,


In [None]:
# Display the dated ratings table.
# NOTE(review): ratings_dates is only defined at notebook level if the
# commented-out read_pickle line under "Check pickle" is restored.
ratings_dates

##### ONLY RUN THIS TO RESET THE PICKLES ENTIRELY

This section entirely resets the pickle files on disk. Never run this unless intentional.

In [None]:
# Deliberate stop so that "Run All" never reaches the reset cells below.
# An explicit exception says why execution halted, unlike the SyntaxError
# that a bare `break` outside a loop produces.
raise RuntimeError(
    "Stopped on purpose: the cells below reset the saved pickles. "
    "Run them manually only if that is intended."
)

In [91]:
#ratings_dates = pd.DataFrame(columns=['Username', 'BGGId', 'Rating', 'Date_rated'])

#user_ratings = pd.DataFrame()

In [92]:
#ratings_dates.to_pickle('userid/ratings_dates.pkl')
#user_ratings.to_pickle('userid/user_ratings.pkl')