# Notebook Setup

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
from bs4 import BeautifulSoup
import requests
import regex as re
import time


from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os


## Functions

In [2]:
def create_designers(game_page, game_id):
    """Build a one-row indicator frame of the designers credited on a game.

    Parameters
    ----------
    game_page : parsed XML document (BeautifulSoup-like)
        Must support ``find_all('link', type=...)``; each returned item is
        indexed with ``['value']`` to obtain the designer name.
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per designer name,
        each holding ``1``.
    """
    designer_links = game_page.find_all('link', type='boardgamedesigner')

    row = {'BGGId': int(game_id)}
    for link in designer_links:
        row[link['value']] = 1

    # DataFrame.append was removed in pandas 2.0; building the frame from a
    # single record also keeps the flags as integers instead of floats.
    return pd.DataFrame([row])

In [3]:
def create_categories(game_page, game_id):
    """Build a one-row indicator frame of the categories listed for a game.

    Parameters
    ----------
    game_page : parsed XML document (BeautifulSoup-like)
        Must support ``find_all('link', type=...)``; each returned item is
        indexed with ``['value']`` to obtain the category name.
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per category name,
        each holding ``1``.
    """
    category_links = game_page.find_all('link', type='boardgamecategory')

    row = {'BGGId': int(game_id)}
    for link in category_links:
        row[link['value']] = 1

    # DataFrame.append no longer exists in pandas 2.x; a one-record frame is
    # the direct equivalent.
    return pd.DataFrame([row])

In [4]:
def create_mechanics(game_page, game_id):
    """Build a one-row indicator frame of the mechanics listed for a game.

    Besides the ``boardgamemechanic`` links, two ``boardgamefamily`` entries
    are promoted to mechanic flags: ``"Mechanism: Tableau Building"`` becomes
    the ``TableauBuilding`` column and ``"Mechanism: Legacy"`` becomes the
    ``Legacy`` column.

    Parameters
    ----------
    game_page : parsed XML document (BeautifulSoup-like)
        Must support ``find_all('link', type=...)`` and
        ``find('link', type=..., value=...)`` (returning ``None`` on a miss).
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per mechanic, each
        holding ``1``. Family-derived flags are only present when matched.
    """
    row = {'BGGId': int(game_id)}
    for link in game_page.find_all('link', type='boardgamemechanic'):
        row[link['value']] = 1

    # Family entries that are really mechanics. An explicit None check replaces
    # the former bare `except: pass`, so unrelated errors are no longer hidden.
    family_flags = (
        ("Mechanism: Tableau Building", 'TableauBuilding'),
        ("Mechanism: Legacy", 'Legacy'),
    )
    for family_value, column in family_flags:
        if game_page.find('link', type='boardgamefamily', value=family_value) is not None:
            row[column] = 1

    # DataFrame.append was removed in pandas 2.0; build the single row directly.
    return pd.DataFrame([row])

In [5]:
def create_artists(game_page, game_id):
    """Build a one-row indicator frame of the artists credited on a game.

    Parameters
    ----------
    game_page : parsed XML document (BeautifulSoup-like)
        Must support ``find_all('link', type=...)``; each returned item is
        indexed with ``['value']`` to obtain the artist name.
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per artist name,
        each holding ``1``.
    """
    artist_links = game_page.find_all('link', type='boardgameartist')

    row = {'BGGId': int(game_id)}
    for link in artist_links:
        row[link['value']] = 1

    # DataFrame.append is gone as of pandas 2.0; construct the row in one step.
    return pd.DataFrame([row])

In [6]:
def create_publishers(game_page, game_id):
    """Build a one-row indicator frame of the publishers listed for a game.

    Parameters
    ----------
    game_page : parsed XML document (BeautifulSoup-like)
        Must support ``find_all('link', type=...)``; each returned item is
        indexed with ``['value']`` to obtain the publisher name.
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per publisher name,
        each holding ``1``.
    """
    publisher_links = game_page.find_all('link', type='boardgamepublisher')

    row = {'BGGId': int(game_id)}
    for link in publisher_links:
        row[link['value']] = 1

    # Replaces the DataFrame.append call that pandas 2.0 removed.
    return pd.DataFrame([row])

In [7]:
def create_awards(awards_level, game_id):
    """Build a one-row indicator frame of the awards shown for a game.

    Each award label has all digits removed and surrounding spaces trimmed
    before it is used as a column name, so the same award in different years
    maps to one column.

    Parameters
    ----------
    awards_level : BeautifulSoup-like element or None
        The awards container; ``find_all('a', class_='ng-binding')`` is called
        on it and ``.text`` is read from each hit. ``None`` (no awards section
        on the page) is treated as "no awards".
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        Exactly one row: ``BGGId`` first, then one column per distinct award
        label, each holding ``1``.
    """
    row = {'BGGId': int(game_id)}

    # A page without an awards module yields None from the caller's lookup;
    # return the bare id row instead of failing on None.find_all.
    award_links = [] if awards_level is None else awards_level.find_all('a', class_='ng-binding')

    for link in award_links:
        label = re.sub("[0-9]", "", link.text).strip(' ')
        row[label] = 1

    # DataFrame.append was removed in pandas 2.0; build the single row directly.
    return pd.DataFrame([row])

In [8]:
def create_ratings_dist(stats_page, game_id):
    """Extract the 1-10 ratings histogram labels from a rendered stats page.

    The function locates the ``ratings-stats-graph`` element, collects its
    ``text`` nodes and reads positions 10 through 19 as the values for ratings
    ``'1'`` through ``'10'``.
    NOTE(review): the fixed offset of 10 presumably skips axis labels in the
    chart markup - confirm against the live page if the layout changes.

    Parameters
    ----------
    stats_page : BeautifulSoup-like document
        Must support ``find('ratings-stats-graph')``; the result must support
        ``find_all('text')`` and hold at least 20 nodes with a ``.text``.
    game_id : int or str
        Identifier written to the ``BGGId`` column (cast with ``int``).

    Returns
    -------
    pandas.DataFrame
        One row with columns ``BGGId``, ``'1'`` ... ``'10'``; the rating
        columns hold the raw label text (not converted to numbers).

    Raises
    ------
    AttributeError
        If the graph element is missing from ``stats_page``.
    IndexError
        If fewer than 20 ``text`` nodes are present.
    """
    graph = stats_page.find('ratings-stats-graph')
    labels = graph.find_all('text')

    first_label = 10  # index of the node holding the value for rating "1"
    row = {'BGGId': int(game_id)}
    for score in range(1, 11):
        row[str(score)] = labels[first_label + score - 1].text

    # DataFrame.append was removed in pandas 2.0; build the single row directly.
    return pd.DataFrame([row])

In [9]:
def _strip_label(value, label):
    """Remove a leading ``label`` (e.g. ``'Game:'``) from ``value`` and trim spaces.

    ``str.strip('Game:')`` strips a *set of characters* from both ends, which
    also eats trailing letters of the actual name; this removes the prefix only.
    """
    if value.startswith(label):
        value = value[len(label):]
    return value.strip(' ')


def _poll_average(results, weight):
    """Vote-weighted mean of ``weight(result)`` over poll results, or None with no votes."""
    weighted_sum, total_votes = 0, 0
    for result in results:
        votes = int(result['numvotes'])
        weighted_sum += votes * weight(result)
        total_votes += votes
    return weighted_sum / total_votes if total_votes > 0 else None


def create_game_entry(game_id):
    '''Collect everything this notebook records about a single game.

    Data comes from three places: the XML API ``thing`` endpoint (basic stats,
    polls, links, comments), two browser-rendered pages fetched with headless
    Chrome (awards, ratings chart, fan and page-view counts) and the XML API
    ``forumlist`` endpoint (thread counts).

    Parameters
    ----------
    game_id : int or str
        BoardGameGeek id of the game.

    Returns
    -------
    tuple of 9 pandas.DataFrame
        ``(this_game, designers, categories, mechanics, artists, publishers,
        comments, awards, ratings_dist)``. When the game has fewer than 30
        user ratings, nine empty frames are returned instead.

    Notes
    -----
    Performs network requests, starts a Chrome process and sleeps between
    calls, so a single invocation takes several seconds at minimum.
    '''

    start = time.time()  # log the start time for this entry

    # get the game path using the game id, call the api and get the page
    path = 'https://www.boardgamegeek.com/xmlapi2/thing?id='+str(game_id)+'&stats=1&comments=1&ratingcomments=1&page=1&pagesize=100'
    page = requests.get(path)
    game_page = BeautifulSoup(page.content, 'xml')

    # Print the game we're pulling data on
    game_name = game_page.find('name', type='primary')['value']
    print("Starting",game_name,game_id)

    # check that this game has sufficient user ratings to include
    user_ratings = int(game_page.find('usersrated')['value'])
    if user_ratings < 30:
        print("Not enough data to include this listing")
        return tuple(pd.DataFrame() for _ in range(9))

    print("Getting basic stats")
    description = game_page.find('description').text  # description text of the game
    year_pub = int(game_page.find('yearpublished')['value'])  # year published
    minplayers = int(game_page.find('minplayers')['value'])  # minimum players
    maxplayers = int(game_page.find('maxplayers')['value'])  # maximum players
    avg_rating = float(game_page.find('average')['value'])  # average rating
    bayes_avg = float(game_page.find('bayesaverage')['value'])  # bayes average rating
    std_dev = float(game_page.find('stddev')['value'])  # standard deviation of rating
    num_owned = int(game_page.find('owned')['value'])  # num of people own this game
    num_want = int(game_page.find('wanting')['value'])  # num of people want this game
    num_wish = int(game_page.find('wishing')['value'])  # num of people with game on wishlist
    num_weight_votes = int(game_page.find('numweights')['value'])  # num of votes for game weight
    game_weight = float(game_page.find('averageweight')['value'])  # voted game weight
    image_tag = game_page.find('image')
    image_path = image_tag.text if image_tag is not None else None  # path to image, if any
    mfg_play_time = int(game_page.find('playingtime')['value'])  # mfg stated playtime
    comm_min_play = int(game_page.find('minplaytime')['value'])  # community min playtime
    comm_max_play = int(game_page.find('maxplaytime')['value'])  # community max playtime
    mfg_age = int(game_page.find('minage')['value'])  # mfg min age
    num_comments = int(game_page.find('comments')['totalitems'])  # num of ratings comments
    num_alts = len(game_page.find_all('name', type='alternate'))  # number alternate versions
    num_expansions = len(game_page.find_all('link', type='boardgameexpansion'))  # number of expansions
    num_implementations = len(game_page.find_all('link', type='boardgameimplementation'))  # number of implementations

    # Expansion flag: 1 when the item itself is typed as an expansion
    expansion_flag = 1 if game_page.find('item')['type'] == 'boardgameexpansion' else 0

    # Reimplementation flag: 1 when an inbound implementation link exists
    reimplementation = game_page.find('link', type="boardgameimplementation", inbound="true")
    reimplements = 1 if reimplementation else 0

    # Community age: vote-weighted mean of the first two characters of each
    # option (so "21 and up" counts as 21); None when nobody voted
    age_poll = game_page.find('poll', title="User Suggested Player Age").find_all('result')
    comm_age = _poll_average(age_poll, lambda result: int(result['value'][:2]))

    # Language ease: vote-weighted mean of the poll's numeric level
    lang_poll = game_page.find('poll', title="Language Dependence").find_all('result')
    lang_ease = _poll_average(lang_poll, lambda result: int(result['level']))

    # Best and Good Players
    players_poll = game_page.find('poll', title="User Suggested Number of Players")
    players = players_poll.find_all('results')
    player_num_votes = int(players_poll['totalvotes'])

    best_players, best_score, good_players = 0, 0, []

    if player_num_votes > 30:  # only trust the poll with more than 30 votes
        for player in players:
            best = int(player.find('result', value='Best')['numvotes'])
            rec = int(player.find('result', value='Recommended')['numvotes'])
            score = best*2 + rec*1
            ratio = (best+rec)/player_num_votes
            if score > best_score:
                best_players, best_score = player['numplayers'], score
            if ratio > .5:
                good_players.append(player['numplayers'])
    else:
        best_players = None

    # Use Selenium to scrape dynamic content
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")

    time.sleep(1)  # wait 1 second

    print("New page retrieval for awards.")
    # One browser serves both dynamic pages and is always shut down afterwards;
    # previously two Chrome instances were started per game and never quit.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://boardgamegeek.com/boardgame/"+str(game_id)+'/')

        # 'lxml' is what BeautifulSoup selects by default when lxml is installed
        # (it must be, for the 'xml' parser used above); naming it avoids the
        # guessed-parser warning.
        game_page_dynamic = BeautifulSoup(driver.page_source, 'lxml')

        # number of awards; a page without an awards module counts as zero
        # instead of leaving num_awards / awards_level undefined
        awards_level = game_page_dynamic.find('awards-module')
        if awards_level is not None:
            num_awards = len(awards_level.find_all('a', class_='ng-binding'))
        else:
            num_awards = 0

        # get stats page path
        print("New page retrieval for ratings distribution. May be waiting for chart to load.")
        time.sleep(1)
        stats_path = game_page_dynamic.find('link')['href']+"/stats"
        driver.get(stats_path)
        # wait until the driver finds the chart element that we need
        WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#mainbody > div > div.global-body-content.pending.ready > div.content.ng-isolate-scope > div:nth-child(2) > ng-include > div > div > ui-view > ui-view > div > div > div.panel-body > div > div.col-sm-6.col-sm-push-6 > div > div.stats-graph > ratings-stats-graph > div > div > div:nth-child(1) > div > svg > g:nth-child(2) > g:nth-child(5)')))
        stats_page = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        driver.quit()

    # number of fans and page views, read by position from the stats outline
    # NOTE(review): positions 5 and 6 depend on the page layout - confirm if it changes
    outline_items = stats_page.find_all('div', class_="outline-item-description")
    num_fans = int(outline_items[5].text.replace(',','').strip(' '))
    num_views = int(outline_items[6].text.replace(',','').strip(' '))

    # Get forum thread top-level info
    print('New page retrieval for forum ids.')
    forum_page = requests.get("https://www.boardgamegeek.com/xmlapi2/forumlist?id="+str(game_id)+"&type=thing")
    forums = BeautifulSoup(forum_page.content, 'xml')

    all_forums = forums.find_all('forum')
    total_threads = sum(int(forum['numthreads']) for forum in all_forums)
    # NOTE(review): assumes the fourth forum in the listing is "Rules" - verify
    rules_threads = int(all_forums[3]['numthreads'])

    print(game_id)

    # Assemble the single-row record; key order is the resulting column order
    record = {
        'BGGId': int(game_id),
        'Name': game_name,
        'Description': description,
        'YearPublished': year_pub,
        'GameWeight': game_weight,
        'AvgRating': avg_rating,
        'BayesAvgRating': bayes_avg,
        'StdDev': std_dev,
        'MinPlayers': minplayers,
        'MaxPlayers': maxplayers,
        'ComAgeRec': float(comm_age) if comm_age is not None else None,
        'LanguageEase': float(lang_ease) if lang_ease is not None else None,
        'BestPlayers': best_players,
        'GoodPlayers': good_players,
        'NumOwned': num_owned,
        'NumWant': num_want,
        'NumWish': num_wish,
        'NumWeightVotes': num_weight_votes,
        'MfgPlaytime': mfg_play_time,
        'ComMinPlaytime': comm_min_play,
        'ComMaxPlaytime': comm_max_play,
        'MfgAgeRec': mfg_age,
        'NumUserRatings': user_ratings,
        'NumComments': num_comments,
        'NumAlternates': num_alts,
        'NumExpansions': num_expansions,
        'NumAwards': num_awards,
        'NumImplementations': num_implementations,
        'NumFans': num_fans,
        'NumPageViews': num_views,
        'RulesPosts': rules_threads,
        'TotalPosts': total_threads,
        'IsExpansion': expansion_flag,
        'IsReimplementation': reimplements,
        'ImagePath': image_path,
    }

    # add unique information to end of the record

    # Game ranks; a non-numeric value such as "Not Ranked" is stored as NaN
    # rather than aborting the whole entry
    for rank in game_page.find_all('rank'):
        try:
            rank_value = float(rank['value'])
        except ValueError:
            rank_value = np.nan
        record['Rank:'+rank['name']] = rank_value

    # Components
    # NOTE(review): this keys the column on the link's 'name' attribute; the
    # links read elsewhere in this function only use 'type'/'value', so this
    # may never match. Links without 'name' are skipped, which mirrors the old
    # silently-swallowed KeyError - confirm the intended column naming.
    for link in game_page.find_all('link', type='boardgamefamily', value=re.compile("Component")):
        component_name = link.get('name')
        if component_name is None:
            continue
        record['Components:'+component_name] = link['value']

    # Game series/family ("Series:" takes precedence over "Game:" when both exist)
    for label in ("Game:", "Series:"):
        link = game_page.find('link', type='boardgamefamily', value=re.compile(label))
        if link is not None:
            record['Family'] = _strip_label(link['value'], label)

    # Theme
    link = game_page.find('link', type='boardgamefamily', value=re.compile("Theme:"))
    if link is not None:
        record['Theme'] = _strip_label(link['value'], "Theme:")

    # Game category
    link = game_page.find('link', type='boardgamefamily', value=re.compile("Category:"))
    if link is not None:
        record['Category'] = _strip_label(link['value'], "Category:")

    # Kickstarted flag
    if game_page.find('link', type='boardgamefamily', value=re.compile("Crowdfunding")) is not None:
        record['Kickstarted'] = 1

    this_game = pd.DataFrame([record])

    # create specialty dataframes
    print("Making specialty data frames")
    designers = create_designers(game_page, game_id)
    categories = create_categories(game_page, game_id)
    mechanics = create_mechanics(game_page, game_id)
    artists = create_artists(game_page, game_id)
    publishers = create_publishers(game_page, game_id)
    if awards_level is not None:
        awards = create_awards(awards_level, game_id)
    else:
        awards = pd.DataFrame([{'BGGId': int(game_id)}])  # no awards module on the page
    ratings_dist = create_ratings_dist(stats_page, game_id)

    # Get comments - new api calls
    comment_pages = int(np.ceil(num_comments/100))
    print("Getting comments. There are "+str(comment_pages)+" pages to parse.")

    bggid, names, ratings, comment, usernames = [], [], [], [], []

    # The API's page parameter is 1-based (the first request above uses page=1),
    # so iterate 1..comment_pages rather than 0..comment_pages-1.
    for page_number in range(1, comment_pages + 1):
        print("page "+str(page_number)+" of "+str(comment_pages))
        path2 = 'https://www.boardgamegeek.com/xmlapi2/thing?id='+str(game_id)+'&comments=1&ratingcomments=1&page='+str(page_number)+'&pagesize=100'
        all_comments = requests.get(path2)
        comments_page = BeautifulSoup(all_comments.content, 'xml')
        for item in comments_page.find_all('comment'):
            bggid.append(int(game_id))
            names.append(game_name)
            ratings.append(item['rating'])
            comment.append(item['value'])
            usernames.append(item['username'].strip('_'))
        time.sleep(2)  # be polite to the API between pages

    comments = pd.DataFrame({
        'BGGId': bggid,
        'Name': names,
        'Rating': ratings,
        'Value': comment,
        'Username': usernames,
    })

    print(f'Time: {time.time() - start}')

    # Pause script
    time.sleep(1)

    return this_game, designers, categories, mechanics, artists, publishers, comments, awards, ratings_dist#, reviews

# Getting Game Categories

In [None]:
# Load the scraped id table, then discard every row whose id column (0) holds
# the literal 'xpansion'.
# NOTE(review): 'xpansion' looks like a mangled "expansion" marker left over
# from an earlier cleaning step - confirm where it originates.
game_ids = pd.read_pickle('data_cleaned/game_ids.pkl')
is_placeholder = game_ids[0] == 'xpansion'
drop_these = game_ids.index[is_placeholder].tolist()
game_ids = game_ids.drop(index=drop_these)

In [66]:
# Empty accumulator with only the id column; the scraping cell below adds one
# row per game and new category columns as they are encountered.
categories_storage = pd.DataFrame(columns=['BGGId'])

In [67]:
BATCH_SIZE = 500    # ids requested per API call
MAX_GAMES = 25000   # only the first 25,000 ids in game_ids are processed

# One dict per game; converted to a frame once at the end instead of growing
# categories_storage row by row (DataFrame.append was removed in pandas 2.0
# and repeated appends are quadratic).
category_rows = []

for start_position in range(0, MAX_GAMES, BATCH_SIZE):
    end_position = start_position + BATCH_SIZE

    # positional slice: the index has gaps after rows were dropped
    grab_list = game_ids[0].iloc[start_position:end_position]
    if len(grab_list) == 0:
        break  # ran out of ids; no point requesting an empty batch

    print("Getting "+str(start_position)+' through '+str(end_position-1))

    targets = ','.join(str(item) for item in grab_list)

    # get the game path using the game ids, call the api and get the page
    path = 'https://www.boardgamegeek.com/xmlapi2/thing?id='+targets+'&stats=1'
    page = requests.get(path)  # get the page
    game_page = BeautifulSoup(page.content, 'xml')  # parse the page with beautifulsoup

    for entry in game_page.find_all('item'):
        # Take the id from the returned item itself. Pairing entries with
        # grab_list by position mislabels every following row whenever the
        # API leaves out one of the requested ids.
        category = {'BGGId': int(entry['id'])}

        for item in entry.find_all('link', type='boardgamecategory'):
            category[item['value']] = 1

        category_rows.append(category)

    time.sleep(3)  # pause between batches

if category_rows:
    categories_storage = pd.concat(
        [categories_storage, pd.DataFrame(category_rows)],
        ignore_index=True,
    )

Getting 0 through 499
Getting 500 through 999
Getting 1000 through 1499
Getting 1500 through 1999
Getting 2000 through 2499
Getting 2500 through 2999
Getting 3000 through 3499
Getting 3500 through 3999
Getting 4000 through 4499
Getting 4500 through 4999
Getting 5000 through 5499
Getting 5500 through 5999
Getting 6000 through 6499
Getting 6500 through 6999
Getting 7000 through 7499
Getting 7500 through 7999
Getting 8000 through 8499
Getting 8500 through 8999
Getting 9000 through 9499
Getting 9500 through 9999
Getting 10000 through 10499
Getting 10500 through 10999
Getting 11000 through 11499
Getting 11500 through 11999
Getting 12000 through 12499
Getting 12500 through 12999
Getting 13000 through 13499
Getting 13500 through 13999
Getting 14000 through 14499
Getting 14500 through 14999
Getting 15000 through 15499
Getting 15500 through 15999
Getting 16000 through 16499
Getting 16500 through 16999
Getting 17000 through 17499
Getting 17500 through 17999
Getting 18000 through 18499
Getting 18

In [68]:
# Persist the accumulated category indicators for the cleaning notebook.
categories_storage.to_pickle('data_dirty/subcategories.pkl')

In [69]:
# for some reason these indices were missing when we loaded in to our cleaning file, so we are getting them here manually
# Reload the saved category table so the rows fetched below extend it.
categories_storage = pd.read_pickle('data_dirty/subcategories.pkl')

# Ids found to be absent from the saved table; the query string is derived
# from the list so the two cannot drift apart.
grab_list = [145599, 8317, 252197]
targets = ', '.join(map(str, grab_list))

In [70]:
# Fetch the manually listed ids in a single API call.
path = 'https://www.boardgamegeek.com/xmlapi2/thing?id='+targets+'&stats=1'
page = requests.get(path)  # get the page
game_page = BeautifulSoup(page.content, 'xml')  # parse the page with beautifulsoup

game_entries = game_page.find_all('item')

missing_rows = []  # one dict per returned game
for entry in game_entries:
    # Use the id reported by the item itself; pairing entries with grab_list
    # by position attaches categories to the wrong game if the API omits or
    # reorders an item.
    category = {'BGGId': int(entry['id'])}

    for item in entry.find_all('link', type='boardgamecategory'):
        category[item['value']] = 1

    missing_rows.append(category)

# DataFrame.append was removed in pandas 2.0; extend the table in one concat.
if missing_rows:
    categories_storage = pd.concat(
        [categories_storage, pd.DataFrame(missing_rows)],
        ignore_index=True,
    )

In [71]:
# Overwrite the saved table, now including the manually fetched rows.
categories_storage.to_pickle('data_dirty/subcategories.pkl')

# Game Scraping

In [32]:
# Column order for the per-batch `games` frame built by the scraper below.
columns = [
    # identity and description
    'BGGId',
    'Name',
    'Description',
    'YearPublished',
    # ratings
    'GameWeight',
    'AvgRating',
    'BayesAvgRating',
    'StdDev',
    # player counts and community polls
    'MinPlayers',
    'MaxPlayers',
    'ComAgeRec',
    'LanguageEase',
    'BestPlayers',
    'GoodPlayers',
    # ownership and interest
    'NumOwned',
    'NumWant',
    'NumWish',
    'NumWeightVotes',
    # play time and age
    'MfgPlaytime',
    'ComMinPlaytime',
    'ComMaxPlaytime',
    'MfgAgeRec',
    # activity counts
    'NumUserRatings',
    'NumComments',
    'NumAlternates',
    'NumExpansions',
    'NumAwards',
    'NumImplementations',
    'NumFans',
    'NumPageViews',
    'RulesPosts',
    'TotalPosts',
    # flags and family-derived fields
    'IsExpansion',
    'IsReimplementation',
    'Family',
    'Theme',
    'Category',
    'Kickstarted',
    'ImagePath',
]

### "Fancy" Scraper. Keeps breaking.

In [65]:
game_ids = pd.read_pickle('data_cleaned/game_ids.pkl')
drop_these = list(game_ids.loc[game_ids[0]=='xpansion'].index)
game_ids.drop(drop_these, axis=0, inplace=True)

start_position = 0
end_position = 500
file_suffix = 0

while end_position < 150001:
    
    file_suffix += 1
    suffix_str = str(file_suffix)
    
    games = pd.DataFrame(columns=columns)
    designers = pd.DataFrame(columns=['BGGId'])
    categories = pd.DataFrame(columns=['BGGId'])
    mechanics = pd.DataFrame(columns=['BGGId'])
    artists = pd.DataFrame(columns=['BGGId'])
    publishers = pd.DataFrame(columns=['BGGId'])
    comments = pd.DataFrame(columns=['BGGId'])
    awards = pd.DataFrame(columns=['BGGId'])
    categories_storage = pd.DataFrame(columns=['BGGId'])
    
    print("Getting "+str(start_position)+' through '+str(end_position-1))
    
    grab_list = game_ids[0][start_position:end_position]
    
    targets = ''
    for item in grab_list:
        targets += item+','

    start = time.time()# log the start time for this entry
    print(start)
    
    # get the game path using the game id, call the api and get the page
    path = 'https://www.boardgamegeek.com/xmlapi2/thing?id='+targets+'&stats=1&comments=1' #&ratingcomments=1&page=1&pagesize=100
    page = requests.get(path) # get the page
    game_page = BeautifulSoup(page.content, 'xml') # parse the page with beautifulsoup
    
    game_entries = game_page.find_all('item')
    
    for game_id, entry in zip(grab_list, game_entries):
    
        all_subcategories = entry.find_all('link', type='boardgamecategory')
    
        subcategory = {'BGGId':int(game_id)}
    
        for item in all_subcategories:
            subcategory[item['value']] = int(1)       
        
        game_name = entry.find('name', type='primary')['value']
        #print(game_name,game_id)
        
        # check that this game has sufficient user ratings to incluide
        user_ratings = int(entry.find('usersrated')['value'])# get the number of user ratings
        if user_ratings < 30: #check if user ratings are under 30
            print("Not enough data to include this listing")# if so, print a decline message and exit the function
            continue
            #return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
        
        else:
            #print("Getting basic stats")
            description = entry.find('description').text # description text of the game
            year_pub = int(entry.find('yearpublished')['value']) # year published
            minplayers = int(entry.find('minplayers')['value']) # minimum players
            maxplayers = int(entry.find('maxplayers')['value']) # maximum players
            avg_rating = float(entry.find('average')['value']) # average rating
            bayes_avg = float(entry.find('bayesaverage')['value']) # bayes average rating
            std_dev = float(entry.find('stddev')['value']) # standard deviation of rating
            num_owned = int(entry.find('owned')['value']) # num of people own this game
            num_want = int(entry.find('wanting')['value']) # num of people want this game
            num_wish = int(entry.find('wishing')['value']) # num of people with game on wishlist
            num_weight_votes = int(entry.find('numweights')['value']) # num of votes for game weight
            game_weight = float(entry.find('averageweight')['value']) # voted game weight
            try: image_path = entry.find('image').text # path to image
            except: image_path = None
            mfg_play_time = int(entry.find('playingtime')['value']) # mfg stated playtime
            comm_min_play = int(entry.find('minplaytime')['value']) # community min playtime
            comm_max_play = int(entry.find('maxplaytime')['value']) # community max playtime
            mfg_age = int(entry.find('minage')['value']) # mfg min age
            num_comments = int(entry.find('comments')['totalitems']) # num of ratings comments
            num_alts = len(entry.find_all('name', type='alternate')) # number alternate versions
            num_expansions = len(entry.find_all('link', type='boardgameexpansion')) # number of expansions
            num_implementations = len(entry.find_all('link', type='boardgameimplementation')) # number of implementations
            
            # Get expansion flag
            #gametype = entry.find('item')['type'] # check game type
            #if gametype == 'boardgameexpansion': expansion_flag = 1 # if game is an expansion, flag it 1
            #else: expansion_flag=0    
                
            # Get reimplementation flag
            reimplementation = entry.find('link', type="boardgameimplementation", inbound="true") # check if game is a reimplementation
            if reimplementation: reimplements = 1 # if it's a reimplementation, flag it 1
            else: reimplements = 0
            
            # Get community age min
            age_poll = entry.find('poll', title="User Suggested Player Age").find_all('result')

            total = 0
            items = 0

            for item in age_poll:   
                vote = int(item['numvotes']) * int(item['value'][:2])
                total += vote
                items += int(item['numvotes'])

            if items>0: comm_age = total/items # make sure not dividing by 0, get community recommended age
            else: comm_age=None # if no votes, record none
    
    
    
            # Language Ease
            lang_poll = entry.find('poll', title="Language Dependence").find_all('result')
            total, items = 0, 0

            for item in lang_poll:   
                vote = int(item['numvotes']) * int(item['level'])
                total += vote
                items += int(item['numvotes'])

            if items>0: lang_ease = total/items # make sure not dividing by 0, get community language ease
            else: lang_ease=None # if no votes, record none
    
    
    
    
            # Best and Good Players
            players = entry.find('poll', title="User Suggested Number of Players").find_all('results') # get user players poll
            player_num_votes = int(entry.find('poll', title="User Suggested Number of Players")['totalvotes'])# get total votes
        
            best_players, best_score, good_players = 0, 0, [] # set up for best players loop
        
            if player_num_votes > 30: # evaluate if more than 30 votes for num players
                for player in players:
                    best = int(player.find('result', value='Best')['numvotes'])
                    rec = int(player.find('result', value='Recommended')['numvotes'])
                    score = best*2 + rec*1
                    positives = best+rec
                    ratio = positives/player_num_votes
                    if score > best_score: best_players, best_score = player['numplayers'], score # put in # players for best score
                    if ratio > .5: good_players.append(player['numplayers']) # put in good players if over 50% ratio
            else: best_players=None
            
            
            
            #print(game_id)
            # make dataframe for this game
            this_game = pd.DataFrame()
        
            this_game['BGGId']=int(game_id),
            this_game['Name']=game_name,
            this_game['Description']=description,
            this_game['YearPublished']=int(year_pub),
            this_game['GameWeight']=float(game_weight),
            this_game['AvgRating']=float(avg_rating),
            this_game['BayesAvgRating']=float(bayes_avg),
            this_game['StdDev']=float(std_dev),
            this_game['MinPlayers']=int(minplayers),
            this_game['MaxPlayers']=int(maxplayers),
            try: this_game['ComAgeRec']=float(comm_age),
            except: this_game['ComAgeRec']=None,
            try: this_game['LanguageEase']=float(lang_ease),
            except: this_game['LanguageEase']=None,
            this_game['BestPlayers']=best_players,
            this_game['GoodPlayers']=good_players,
            this_game['NumOwned']=int(num_owned),
            this_game['NumWant']=int(num_want),
            this_game['NumWish']=int(num_wish),
            this_game['NumWeightVotes']=int(num_weight_votes),
            this_game['MfgPlaytime']=int(mfg_play_time),
            this_game['ComMinPlaytime']=int(comm_min_play),
            this_game['ComMaxPlaytime']=int(comm_max_play),
            this_game['MfgAgeRec']=int(mfg_age),
            this_game['NumUserRatings']=int(user_ratings),
            this_game['NumComments']=int(num_comments),
            this_game['NumAlternates']=int(num_alts),
            this_game['NumExpansions']=int(num_expansions),
            #this_game['NumAwards'] = int(num_awards)
            this_game['NumImplementations']=int(num_implementations),
            #this_game['NumFans']=int(num_fans),
            #this_game['NumPageViews']=int(num_views),
            #this_game['RulesPosts']=int(rules_threads),
            #this_game['TotalPosts']=int(total_threads),
            #this_game['IsExpansion']=int(expansion_flag),
            this_game['IsReimplementation']=int(reimplements),
            this_game['ImagePath']=image_path
            
        
            # add unique information to end of df
        
            # Add game ranks
            ranks = entry.find_all('rank')
            for item in ranks:
                this_game['Rank:'+item['name']] = float(item['value'])
        
            # Try to add components
            try: 
                families = entry.find_all('link', type='boardgamefamily', value=re.compile("Component"))
                for item in families:
                    this_game['Components:'+item['name']] = item['value']
            except: pass
        
            # Try to add game series/family
            try:
                family = entry.find('link', type='boardgamefamily', value=re.compile("Game:"))['value'].strip('Game:').strip(' ')
                this_game['Family'] = family
            except: pass
            
            try:
                family = entry.find('link', type='boardgamefamily', value=re.compile("Series:"))['value'].strip('Series:').strip(' ')
                this_game['Family'] = family
            except: pass
            
            try:
                setting = entry.find('link', type='boardgamefamily', value=re.compile("Setting:"))['value'].strip('Setting:').strip(' ')
                this_game['Setting'] = setting
            except: pass
            
        
            # Try to add theme
            try:
                theme = entry.find('link', type='boardgamefamily', value=re.compile("Theme:"))['value'].strip('Theme:').strip(' ')
                this_game['Theme'] = theme
            except: pass
            
            try:
                mechanism = entry.find('link', type='boardgamefamily', value=re.compile("Mechanism:"))['value'].strip('Mechanism:').strip(' ')
                this_game['Mechanism'] = mechanism
            except: pass
        
            # Try to add game category
            try:
                category = entry.find('link', type='boardgamefamily', value=re.compile("Category:"))['value'].strip('Category:').strip(' ')
                this_game['Category'] = category
            except: pass
        
        
            # Try is Kickstarted
            try:
                entry.find('link', type='boardgamefamily', value=re.compile("Crowdfunding"))['value']
                this_game['Kickstarted'] = int(1)
            except: pass
        
        
        
            # create specialty dataframes
            #print("Making specialty data frames")
            designer = create_designers(entry, game_id)
            category = create_categories(entry, game_id)
            mechanic = create_mechanics(entry, game_id)
            artist = create_artists(entry, game_id)
            publisher = create_publishers(entry, game_id)
            #awards = create_awards(awards_level, game_id)
            #ratings_dist = create_ratings_dist(stats_page, game_id)
            
            categories_storage = categories_storage.append(subcategory, ignore_index=True)
            games = games.append(this_game, ignore_index = True)
            designers = designers.append(designer, ignore_index=True)
            categories = categories.append(category, ignore_index=True)
            mechanics = mechanics.append(mechanic, ignore_index=True)
            artists = artists.append(artist, ignore_index=True)
            publishers = publishers.append(publisher, ignore_index=True)
            #comments = comments.append(comment, ignore_index=True)
            #awards = awards.append(award, ignore_index=True)
            #ratings_dist = ratings_dist.append(ratings_dist_1, ignore_index=True)
            
    games.to_pickle('games'+suffix_str+'.pkl')
    designers.to_pickle('designers'+suffix_str+'.pkl')
    categories.to_pickle('categories'+suffix_str+'.pkl')
    mechanics.to_pickle('mechanics'+suffix_str+'.pkl')
    artists.to_pickle('artists'+suffix_str+'.pkl')
    publishers.to_pickle('publishers'+suffix_str+'.pkl')
        #comments.to_pickle('comments'+suffix_str+'.pkl')
        #awards.to_pickle('awards'+suffix_str+'.pkl')
        #ratings_dist.to_pickle('ratings_dist'+suffix_str+'.pkl')
        #reviews.to_pickle('reviews'+suffix_str+'.pkl')
    categories_storage.to_pickle('subcategories'+suffix_str+'.pkl')
                
    print(f'Time: {time.time() - start}')       
    
    start_position += 500
    end_position += 500
    
    time.sleep(5)
    
    

Getting 0 through 499
1637908614.604395
Time: 70.56513285636902
Getting 500 through 999
1637908685.181539
Time: 60.24852681159973
Getting 1000 through 1499
1637908745.4370723
Time: 60.25000977516174
Getting 1500 through 1999
1637908805.6940882
Time: 60.245524644851685
Getting 2000 through 2499
1637908865.9466183
Time: 112.40582752227783
Getting 2500 through 2999
1637908978.3614535


ChunkedEncodingError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))

### Original Scraper

In [None]:
# Reload the saved id table and keep only the rows from position 21409 onward
# as the work list for the scraper loop below.
game_ids = pd.read_pickle('data_cleaned/game_ids.pkl')
scrape = [game_id for game_id in game_ids[0][21409:]]
scrape

In [33]:
# Reload every storage frame from its pickle in the working directory so the
# scraper loop below can keep extending them.
# NOTE(review): read_pickle can execute arbitrary code while loading; these files
# are presumably the ones written by this notebook's own to_pickle cells — confirm
# before loading pickles obtained from anywhere else.
games = pd.read_pickle('games.pkl')
designers = pd.read_pickle('designers.pkl')
categories = pd.read_pickle('categories.pkl')
mechanics = pd.read_pickle('mechanics.pkl')
artists = pd.read_pickle('artists.pkl')
publishers = pd.read_pickle('publishers.pkl')
comments = pd.read_pickle('comments.pkl')
awards = pd.read_pickle('awards.pkl')
ratings_dist = pd.read_pickle('ratings_dist.pkl')
# The subcategory table is stored under a different file name than its variable.
categories_storage = pd.read_pickle('subcategories.pkl')
reviews = pd.read_pickle('reviews.pkl')

In [None]:
# Scrape every id in `scrape` and fold the results into the storage frames.
#
# Buffers for the frames scraped in this run: one list per storage table, in the
# order create_game_entry returns its pieces.
new_rows = [[] for _ in range(9)]

try:
    for i in scrape:

        this_game, designer, category, mechanic, artist, publisher, comment, award, ratings_dist_1 = create_game_entry(i) #, review

        scraped_pieces = (this_game, designer, category, mechanic, artist,
                          publisher, comment, award, ratings_dist_1)
        for buffer, piece in zip(new_rows, scraped_pieces):
            buffer.append(piece)
finally:
    # Merge inside `finally` so that games scraped before an interruption (for
    # example a dropped connection) still end up in the storage frames.
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
    # is its replacement, and concatenating once per table avoids re-copying every
    # accumulated frame on each iteration.
    # NOTE(review): assumes every piece returned by create_game_entry is a
    # DataFrame — confirm against its definition.
    storage = (games, designers, categories, mechanics, artists,
               publishers, comments, awards, ratings_dist)
    (games, designers, categories, mechanics, artists,
     publishers, comments, awards, ratings_dist) = [
        pd.concat([existing, *pieces], ignore_index=True) if pieces else existing
        for existing, pieces in zip(storage, new_rows)
    ]


In [None]:
# Display the accumulated games frame to inspect what the scrape produced.
games

In [None]:
# Display the accumulated comments frame to inspect what the scrape produced.
comments

In [None]:
# Disabled save step: the to_pickle calls are wrapped in a string literal, so
# running this cell writes no files. Remove the quotes to persist the frames.
'''games.to_pickle('games.pkl')
designers.to_pickle('designers.pkl')
categories.to_pickle('categories.pkl')
mechanics.to_pickle('mechanics.pkl')
artists.to_pickle('artists.pkl')
publishers.to_pickle('publishers.pkl')
comments.to_pickle('comments.pkl')
awards.to_pickle('awards.pkl')
ratings_dist.to_pickle('ratings_dist.pkl')
#reviews.to_pickle('reviews.pkl')'''

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails when run.
# NOTE(review): presumably a deliberate barrier that stops "Run All" before the
# workspace cells below — confirm before removing.
break

## Workspace

# Get game ids

In [72]:
# Walk the paginated BoardGameGeek browse list and collect each game's numeric id
# (kept as a string) into `game_ids`.
game_ids = []

# Title links are expected to look like ".../boardgame/<id>/<slug>". The previous
# str.strip('https://boardgamegeek.com/boardgame/') call removed a *set of
# characters* from both ends rather than a prefix, and only produced the id
# because digits are not in that set; capturing the id directly is explicit.
game_id_pattern = re.compile(r"/boardgame/(\d+)")

for i in range(1,1501):
    path = "https://boardgamegeek.com/browse/boardgame/page/"+str(i)
    print(path)
    # A timeout keeps one stalled connection from hanging the whole crawl.
    page = requests.get(path, timeout=30)
    time.sleep(1)

    if page.ok:
        rank_titles = BeautifulSoup(page.content, 'html.parser')
        titles = rank_titles.find_all('a', class_='primary')
        for item in titles:
            href = item.get('href', '')
            id_match = game_id_pattern.search(href)
            if id_match is None:
                # Not a board game link; report it instead of storing a bogus id.
                print(f"Skipping link without a board game id: {href!r}")
                continue
            game_ids.append(id_match.group(1))
    else:
        # An error page contributes no ids; say so instead of failing silently.
        print(f"WARNING: HTTP {page.status_code} for {path}; no ids collected from this page")

    # Extra randomized pause (1 or 2 seconds) between pages to stay polite.
    wait = np.random.randint(1,3)
    time.sleep(wait)

https://boardgamegeek.com/browse/boardgame/page/1
https://boardgamegeek.com/browse/boardgame/page/2
https://boardgamegeek.com/browse/boardgame/page/3
https://boardgamegeek.com/browse/boardgame/page/4
https://boardgamegeek.com/browse/boardgame/page/5
https://boardgamegeek.com/browse/boardgame/page/6
https://boardgamegeek.com/browse/boardgame/page/7
https://boardgamegeek.com/browse/boardgame/page/8
https://boardgamegeek.com/browse/boardgame/page/9
https://boardgamegeek.com/browse/boardgame/page/10
https://boardgamegeek.com/browse/boardgame/page/11
https://boardgamegeek.com/browse/boardgame/page/12
https://boardgamegeek.com/browse/boardgame/page/13
https://boardgamegeek.com/browse/boardgame/page/14
https://boardgamegeek.com/browse/boardgame/page/15
https://boardgamegeek.com/browse/boardgame/page/16
https://boardgamegeek.com/browse/boardgame/page/17
https://boardgamegeek.com/browse/boardgame/page/18
https://boardgamegeek.com/browse/boardgame/page/19
https://boardgamegeek.com/browse/boardga

https://boardgamegeek.com/browse/boardgame/page/161
https://boardgamegeek.com/browse/boardgame/page/162
https://boardgamegeek.com/browse/boardgame/page/163
https://boardgamegeek.com/browse/boardgame/page/164
https://boardgamegeek.com/browse/boardgame/page/165
https://boardgamegeek.com/browse/boardgame/page/166
https://boardgamegeek.com/browse/boardgame/page/167
https://boardgamegeek.com/browse/boardgame/page/168
https://boardgamegeek.com/browse/boardgame/page/169
https://boardgamegeek.com/browse/boardgame/page/170
https://boardgamegeek.com/browse/boardgame/page/171
https://boardgamegeek.com/browse/boardgame/page/172
https://boardgamegeek.com/browse/boardgame/page/173
https://boardgamegeek.com/browse/boardgame/page/174
https://boardgamegeek.com/browse/boardgame/page/175
https://boardgamegeek.com/browse/boardgame/page/176
https://boardgamegeek.com/browse/boardgame/page/177
https://boardgamegeek.com/browse/boardgame/page/178
https://boardgamegeek.com/browse/boardgame/page/179
https://boar

https://boardgamegeek.com/browse/boardgame/page/319
https://boardgamegeek.com/browse/boardgame/page/320
https://boardgamegeek.com/browse/boardgame/page/321
https://boardgamegeek.com/browse/boardgame/page/322
https://boardgamegeek.com/browse/boardgame/page/323
https://boardgamegeek.com/browse/boardgame/page/324
https://boardgamegeek.com/browse/boardgame/page/325
https://boardgamegeek.com/browse/boardgame/page/326
https://boardgamegeek.com/browse/boardgame/page/327
https://boardgamegeek.com/browse/boardgame/page/328
https://boardgamegeek.com/browse/boardgame/page/329
https://boardgamegeek.com/browse/boardgame/page/330
https://boardgamegeek.com/browse/boardgame/page/331
https://boardgamegeek.com/browse/boardgame/page/332
https://boardgamegeek.com/browse/boardgame/page/333
https://boardgamegeek.com/browse/boardgame/page/334
https://boardgamegeek.com/browse/boardgame/page/335
https://boardgamegeek.com/browse/boardgame/page/336
https://boardgamegeek.com/browse/boardgame/page/337
https://boar

https://boardgamegeek.com/browse/boardgame/page/477
https://boardgamegeek.com/browse/boardgame/page/478
https://boardgamegeek.com/browse/boardgame/page/479
https://boardgamegeek.com/browse/boardgame/page/480
https://boardgamegeek.com/browse/boardgame/page/481
https://boardgamegeek.com/browse/boardgame/page/482
https://boardgamegeek.com/browse/boardgame/page/483
https://boardgamegeek.com/browse/boardgame/page/484
https://boardgamegeek.com/browse/boardgame/page/485
https://boardgamegeek.com/browse/boardgame/page/486
https://boardgamegeek.com/browse/boardgame/page/487
https://boardgamegeek.com/browse/boardgame/page/488
https://boardgamegeek.com/browse/boardgame/page/489
https://boardgamegeek.com/browse/boardgame/page/490
https://boardgamegeek.com/browse/boardgame/page/491
https://boardgamegeek.com/browse/boardgame/page/492
https://boardgamegeek.com/browse/boardgame/page/493
https://boardgamegeek.com/browse/boardgame/page/494
https://boardgamegeek.com/browse/boardgame/page/495
https://boar

https://boardgamegeek.com/browse/boardgame/page/635
https://boardgamegeek.com/browse/boardgame/page/636
https://boardgamegeek.com/browse/boardgame/page/637
https://boardgamegeek.com/browse/boardgame/page/638
https://boardgamegeek.com/browse/boardgame/page/639
https://boardgamegeek.com/browse/boardgame/page/640
https://boardgamegeek.com/browse/boardgame/page/641
https://boardgamegeek.com/browse/boardgame/page/642
https://boardgamegeek.com/browse/boardgame/page/643
https://boardgamegeek.com/browse/boardgame/page/644
https://boardgamegeek.com/browse/boardgame/page/645
https://boardgamegeek.com/browse/boardgame/page/646
https://boardgamegeek.com/browse/boardgame/page/647
https://boardgamegeek.com/browse/boardgame/page/648
https://boardgamegeek.com/browse/boardgame/page/649
https://boardgamegeek.com/browse/boardgame/page/650
https://boardgamegeek.com/browse/boardgame/page/651
https://boardgamegeek.com/browse/boardgame/page/652
https://boardgamegeek.com/browse/boardgame/page/653
https://boar

https://boardgamegeek.com/browse/boardgame/page/793
https://boardgamegeek.com/browse/boardgame/page/794
https://boardgamegeek.com/browse/boardgame/page/795
https://boardgamegeek.com/browse/boardgame/page/796
https://boardgamegeek.com/browse/boardgame/page/797
https://boardgamegeek.com/browse/boardgame/page/798
https://boardgamegeek.com/browse/boardgame/page/799
https://boardgamegeek.com/browse/boardgame/page/800
https://boardgamegeek.com/browse/boardgame/page/801
https://boardgamegeek.com/browse/boardgame/page/802
https://boardgamegeek.com/browse/boardgame/page/803
https://boardgamegeek.com/browse/boardgame/page/804
https://boardgamegeek.com/browse/boardgame/page/805
https://boardgamegeek.com/browse/boardgame/page/806
https://boardgamegeek.com/browse/boardgame/page/807
https://boardgamegeek.com/browse/boardgame/page/808
https://boardgamegeek.com/browse/boardgame/page/809
https://boardgamegeek.com/browse/boardgame/page/810
https://boardgamegeek.com/browse/boardgame/page/811
https://boar

https://boardgamegeek.com/browse/boardgame/page/951
https://boardgamegeek.com/browse/boardgame/page/952
https://boardgamegeek.com/browse/boardgame/page/953
https://boardgamegeek.com/browse/boardgame/page/954
https://boardgamegeek.com/browse/boardgame/page/955
https://boardgamegeek.com/browse/boardgame/page/956
https://boardgamegeek.com/browse/boardgame/page/957
https://boardgamegeek.com/browse/boardgame/page/958
https://boardgamegeek.com/browse/boardgame/page/959
https://boardgamegeek.com/browse/boardgame/page/960
https://boardgamegeek.com/browse/boardgame/page/961
https://boardgamegeek.com/browse/boardgame/page/962
https://boardgamegeek.com/browse/boardgame/page/963
https://boardgamegeek.com/browse/boardgame/page/964
https://boardgamegeek.com/browse/boardgame/page/965
https://boardgamegeek.com/browse/boardgame/page/966
https://boardgamegeek.com/browse/boardgame/page/967
https://boardgamegeek.com/browse/boardgame/page/968
https://boardgamegeek.com/browse/boardgame/page/969
https://boar

https://boardgamegeek.com/browse/boardgame/page/1107
https://boardgamegeek.com/browse/boardgame/page/1108
https://boardgamegeek.com/browse/boardgame/page/1109
https://boardgamegeek.com/browse/boardgame/page/1110
https://boardgamegeek.com/browse/boardgame/page/1111
https://boardgamegeek.com/browse/boardgame/page/1112
https://boardgamegeek.com/browse/boardgame/page/1113
https://boardgamegeek.com/browse/boardgame/page/1114
https://boardgamegeek.com/browse/boardgame/page/1115
https://boardgamegeek.com/browse/boardgame/page/1116
https://boardgamegeek.com/browse/boardgame/page/1117
https://boardgamegeek.com/browse/boardgame/page/1118
https://boardgamegeek.com/browse/boardgame/page/1119
https://boardgamegeek.com/browse/boardgame/page/1120
https://boardgamegeek.com/browse/boardgame/page/1121
https://boardgamegeek.com/browse/boardgame/page/1122
https://boardgamegeek.com/browse/boardgame/page/1123
https://boardgamegeek.com/browse/boardgame/page/1124
https://boardgamegeek.com/browse/boardgame/pag

https://boardgamegeek.com/browse/boardgame/page/1262
https://boardgamegeek.com/browse/boardgame/page/1263
https://boardgamegeek.com/browse/boardgame/page/1264
https://boardgamegeek.com/browse/boardgame/page/1265
https://boardgamegeek.com/browse/boardgame/page/1266
https://boardgamegeek.com/browse/boardgame/page/1267
https://boardgamegeek.com/browse/boardgame/page/1268
https://boardgamegeek.com/browse/boardgame/page/1269
https://boardgamegeek.com/browse/boardgame/page/1270
https://boardgamegeek.com/browse/boardgame/page/1271
https://boardgamegeek.com/browse/boardgame/page/1272
https://boardgamegeek.com/browse/boardgame/page/1273
https://boardgamegeek.com/browse/boardgame/page/1274
https://boardgamegeek.com/browse/boardgame/page/1275
https://boardgamegeek.com/browse/boardgame/page/1276
https://boardgamegeek.com/browse/boardgame/page/1277
https://boardgamegeek.com/browse/boardgame/page/1278
https://boardgamegeek.com/browse/boardgame/page/1279
https://boardgamegeek.com/browse/boardgame/pag

https://boardgamegeek.com/browse/boardgame/page/1417
https://boardgamegeek.com/browse/boardgame/page/1418
https://boardgamegeek.com/browse/boardgame/page/1419
https://boardgamegeek.com/browse/boardgame/page/1420
https://boardgamegeek.com/browse/boardgame/page/1421
https://boardgamegeek.com/browse/boardgame/page/1422
https://boardgamegeek.com/browse/boardgame/page/1423
https://boardgamegeek.com/browse/boardgame/page/1424
https://boardgamegeek.com/browse/boardgame/page/1425
https://boardgamegeek.com/browse/boardgame/page/1426
https://boardgamegeek.com/browse/boardgame/page/1427
https://boardgamegeek.com/browse/boardgame/page/1428
https://boardgamegeek.com/browse/boardgame/page/1429
https://boardgamegeek.com/browse/boardgame/page/1430
https://boardgamegeek.com/browse/boardgame/page/1431
https://boardgamegeek.com/browse/boardgame/page/1432
https://boardgamegeek.com/browse/boardgame/page/1433
https://boardgamegeek.com/browse/boardgame/page/1434
https://boardgamegeek.com/browse/boardgame/pag

In [73]:
#game_ids = pd.DataFrame(game_ids)
#game_ids.to_pickle('data_dirty/big_game_ids.pkl')

## Create Storage Frames

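**DO NOT RUN AGAIN** — the (now disabled) cells below re-create the storage frames empty and write them over the saved `.pkl` files.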

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails when run.
# NOTE(review): presumably a deliberate barrier that stops "Run All" before the
# storage-frame setup cells below — confirm before removing.
break

In [None]:
# Disabled one-time setup: the string literal below holds the code that defines
# the games column list and creates the empty storage frames (each side table
# starts with only a 'BGGId' column). As a string it is never executed.
'''columns = ['BGGId',
                'Name',
               'Description',
                'YearPublished',
                'GameWeight',
                'AvgRating',  
                'BayesAvgRating',
                'StdDev',
                'MinPlayers',
                'MaxPlayers',
                'ComAgeRec',
                'LanguageEase',
                'BestPlayers',
                'GoodPlayers',
                'NumOwned',
                'NumWant',
                'NumWish',
                'NumWeightVotes',
                'MfgPlaytime',
                'ComMinPlaytime',
                'ComMaxPlaytime',
                'MfgAgeRec',
                'NumUserRatings',
                'NumComments',
                'NumAlternates',
                'NumExpansions',
               'NumAwards',
                'NumImplementations',
               'NumFans',
               'NumPageViews',
               'RulesPosts',
               'TotalPosts',
               'IsExpansion',
           'IsReimplementation',
                'Family',
                'Theme',
               'Category',
               'Kickstarted',
               'ImagePath',
          ]

games = pd.DataFrame(columns=columns)
designers = pd.DataFrame(columns=['BGGId'])
categories = pd.DataFrame(columns=['BGGId'])
mechanics = pd.DataFrame(columns=['BGGId'])
artists = pd.DataFrame(columns=['BGGId'])
publishers = pd.DataFrame(columns=['BGGId'])
comments = pd.DataFrame(columns=['BGGId'])
awards = pd.DataFrame(columns=['BGGId'])
ratings_dist = pd.DataFrame(columns=['BGGId'])
reviews = pd.DataFrame(columns=['BGGId'])
categories_storage = pd.DataFrame(columns=['BGGId'])'''

In [None]:
# Disabled one-time save of the storage frames: the to_pickle calls are wrapped
# in a string literal, so running this cell writes no files.
'''games.to_pickle('games.pkl')
designers.to_pickle('designers.pkl')
categories.to_pickle('categories.pkl')
mechanics.to_pickle('mechanics.pkl')
artists.to_pickle('artists.pkl')
publishers.to_pickle('publishers.pkl')
comments.to_pickle('comments.pkl')
awards.to_pickle('awards.pkl')
ratings_dist.to_pickle('ratings_dist.pkl')
reviews.to_pickle('reviews.pkl')
categories_storage.to_pickle('subcategories.pkl')'''

# Deprecated Code Work

In [None]:
# Build a comma-separated string from the first ten ids in column 0 of game_ids.
game_ids
temp = game_ids[0][:10]

# Each id is followed by a comma, so the result ends with a trailing comma.
targets = ''.join(item+',' for item in temp)

print(targets)