In [1]:
# Basic DS stuff
import numpy as np
import pandas as pd
import json
import os

# Web scraping
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import asyncio

# For labeling records, tracking files, and formatting
from datetime import datetime
import time

# For Rick
import pickle

# For reading secrets without hardcoding them
from getpass import getpass

In [2]:
# NOTE: Use this cell to reset the scraper, making it forget all records.

# game_mapped_users = set([0])
# with open('../data/raw/game_mapped_users.pkl', 'wb+') as file :
#     pickle.dump(game_mapped_users, file)

# with open('../data/raw/recently_played.pkl', 'wb+') as file :
#     pickle.dump(set([0]), file)

In [3]:
# Load data
#
# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load pickle files that were produced by this project.

# Opened with 'rb' rather than 'rb+': nothing is written here, so the files
# should not need to be writable just to be read.
with open('../data/raw/all_users', 'rb') as file :
    all_users = set(pickle.load(file))

# Coerced to a set because later cells rely on set difference and .add().
with open('../data/raw/game_mapped_users.pkl', 'rb') as file :
    game_mapped_users = set(pickle.load(file))
    
# with open('../data/raw/skipped_game_map_users', 'rb+') as file :
#     skipped_users = pickle.load(file)

In [4]:
# Create vars

# Records scraped since the last checkpoint, stored as
# (user, appid, playtime_2weeks, playtime_forever) tuples.
holding_set = set()

# Users that have not been processed by the scraper yet.
unmapped_users = list(all_users-game_mapped_users)

# Steam Web API key. It is read from the STEAM_API_KEY environment variable and,
# if that is unset or empty, requested interactively so that it never has to be
# stored in the notebook.
# NOTE(review): the key that used to be hardcoded here has been committed and
# should be treated as leaked; revoke it and issue a new one.
key = os.environ.get('STEAM_API_KEY') or getpass('Steam Web API key: ')

# Number of users requested per batch.
interval = 10

# HTTPS so that the key in the query string is not sent in clear text.
base_url = 'https://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={}&steamid={}&format=json'

-------------
### API version: fetch recently played games from the Steam Web API

In [5]:
async def get_games(user, url) :
    """Open `url` without blocking the event loop and pair the response with `user`.

    `urlopen` is a blocking call. Calling it directly inside a coroutine makes
    `asyncio.gather` run the requests one after another, so it is executed in
    the event loop's default thread pool instead.

    Args:
        user: Identifier of the user the request is made for; returned unchanged.
        url: URL to open.

    Returns:
        A `(user, response)` tuple where `response` is the object returned by
        `urlopen(url)`. The caller is responsible for closing it.

    Raises:
        Whatever `urlopen` raises for this URL (for example `urllib.error.URLError`).
    """
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, urlopen, url)
    return (user, response)

In [6]:
to_scrape = len(unmapped_users)

for i in range(0, to_scrape, interval) :
    try :
        iter_max = min(i+interval, to_scrape)
        users = [unmapped_users[i] for i in range(i, iter_max)]
        tasks = [get_games(user, base_url.format(key, user)) for user in users]
        responses = await asyncio.gather(*tasks)
        for response in responses :
            user = response[0]
            jason = json.load(response[1])
            games_list = jason['response']
            if len(games_list) > 0 :
                if games_list['total_count'] > 0 :
                    for game in games_list['games'] :
                        appid = game['appid']
                        playtime_2weeks = game['playtime_2weeks']
                        playtime_forever = game['playtime_forever']
                        holding_set.add((user, appid, playtime_2weeks, playtime_forever))
            game_mapped_users.add(user)
    except Exception as e:
        print(e)
        print(datetime.now())
        print('Trying again in 1 minute...')
        time.sleep(60)

    if i % 200 == 0 :
        with open('../data/raw/recently_played.pkl', 'rb+') as file :
            total_set = pickle.load(file)
        total_set.update(holding_set)
        with open('../data/raw/recently_played.pkl', 'wb+') as file :
            pickle.dump(total_set, file)
        with open('../data/raw/game_mapped_users.pkl', 'wb+') as file :
            pickle.dump(game_mapped_users, file)
        print(f'Added: {len(holding_set)}')
        print(f"Saved so far: {len(total_set)}")
        holding_set = set()
        print(datetime.now())
        print('')


Added: 4
Saved so far: 1683062
2024-04-03 16:22:54.727017

Added: 208
Saved so far: 1683270
2024-04-03 16:23:50.661788

Added: 149
Saved so far: 1683419
2024-04-03 16:24:47.406699

Added: 244
Saved so far: 1683663
2024-04-03 16:25:43.620682

Added: 204
Saved so far: 1683867
2024-04-03 16:26:40.293120

Added: 136
Saved so far: 1684003
2024-04-03 16:27:51.209271

Added: 150
Saved so far: 1684153
2024-04-03 16:29:06.891358

Added: 242
Saved so far: 1684395
2024-04-03 16:30:06.272444

Added: 211
Saved so far: 1684606
2024-04-03 16:31:08.292874

Added: 185
Saved so far: 1684791
2024-04-03 16:32:11.387658

Added: 174
Saved so far: 1684965
2024-04-03 16:33:11.293408

Added: 160
Saved so far: 1685125
2024-04-03 16:34:05.905625

Added: 175
Saved so far: 1685300
2024-04-03 16:34:58.231284

Added: 157
Saved so far: 1685457
2024-04-03 16:35:54.492506

Added: 226
Saved so far: 1685683
2024-04-03 16:36:51.093181

Added: 137
Saved so far: 1685820
2024-04-03 16:37:49.491899

Added: 254
Saved so far: 1

CancelledError: 

In [None]:
# This works but is deprecated. Saved as a potential reference.

# counter = 0
# game_counts = set()

# for i in range(100) :

#     user = all_users[i]

#     url = f'http://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={key}&steamid={user}&format=json'
#     with urlopen(url) as response :
#         jason = json.load(response)
#     games_list = jason['response']
#     if len(games_list) > 0 :
#         if games_list['total_count'] > 0 :
#             print(jason)
#             print('\n')
#             counter +=1
#             game_counts.add(len(games_list))

# print(f'{counter} users had recently played games. Of them:')
# print(f'Avg games played: {sum(game_counts)/len(game_counts)}')
# print(f'Most games played: {max(game_counts)}')