In [8]:
# Basic DS stuff
import numpy as np
import pandas as pd
import json

# Web scraping
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import asyncio

# For labeling records, tracking files, and formatting
from datetime import datetime
import time

# For reading configuration (API key) from the environment
import os

# For Rick
import pickle

In [9]:
# Digit handled by this notebook instance: only users whose ID ends in this
# digit are scraped, and it is embedded in the per-digit pickle file names.
last_digit = 2

In [10]:
# NOTE: Use this cell to reset the scraper, making it forget all records.

# with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'wb+') as file :
#     pickle.dump(set([]), file)

# with open(f'../data/raw/recently_played_{last_digit}.pkl', 'wb+') as file :
#     pickle.dump(set([]), file)

In [11]:
# Load data
#
# These pickles are only read in this cell, so they are opened read-only ('rb').
# NOTE: pickle.load executes arbitrary code from the file -- only load pickles
# that were produced by this project.

# Every known user ID; converted to a set so set differences can be taken later.
with open('../data/raw/all_users', 'rb') as file :
    all_users = set(pickle.load(file))

# Users already processed according to the global record.
with open('../data/raw/game_mapped_users.pkl', 'rb') as file :
    game_mapped_users = pickle.load(file)

# Users already processed by the scraper for this digit. The checkpoint may not
# exist yet on a first run, in which case we start from an empty set.
try :
    with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'rb') as file :
        game_mapped_users_digit = pickle.load(file)
except FileNotFoundError :
    game_mapped_users_digit = set()

# with open('../data/raw/skipped_game_map_users', 'rb+') as file :
#     skipped_users = pickle.load(file)

In [12]:
# Create vars

# Records collected since the last checkpoint; the scraping loop fills this
# with (user, appid, playtime_2weeks, playtime_forever) tuples.
holding_set = set()

# Users that appear in neither the global record nor this digit's checkpoint.
relevant_users = all_users - game_mapped_users - game_mapped_users_digit

# Restrict to the users whose ID ends in the digit this notebook handles.
unmapped_users = [user for user in relevant_users if int(user[-1])==last_digit]

# SECURITY: the Steam Web API key must not live in the notebook. It is read
# from the STEAM_API_KEY environment variable instead. Any key that was ever
# committed in this file should be considered leaked and be revoked.
key = os.environ.get('STEAM_API_KEY')
if not key :
    raise RuntimeError('Set the STEAM_API_KEY environment variable to your Steam Web API key.')

# Number of users requested per batch in the scraping loop.
interval = 10

# HTTPS so the API key in the query string is not sent in cleartext.
# Placeholders: API key, then the user's Steam ID.
base_url = 'https://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={}&steamid={}&format=json'

-------------
### This is the API version

In [13]:
async def get_games(user, url) :
    """Open `url` for `user` without blocking the event loop.

    `urlopen` is a blocking call; awaiting it directly inside a coroutine
    would make `asyncio.gather` run the requests one after another. Running
    it in the default thread-pool executor lets a batch of requests overlap.

    Parameters
    ----------
    user : identifier passed through unchanged so the caller can match the
        response to the user it belongs to.
    url : fully formatted request URL.

    Returns
    -------
    tuple
        `(user, response)` where `response` is the open object returned by
        `urlopen`; the caller is responsible for reading and closing it.

    Raises
    ------
    Whatever `urlopen` raises (e.g. `urllib.error.URLError`).
    """
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, urlopen, url)
    return (user, response)

In [14]:
to_scrape = len(unmapped_users)

for i in range(0, to_scrape, interval) :
    try :
        iter_max = min(i+interval, to_scrape)
        users = [unmapped_users[i] for i in range(i, iter_max)]
        tasks = [get_games(user, base_url.format(key, user)) for user in users]
        responses = await asyncio.gather(*tasks)
        for response in responses :
            user = response[0]
            jason = json.load(response[1])
            games_list = jason['response']
            if len(games_list) > 0 :
                if games_list['total_count'] > 0 :
                    for game in games_list['games'] :
                        appid = game['appid']
                        playtime_2weeks = game['playtime_2weeks']
                        playtime_forever = game['playtime_forever']
                        holding_set.add((user, appid, playtime_2weeks, playtime_forever))
            game_mapped_users.add(user)
    except Exception as e:
        print(e)
        print(datetime.now())
        print('Trying again in 1 minute...')
        time.sleep(60)

    if i % 200 == 0 :
        with open(f'../data/raw/recently_played_{last_digit}.pkl', 'rb+') as file :
            total_set = pickle.load(file)
        total_set.update(holding_set)
        with open(f'../data/raw/recently_played_{last_digit}.pkl', 'wb+') as file :
            pickle.dump(total_set, file)
        with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'wb+') as file :
            pickle.dump(game_mapped_users, file)
        print(f'Added: {len(holding_set)}')
        print(f"Saved so far: {len(total_set)}")
        holding_set = set()
        print(datetime.now())
        print('')


Added: 0
Saved so far: 1710
2024-04-03 17:54:14.557751

Added: 108
Saved so far: 1818
2024-04-03 17:54:44.243968

Added: 196
Saved so far: 2014
2024-04-03 17:55:13.704369

Added: 229
Saved so far: 2243
2024-04-03 17:55:48.657623

Added: 224
Saved so far: 2467
2024-04-03 17:56:20.756392

Added: 230
Saved so far: 2697
2024-04-03 17:56:51.126323

Added: 190
Saved so far: 2887
2024-04-03 17:57:20.091075

Added: 176
Saved so far: 3063
2024-04-03 17:57:49.037669

Added: 143
Saved so far: 3206
2024-04-03 17:58:18.178106

Added: 189
Saved so far: 3395
2024-04-03 17:58:47.889717

Added: 123
Saved so far: 3518
2024-04-03 17:59:16.594497

Added: 217
Saved so far: 3735
2024-04-03 17:59:45.739150

Added: 240
Saved so far: 3975
2024-04-03 18:00:16.173806

Added: 243
Saved so far: 4218
2024-04-03 18:00:44.601183

Added: 165
Saved so far: 4383
2024-04-03 18:01:13.257818

Added: 189
Saved so far: 4572
2024-04-03 18:01:41.385887

Added: 168
Saved so far: 4740
2024-04-03 18:02:09.449938

Added: 164
Saved

In [15]:
# This works but is deprecated. Saved as a potential reference.

# counter = 0
# game_counts = set()

# for i in range(100) :

#     user = all_users[i]

#     url = f'http://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={key}&steamid={user}&format=json'
#     with urlopen(url) as response :
#         jason = json.load(response)
#     games_list = jason['response']
#     if len(games_list) > 0 :
#         if games_list['total_count'] > 0 :
#             print(jason)
#             print('\n')
#             counter +=1
#             game_counts.add(len(games_list))

# print(f'{counter} users had recently played games. Of them:')
# print(f'Avg games played: {sum(game_counts)/len(game_counts)}')
# print(f'Most games played: {max(game_counts)}')