In [8]:
# Basic DS stuff
import numpy as np
import pandas as pd
import json

# Web scraping
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import asyncio

# For labeling records, tracking files, and formatting
from datetime import datetime
import time

# For Rick
import pickle

# For reading configuration / secrets from the environment
import os

In [9]:
# Final digit of the user IDs handled by this notebook run. It selects which users
# are scraped and is the suffix of the per-digit pickle files read and written below.
last_digit = 1

In [10]:
# NOTE: Uncomment and run this cell to reset the scraper, making it forget all records saved for the current `last_digit`.

# with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'wb+') as file :
#     pickle.dump(set([]), file)

# with open(f'../data/raw/recently_played_{last_digit}.pkl', 'wb+') as file :
#     pickle.dump(set([]), file)

In [11]:
# Load data
#
# Every file is opened read-only ('rb'): this cell only reads, so it should not
# require write permission on the raw data files.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles that were produced by this project.

# All known users; converted to a set so set arithmetic can be applied to it.
with open('../data/raw/all_users', 'rb') as file :
    all_users = set(pickle.load(file))

# Users already mapped to games (shared file).
with open('../data/raw/game_mapped_users.pkl', 'rb') as file :
    game_mapped_users = pickle.load(file)

# Users already mapped to games, as recorded in the file for this `last_digit`.
with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'rb') as file :
    game_mapped_users_digit = pickle.load(file)
    
# with open('../data/raw/skipped_game_map_users', 'rb+') as file :
#     skipped_users = pickle.load(file)

In [12]:
# Create vars

# Buffer of (user, appid, playtime_2weeks, playtime_forever) tuples collected
# since the last checkpoint.
holding_set = set()

# Users that appear in neither the shared nor the per-digit "already mapped" set.
relevant_users = all_users - game_mapped_users - game_mapped_users_digit

# Only the users whose ID ends in `last_digit` are handled by this run.
unmapped_users = [user for user in relevant_users if int(user[-1])==last_digit]

# Steam Web API key. Read from the environment so the secret is never stored in
# the notebook (or in its saved outputs / version control).
key = os.environ.get('STEAM_API_KEY')
if not key :
    raise RuntimeError('Set the STEAM_API_KEY environment variable to your Steam Web API key.')

# Number of users requested per batch.
interval = 10

# HTTPS so the API key in the query string is not sent in cleartext.
# Placeholders are filled with (key, steamid), in that order.
base_url = 'https://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={}&steamid={}&format=json'

-------------
### This is the API version

In [13]:
async def get_games(user, url) :
    """Open `url` without blocking the event loop and return `(user, response)`.

    `urlopen` is a blocking call. Calling it directly inside a coroutine stalls
    the event loop, so several `get_games` coroutines passed to
    `asyncio.gather` would run one after another. Running it in the loop's
    default executor lets the requests overlap.

    Parameters
    ----------
    user : the identifier passed through unchanged as the first tuple element.
    url : the URL handed to `urlopen`.

    Returns
    -------
    tuple of `(user, response)` where `response` is whatever `urlopen(url)`
    returned. The caller is responsible for reading and closing it.

    Raises
    ------
    Whatever `urlopen` raises for the given URL.
    """
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, urlopen, url)
    return (user, response)

In [14]:
to_scrape = len(unmapped_users)

for i in range(0, to_scrape, interval) :
    try :
        iter_max = min(i+interval, to_scrape)
        users = [unmapped_users[i] for i in range(i, iter_max)]
        tasks = [get_games(user, base_url.format(key, user)) for user in users]
        responses = await asyncio.gather(*tasks)
        for response in responses :
            user = response[0]
            jason = json.load(response[1])
            games_list = jason['response']
            if len(games_list) > 0 :
                if games_list['total_count'] > 0 :
                    for game in games_list['games'] :
                        appid = game['appid']
                        playtime_2weeks = game['playtime_2weeks']
                        playtime_forever = game['playtime_forever']
                        holding_set.add((user, appid, playtime_2weeks, playtime_forever))
            game_mapped_users.add(user)
    except Exception as e:
        print(e)
        print(datetime.now())
        print('Trying again in 1 minute...')
        time.sleep(60)

    if i % 200 == 0 :
        with open(f'../data/raw/recently_played_{last_digit}.pkl', 'rb+') as file :
            total_set = pickle.load(file)
        total_set.update(holding_set)
        with open(f'../data/raw/recently_played_{last_digit}.pkl', 'wb+') as file :
            pickle.dump(total_set, file)
        with open(f'../data/raw/game_mapped_users_{last_digit}.pkl', 'wb+') as file :
            pickle.dump(game_mapped_users, file)
        print(f'Added: {len(holding_set)}')
        print(f"Saved so far: {len(total_set)}")
        holding_set = set()
        print(datetime.now())
        print('')


Added: 11
Saved so far: 1978
2024-04-03 17:54:11.931967

Added: 140
Saved so far: 2118
2024-04-03 17:54:40.446317

Added: 121
Saved so far: 2239
2024-04-03 17:55:11.090544

Added: 227
Saved so far: 2466
2024-04-03 17:55:42.864160

Added: 333
Saved so far: 2799
2024-04-03 17:56:17.804206

Added: 193
Saved so far: 2992
2024-04-03 17:56:48.534417

Added: 155
Saved so far: 3147
2024-04-03 17:57:16.772762

Added: 162
Saved so far: 3309
2024-04-03 17:57:45.731775

Added: 213
Saved so far: 3522
2024-04-03 17:58:14.553372

Added: 129
Saved so far: 3651
2024-04-03 17:58:44.014761

Added: 227
Saved so far: 3878
2024-04-03 17:59:14.336093

Added: 179
Saved so far: 4057
2024-04-03 17:59:42.645247

Added: 123
Saved so far: 4180
2024-04-03 18:00:14.031814

Added: 152
Saved so far: 4332
2024-04-03 18:00:43.566888

Added: 343
Saved so far: 4675
2024-04-03 18:01:11.992115

Added: 193
Saved so far: 4868
2024-04-03 18:01:40.257197

Added: 187
Saved so far: 5055
2024-04-03 18:02:08.381046

Added: 234
Save

In [15]:
# Deprecated sequential version, saved as a potential reference.
# NOTE: it indexes `all_users[i]`, which no longer works now that `all_users` is loaded as a set.

# counter = 0
# game_counts = set()

# for i in range(100) :

#     user = all_users[i]

#     url = f'http://api.steampowered.com/IPlayerService/GetRecentlyPlayedGames/v0001/?key={key}&steamid={user}&format=json'
#     with urlopen(url) as response :
#         jason = json.load(response)
#     games_list = jason['response']
#     if len(games_list) > 0 :
#         if games_list['total_count'] > 0 :
#             print(jason)
#             print('\n')
#             counter +=1
#             game_counts.add(len(games_list))

# print(f'{counter} users had recently played games. Of them:')
# print(f'Avg games played: {sum(game_counts)/len(game_counts)}')
# print(f'Most games played: {max(game_counts)}')