In [114]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests
import xml.etree.ElementTree as ET
import pickle
# customisations - let tables show up to 100 columns before truncating
# Use the fully-qualified option name: the bare "max_columns" pattern matches more
# than one option in recent pandas versions (e.g. "styler.render.max_columns")
# and then raises OptionError instead of setting the display width.
pd.set_option("display.max_columns", 100)

In [65]:
def get_request(url, parameters, retry_count = 0):
    """Return json-formatted response of a get request, retrying on failures.

    The request is retried after a pause on SSL errors, on connection errors
    and on error-status responses. All three failure kinds share a single
    retry budget (20 retries), so the function always terminates instead of
    recursing until the interpreter's recursion limit is hit.

    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request
    retry_count : int, optional
        number of retries already used up; counts against the retry budget

    Returns
    -------
    json_data
        json-formatted response (dict-like)

    Raises
    ------
    requests.exceptions.SSLError
    requests.exceptions.ConnectionError
        if the request still fails once the retry budget is exhausted
    requests.exceptions.HTTPError
        if the server still answers with an error status once the retry
        budget is exhausted
    """
    max_retries = 20

    while True:
        try:
            # small pause before every request to stay under the rate limit
            time.sleep(0.2)
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept because callers may depend on it - confirm it is still needed.
            response = requests.get(url=url, params=parameters, verify=False)
        except requests.exceptions.SSLError as s:
            # SSLError is a subclass of ConnectionError, so it must be caught first
            print('SSL Error:', s)
            if retry_count >= max_retries:
                raise

            for i in range(5, 0, -1):
                print('\rWaiting... ({})'.format(i), end='')
                time.sleep(1)
            print('\rRetrying.' + ' '*10)
        except requests.exceptions.ConnectionError as c:
            print('Connection Reset Error:', c)
            if retry_count >= max_retries:
                raise
            time.sleep(5)
        else:
            # a Response is truthy only for non-error status codes
            if response:
                return response.json()

            if retry_count >= max_retries:
                # give up with the HTTP error instead of retrying forever
                response.raise_for_status()

            # an error status usually means too many requests. Wait and try again
            print('No response, waiting 10 seconds...')
            time.sleep(10)
            print('Retrying.')

        retry_count += 1

In [75]:
# Read the filtered games table and collect its 'appid' column into a plain list.
finalFiliteredGames = pd.read_csv(
    filepath_or_buffer='Documents/SoSe2020/NEProject/Data/finalFilteredGames.csv'
)
listOfGameIds = finalFiliteredGames.loc[:, 'appid'].tolist()
# last expression of the cell: echoes the whole id list
listOfGameIds

[440,
 550,
 570,
 620,
 659,
 730,
 1250,
 1840,
 2870,
 4000,
 4560,
 4920,
 4932,
 17570,
 17710,
 24010,
 24043,
 24049,
 24058,
 24085,
 25000,
 35450,
 35700,
 35720,
 35725,
 40960,
 40980,
 41014,
 41070,
 46770,
 48700,
 65800,
 65980,
 72850,
 96800,
 98600,
 98800,
 104900,
 107410,
 111800,
 113020,
 116100,
 200710,
 201420,
 203290,
 204300,
 205990,
 206420,
 207140,
 207170,
 208580,
 209060,
 209080,
 209540,
 209670,
 211820,
 214190,
 214360,
 214550,
 214770,
 218410,
 218680,
 219640,
 219740,
 220200,
 220700,
 221100,
 221380,
 221540,
 222880,
 224260,
 224440,
 224480,
 224500,
 224540,
 224820,
 225420,
 225600,
 226840,
 226860,
 227160,
 227300,
 227780,
 227860,
 228200,
 228380,
 228760,
 229870,
 230190,
 230230,
 230290,
 231430,
 232090,
 232890,
 232910,
 232950,
 233450,
 233610,
 233840,
 233860,
 234650,
 235800,
 236150,
 236850,
 237870,
 238430,
 238460,
 239070,
 239820,
 240440,
 240970,
 241240,
 241720,
 242130,
 243360,
 244030,
 244160,
 24

In [92]:
# Read the modder ids table and collect its 'Steam64Ids' column into a plain list.
modderUserIds = pd.read_csv(
    filepath_or_buffer='Documents/SoSe2020/NEProject/Data/UserIds/ModderIds.csv'
)
modderUserIds  # not the last expression of the cell, so nothing is displayed here
listOfModders = modderUserIds.loc[:, 'Steam64Ids'].tolist()
# last expression of the cell: echoes the whole id list
listOfModders

[76561198015140861,
 76561198075720845,
 76561198044898225,
 76561198090784137,
 76561198018122283,
 76561198032326299,
 76561197993579336,
 76561198027947201,
 76561197984960890,
 76561198016477566,
 76561197980085804,
 76561198036862517,
 76561198084015153,
 76561198035252995,
 76561197992101053,
 76561198044470633,
 76561197965244290,
 76561197999008072,
 76561198045788203,
 76561198094206627,
 76561198136130631,
 76561198268809258,
 76561198243716042,
 76561198091715664,
 76561198094324696,
 76561198088228019,
 76561198317403826,
 76561198051384176,
 76561198184791242,
 76561198329948635,
 76561198017529845,
 76561198146671948,
 76561198153317742,
 76561198116667719,
 76561198027102120,
 76561198245928610,
 76561198100310532,
 76561198372548163,
 76561198363521862,
 76561198159288400,
 76561198099841193,
 76561198377984946,
 76561198029976238,
 76561198210302219,
 76561198127690244,
 76561198258016596,
 76561198272555412,
 76561198189863786,
 76561198102529465,
 76561197966032040,


In [118]:
# Read the full user id table and collect its 'Steam64IDs' column into a plain list.
allUserIds = pd.read_csv(
    filepath_or_buffer='Documents/SoSe2020/NEProject/Data/UserIds/userIdLists.csv'
)
allUserIds  # not the last expression of the cell, so nothing is displayed here
listOfUserIds = allUserIds.loc[:, 'Steam64IDs'].tolist()
# show the final id in the list as a quick check of what was loaded
listOfUserIds[-1]

76561198439696219

In [130]:
# Accumulator filled by the download loop: user id -> list of per-game dicts.
userDetailsDict = dict()

In [137]:
# with open("Documents/SoSe2020/NEProject/Data/UserIds/userIdList.csv", "a") as f:
#     writer = csv.writer(f)
#     for i in range(245, 300):
#         url = "https://steamcommunity.com/groups/SteamClientBeta/memberslistxml"
#         parameters = {"xml": 1,
#                       "p": i }
#         r = requests.get(url=url, params=parameters, verify=False)
#         root = ET.fromstring(r.text)
#         for child in root:
#             if child.tag == 'members':
#                 for j in range(0,1000):
#                     id = [child[j].text]
#                     writer.writerow(id)
#         print("Pages done: " + str(i))
#         time.sleep(5)

# request each user's owned games from the Steam Web API and keep only the games in listOfGameIds
#i = "76561198019610464"

# The Steam Web API key is a secret: read it from the environment rather than
# hard-coding it in the notebook. A key that was committed earlier should be
# revoked and regenerated.
apiKey = os.environ.get("STEAM_API_KEY")
if not apiKey:
    raise RuntimeError("Set the STEAM_API_KEY environment variable to your Steam Web API key.")

userIdList = listOfUserIds
# Set membership is O(1); scanning the list once per owned game was needlessly slow.
wantedGameIds = set(listOfGameIds)
# NOTE(review): plain http sends the API key unencrypted - consider https.
url = "http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001"

j = 0  # number of users processed in this run
# Only this slice of the id list is processed in this run; `i` is the Steam64 id
# and stays available after the loop to see where a run stopped.
for i in userIdList[286986:300000]:
    j += 1
    parameters = {"key": apiKey,
                  "steamid": i,
                  "format": "json"}

    # a payload without a 'response' entry is treated like a user with no visible games
    resJson = get_request(url, parameters=parameters).get('response', {})

    listGames = resJson.get('games', [])
    if listGames:
        # keep appid and total playtime only for the games of interest
        userDetailsDict[i] = [
            {'appid': each['appid'],
             'playtime_forever': each['playtime_forever']}
            for each in listGames
            if each['appid'] in wantedGameIds
        ]
    if j % 100 == 0:
        # j is a count of processed users, not a percentage
        print('Completed users: ' + str(j))


# persist everything collected so far once the slice has been processed
with open('Documents/SoSe2020/NEProject/Data/userGames/userIdGames2v2.pkl', 'wb') as f:
    pickle.dump(userDetailsDict, f, pickle.HIGHEST_PROTOCOL)

        
# for each in list_json:
#     json_data[each["appid"]] = each
    
# steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')

# # generate sorted app_list from steamspy data
# app_list = steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)

# # export disabled to keep consistency across download sessions
# app_list.to_csv('Documents/SoSe2020/NEProject/Data/app_list_new.csv', index=False)
# # steam_spy_all.to_csv('Documents/SoSe2020/NEProject/Data/steam_spy_all_new.csv', index=False)

# # instead read from stored csv
# # app_list = pd.read_csv('Documents/SoSe2020/NEProject/Data/app_list.csv')


# # display first few rows
# # app_list.head()

Completed Percentage: 100
Completed Percentage: 200
Completed Percentage: 300
Completed Percentage: 400
Completed Percentage: 500
Completed Percentage: 600
Completed Percentage: 700
Completed Percentage: 800
Completed Percentage: 900
Completed Percentage: 1000
Completed Percentage: 1100
Completed Percentage: 1200
Completed Percentage: 1300
Completed Percentage: 1400
Completed Percentage: 1500
Completed Percentage: 1600
Completed Percentage: 1700
Completed Percentage: 1800
Completed Percentage: 1900
Completed Percentage: 2000
Completed Percentage: 2100
Completed Percentage: 2200
Completed Percentage: 2300
Completed Percentage: 2400
Completed Percentage: 2500
Completed Percentage: 2600
Completed Percentage: 2700
Completed Percentage: 2800
Completed Percentage: 2900
Completed Percentage: 3000
Completed Percentage: 3100
Completed Percentage: 3200
Completed Percentage: 3300
Completed Percentage: 3400
Completed Percentage: 3500
Completed Percentage: 3600
Completed Percentage: 3700
Completed 

In [135]:
# Display the value left in `i` by the download loop, i.e. the id it last reached.
# NOTE(review): 99022 is presumably the list position reached at that point - confirm.
i #99022

76561198365669877

In [133]:
# Collect every key of userDetailsDict into a list, in the dict's iteration order.
keyList = []
for k in userDetailsDict:
    keyList += [k]
# last expression of the cell: echoes the collected keys
keyList

[]

In [132]:
# Print up to the first 15 entries of keyList, one per line (prints nothing if it is empty).
for k in keyList[:15]:
    print(k)

In [136]:
# Write the collected per-user data to disk as a pickle, using the newest protocol.
with open('Documents/SoSe2020/NEProject/Data/userGames/userIdGames2v1.pkl', mode='wb') as f:
    pickle.dump(userDetailsDict, f, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
# NOTE(review): bare string expression - running this cell only echoes the path.
# Presumably a reminder of where the modder results should be saved; confirm or remove.
'Documents/SoSe2020/NEProject/Data/userGames/modderIdsGames'