In [3]:
import json
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta

import numpy as np
import pandas as pd

import requests
from dotenv import load_dotenv
from tqdm import tqdm
from bs4 import BeautifulSoup

## Liste mit professionellen Spielern

In [56]:
!curl "https://www.hltv.org/stats/players?startDate=2021-06-03&endDate=2022-06-03" > ../data/hltv-player-stats.html

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1158k    0 1158k    0     0   365k      0 --:--:--  0:00:03 --:--:--  364k


In [1]:
!curl "https://steamcommunity.com/sharedfiles/filedetails/?id=2135169110" > ../data/steam-profiles.html

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  230k    0  230k    0     0   172k      0 --:--:--  0:00:01 --:--:--  172k


In [4]:
with open("../data/steam-profiles.html") as fp:
    soup = BeautifulSoup(fp, 'lxml')

In [17]:
soup.find_all("div", class_="subSectionDesc")[0]

<div class="subSectionDesc">
<a class="modalContentLink" data-modal-content-popup-url="https://steamcommunity.com/sharedfiles/displayimageformodaldialog/?imageurl=https%3A%2F%2Fsteamuserimages-a.akamaihd.net%2Fugc%2F1268274830573336804%2F114F0F8246DD3BB42C7DC059E2F9724673D24572%2F" href="https://steamuserimages-a.akamaihd.net/ugc/1268274830573336804/114F0F8246DD3BB42C7DC059E2F9724673D24572/"><img alt="" class="sharedFilePreviewImage sizeThumb inline" src="https://steamuserimages-a.akamaihd.net/ugc/1268274830573336804/114F0F8246DD3BB42C7DC059E2F9724673D24572/" title=""/></a><br/><br/>Magisk: <a class="bb_link" href="http://steamcommunity.com/profiles/76561197983956651" rel="noreferrer" target="_blank">http://steamcommunity.com/profiles/76561197983956651</a><br/>device: <a class="bb_link" href="http://steamcommunity.com/profiles/76561197987713664" rel="noreferrer" target="_blank">http://steamcommunity.com/profiles/76561197987713664</a><br/>dupreeh: <a class="bb_link" href="http://steamco

In [29]:
player_profiles = []
last_was_string = False
last_nickname = ''
for e in tqdm(soup.find_all("div", class_="subSectionDesc")):
    for c in e.contents:
        if isinstance(c, str):
            last_nickname = c
            last_was_string = True
        elif last_was_string:
            last_was_string = False
            if c.text.startswith("http://steamcommunity.com/profiles/"):
                player_profiles.append({"Nickname": last_nickname, "Profile": c.text})
                last_nickname = ''
profiles_df = pd.DataFrame(player_profiles)
profiles_df

100%|██████████| 94/94 [00:00<00:00, 39529.23it/s]


Unnamed: 0,Nickname,Profile
0,Magisk:,http://steamcommunity.com/profiles/76561197983...
1,device:,http://steamcommunity.com/profiles/76561197987...
2,dupreeh:,http://steamcommunity.com/profiles/76561198004...
3,Snappi:,http://steamcommunity.com/profiles/76561197989...
4,Xyp9x:,http://steamcommunity.com/profiles/76561197990...
...,...,...
144,gob b:,http://steamcommunity.com/profiles/76561197961...
145,Maniac:,http://steamcommunity.com/profiles/76561197960...
146,mistou:,http://steamcommunity.com/profiles/76561197997...
147,B1ad3:,http://steamcommunity.com/profiles/76561198013...


In [28]:
with open("../data/hltv-player-stats.html") as fp:
    soup = BeautifulSoup(fp, 'lxml')
stats_table = soup.find_all("table", class_="stats-table player-ratings-table")[0]
stat_rows = stats_table.tbody.find_all('tr')

player_ratings = []
for stat_row in stat_rows:
    nickname = stat_row.find_all("td", class_="playerCol")[0].a.text
    rating = float(stat_row.find_all("td", class_="ratingCol")[0].text)
    player_ratings.append({"Nickname": nickname, "HLTV Rating": rating})
nicknames_df = pd.DataFrame(player_ratings)
nicknames_df = nicknames_df.drop_duplicates("Nickname", ignore_index=True)
nicknames_df.to_feather("../data/player_ratings.feather")
nicknames_df

Unnamed: 0,Nickname,HLTV Rating
0,s1mple,1.34
1,ZywOo,1.28
2,sh1ro,1.26
3,NiKo,1.25
4,XANTARES,1.25
...,...,...
417,HooXi,0.89
418,ScrunK,0.89
419,friberg,0.88
420,ADEJIS,0.87


## Query Daten von FACEIT

Die Statistiken aller `Matches` von einer Reihe an professionellen Spielern soll über die [FACEIT Data API](https://developers.faceit.com/docs/tools/data-api) erfasst werden.

In [58]:
load_dotenv()
headers = {"Authorization": f"Bearer {os.environ['FACEIT_API_KEY']}"}

Eine Liste mit Nicknamen von professionellen Spielern wurde bezogen von [CSGO Buff](https://csgobuff.pro/player/?page=5&country=all). Diese wurden Manuell in die Datei `data/professional-player-list.txt` eingetragen. Insgesamt wurden die Nicknamen von 140 professionellen Spielern erfasst.

In [59]:
#nicknames_of_professionals = [line.strip() for line in open("../data/professional-player-list.txt").readlines()]
# Anzahl an Spielern, Die Nicknamen der ersten 5 Spieler
#len(nicknames_of_professionals), nicknames_of_professionals[:5]

Die FACEIT Spieler IDs von den 140 Spielern werden über die [FACEIT Data API](https://developers.faceit.com/docs/tools/data-api) abgerufen.

In [60]:
res = []
for i, row in tqdm(nicknames_df.iterrows(), "Getting player information", total=nicknames_df.shape[0]):
    nickname = row["Nickname"]
    r = requests.get('https://open.faceit.com/data/v4/players', params={'nickname': nickname}, headers=headers)
    res.append(r.json())
json.dump(res, open("../data/response-professional-players.json", "w"), sort_keys=True)
# Beispiel Response vom Server
res[0]

Getting player information: 100%|██████████| 423/423 [02:04<00:00,  3.40it/s]


{'player_id': 'ac71ba3c-d3d4-45e7-8be2-26aa3986867d',
 'nickname': 's1mple',
 'avatar': 'https://assets.faceit-cdn.net/avatars/ac71ba3c-d3d4-45e7-8be2-26aa3986867d_1581521810013.jpg',
 'country': 'ua',
 'cover_image': 'https://assets.faceit-cdn.net/users_covers/ac71ba3c-d3d4-45e7-8be2-26aa3986867d_1550753463686.jpg',
 'platforms': {'steam': 'STEAM_1:1:36968273'},
 'games': {'csgo': {'region': 'EU',
   'game_player_id': '76561198034202275',
   'skill_level': 10,
   'faceit_elo': 4356,
   'game_player_name': 's1',
   'skill_level_label': '',
   'regions': {},
   'game_profile_id': ''}},
 'settings': {'language': 'en'},
 'friends_ids': ['71cb86b2-8bfc-42a7-809a-999ecb850577',
  'ef2b7cb4-acb2-4144-9c3f-6fcd9200f9cdgui',
  'ef2b7cb4-acb2-4144-9c3f-6fcd9200f9cd',
  'ef2b7cb4-acb2-4144-9c3f-6fcd9200f9cd',
  '0ca93b88-3ec7-4e1e-8b2d-3ebfe31c3414',
  'e20b949c-d784-4d57-94ef-fb0bc92ad7cd',
  'c3136a69-204a-4dfb-9766-ba6e954ecfcc',
  'a62034c1-bfc8-41cf-af38-2564a48bfe71',
  '011a79c8-e237-47d7

In [61]:
# Welche Nicknamen konnten nicht gefunden werden?
infos_of_professionals = json.load(open("../data/response-professional-players.json"))
unfound = []
for i, row in tqdm(nicknames_df.iterrows(), "Getting player information", total=nicknames_df.shape[0]):
    nickname = row["Nickname"]
    r = infos_of_professionals[i]
    try:
        r['player_id']
    except KeyError:
        # Player was not found
        unfound.append(nickname)
len(unfound)

Getting player information: 100%|██████████| 423/423 [00:00<00:00, 34412.88it/s]


137

41 Spieler konnten nicht über ihre Nicknamen gefunden werden. Dies kann verschiedene Gründe haben: Manche Spieler varieren die Schreibweise ihrer Nicknamen, so nennt sich der Spieler GuardiaN manchmal auch -GuardiaN. Ebenso sind manche Spieler bewusst unter anderem Nicknamen bei FACEIT registriert um nicht erkannt zu werden. Somit bleiben 99 professionelle Spieler für die kommende Analyse.

Die Informationen zu allen Spielen aus dem letzten Jahr von den übrigen 99 spielen werden abgerufen.

In [62]:
infos_of_professionals = json.load(open("../data/response-professional-players.json"))
today = datetime.today()
last_year = datetime.today() - relativedelta(years=1)
res = []
for player_info in tqdm(infos_of_professionals, "Getting player match-ids"):
    try:
        player_id = player_info['player_id']
    except KeyError:
        # Player was not found
        continue
    params = {
        'game': 'csgo',
        'from': last_year.timestamp(),
        'to': today.timestamp(),
        'limit': 100
    }
    r = requests.get(f'https://open.faceit.com/data/v4/players/{player_id}/history', headers=headers, params=params)
    res.append(r.json())
json.dump(res, open("../data/response-professional-player-match_infos.json", "w"), sort_keys=True)
res[:5]

Getting player match-ids: 100%|██████████| 423/423 [02:14<00:00,  3.15it/s]


[{'items': [{'match_id': '1-85b41bc9-8ebd-41e6-908f-f33cc55aee53',
    'game_id': 'csgo',
    'region': 'EU',
    'match_type': '',
    'game_mode': '5v5',
    'max_players': 10,
    'teams_size': 5,
    'teams': {'faction1': {'team_id': 'ac71ba3c-d3d4-45e7-8be2-26aa3986867d',
      'nickname': 'team_s1mple',
      'avatar': 'https://assets.faceit-cdn.net/avatars/ac71ba3c-d3d4-45e7-8be2-26aa3986867d_1581521810013.jpg',
      'type': '',
      'players': [{'player_id': '9a8979c1-d78c-40ea-a0e8-76685761b25e',
        'nickname': 'dotoooo',
        'avatar': 'https://assets.faceit-cdn.net/avatars/9a8979c1-d78c-40ea-a0e8-76685761b25e_1550489171090.png',
        'skill_level': 10,
        'game_player_id': '76561197999584343',
        'game_player_name': 'doto',
        'faceit_url': 'https://www.faceit.com/{lang}/players/dotoooo'},
       {'player_id': 'ac71ba3c-d3d4-45e7-8be2-26aa3986867d',
        'nickname': 's1mple',
        'avatar': 'https://assets.faceit-cdn.net/avatars/ac71ba3c-d3d

In [63]:
match_infos = json.load(open("../data/response-professional-player-match_infos.json"))
match_ids = [match_info['match_id'] for res_info in match_infos for match_info in res_info['items']]
len(match_ids)

21205

Es wurden 7896 Spiele gefunden. Zu diesen werden die Statistiken der 

In [64]:
match_infos = json.load(open("../data/response-professional-player-match_infos.json"))
res = []
match_ids = [match_info['match_id'] for res_info in match_infos for match_info in res_info['items']]
for match_id in tqdm(match_ids, "Getting match stats"):
    r = requests.get(f'https://open.faceit.com/data/v4/matches/{match_id}/stats', headers=headers)
    res.append(r.json())
json.dump(res, open("../data/response-professional-player-match_stats.json", "w"), sort_keys=True)
res[:5]

Getting match stats: 100%|██████████| 21205/21205 [1:17:55<00:00,  4.54it/s] 


[{'rounds': [{'best_of': '2',
    'competition_id': None,
    'game_id': 'csgo',
    'game_mode': '5v5',
    'match_id': '1-85b41bc9-8ebd-41e6-908f-f33cc55aee53',
    'match_round': '1',
    'played': '1',
    'round_stats': {'Score': '13 / 16',
     'Winner': '39056d07-cdc5-4613-906d-e900a1f781a2',
     'Rounds': '29',
     'Region': 'EU',
     'Map': 'de_mirage'},
    'teams': [{'team_id': 'ac71ba3c-d3d4-45e7-8be2-26aa3986867d',
      'premade': False,
      'team_stats': {'First Half Score': '9',
       'Team': 'team_s1mple',
       'Final Score': '13',
       'Second Half Score': '4',
       'Team Win': '0',
       'Team Headshots': '9.4',
       'Overtime score': '0'},
      'players': [{'player_id': '9a8979c1-d78c-40ea-a0e8-76685761b25e',
        'nickname': 'dotoooo',
        'player_stats': {'K/R Ratio': '0.48',
         'Assists': '2',
         'Headshots %': '50',
         'Result': '0',
         'K/D Ratio': '0.7',
         'Deaths': '20',
         'Quadro Kills': '0',
     

## JSON-Responses in DataFrames konvertieren



In [3]:
match_stats = json.load(open("../data/response-professional-player-match_stats.json"))
len(match_stats)

21205

`match_stats` ist eine Liste aus Dicts mit einem einzigen key: `rounds`.
Wie viele Runden kann ein Match haben? 

In [4]:
num_rounds_min = np.inf
num_rounds_max = 0
num_rounds_sum = 0
unfound_matches = []
for stat in match_stats:
    try:
        n = len(stat['rounds'])
    except KeyError:
        unfound_matches.append(stat)
        continue
    if n > num_rounds_max:
        num_rounds_max = n
    if n < num_rounds_min:
        num_rounds_min = n
    num_rounds_sum += n
num_rounds_mean = num_rounds_sum / len(match_stats)
num_rounds_min, num_rounds_max, num_rounds_sum, num_rounds_mean

(1, 5, 22030, 1.038905918415468)

In [5]:
len(unfound_matches)

350

Jedes Match hat mindestens eine Runde und maximal vier. Insgesamt hat der Datensatz 8312 Runden, diese werden einzeln behandelt. 130 von den 7896 Matches konnten nicht gefunden werden und werden ignoriert.

In [6]:
round_stats = [round_stat for stat in match_stats if 'rounds' in stat.keys() for round_stat in stat['rounds']]
round_keys = {}
for stat in round_stats:
    for k, v in stat.items():
        if k not in round_keys and k != 'teams':
            round_keys[k]= v
round_keys

{'best_of': '2',
 'competition_id': None,
 'game_id': 'csgo',
 'game_mode': '5v5',
 'match_id': '1-85b41bc9-8ebd-41e6-908f-f33cc55aee53',
 'match_round': '1',
 'played': '1',
 'round_stats': {'Map': 'de_mirage',
  'Region': 'EU',
  'Rounds': '29',
  'Score': '13 / 16',
  'Winner': '39056d07-cdc5-4613-906d-e900a1f781a2'}}

Attribute von Interesse: `game_mode`, `round_stats`, `teams`. Entfernen aller nicht 5v5-Matches:

In [7]:
round_stats = [stat for stat in round_stats if stat['game_mode'] == "5v5"]
len(round_stats)

21807

Nach entfernen der Nicht-5v5 Matches bleiben 8079 Runden. Aus diesen können nun Features zu jedem Spieler gezogen werden.

In [8]:
team_keys = {}
for round_stat in round_stats:
    # Define round-specific attributes / features
    for team in round_stat['teams']:
        for k, v in team.items():
            if k not in team_keys and k != 'players':
                team_keys[k] = v
team_keys
        

{'premade': False,
 'team_id': 'ac71ba3c-d3d4-45e7-8be2-26aa3986867d',
 'team_stats': {'Final Score': '13',
  'First Half Score': '9',
  'Overtime score': '0',
  'Second Half Score': '4',
  'Team': 'team_s1mple',
  'Team Headshots': '9.4',
  'Team Win': '0'}}

In [9]:
player_keys = {}
for round_stat in round_stats:
    # Define round-specific attributes / features
    for team in round_stat['teams']:
        for player in team['players']:
            for k, v in player.items():
                if k not in player_keys:
                    player_keys[k] = v
player_keys

{'nickname': 'dotoooo',
 'player_id': '9a8979c1-d78c-40ea-a0e8-76685761b25e',
 'player_stats': {'Assists': '2',
  'Deaths': '20',
  'Headshots': '7',
  'Headshots %': '50',
  'K/D Ratio': '0.7',
  'K/R Ratio': '0.48',
  'Kills': '14',
  'MVPs': '2',
  'Penta Kills': '0',
  'Quadro Kills': '0',
  'Result': '0',
  'Triple Kills': '1'}}

In [27]:
len(nicknames_df.loc[nicknames_df['Nickname'] == 'Lucky']["HLTV Rating"])

2

In [13]:
player_stats = []
for round_stat in round_stats:
    # Define round-specific attributes / features
    cs_map = round_stat['round_stats']['Map']
    region = round_stat['round_stats']['Region']
    rounds_played = int(round_stat['round_stats']['Rounds'])
    winner_team_id = round_stat['round_stats']['Winner']
    round_features = {
        "Map": cs_map,
        "Region": region,
        "Rounds": rounds_played
    }
    for team in round_stat['teams']:
        is_winner = winner_team_id == team['team_id']
        premade = team['premade']
        score = int(team['team_stats']['Final Score'])
        score_first_half = int(team['team_stats']['First Half Score'])
        score_second_half = int(team['team_stats']['Second Half Score'])
        score_overtime = int(team['team_stats']['Overtime score'])
        team_features = {
            "Winner": is_winner,
            "Premade": premade,
            "Score": score,
            "Score First Half": score_first_half,
            "Score Second Half": score_second_half,
            "Score Overtime": score_overtime
        }
        for player in team['players']:
            nickname = player['nickname']
            player_id = player['player_id']
            is_professional = nickname in nicknames_df["Nickname"].values
            hltv_rating = nicknames_df.loc[nicknames_df['Nickname'] == nickname]["HLTV Rating"].item() if is_professional else 0

            assists = int(player['player_stats']['Assists'])
            deaths = int(player['player_stats']['Deaths'])
            headshots = int(player['player_stats']['Headshots'])
            headshot_ratio = float(player['player_stats']['Headshots %']) / 100
            kd_ratio = float(player['player_stats']['K/D Ratio'])
            kr_ratio = float(player['player_stats']['K/R Ratio'])
            kills = int(player['player_stats']['Kills'])
            mvps = int(player['player_stats']['MVPs'])
            aces = int(player['player_stats']['Penta Kills'])
            quad = int(player['player_stats']['Quadro Kills'])
            triple = int(player['player_stats']['Triple Kills'])

            player_features = {
                "Nickname": nickname,
                "Player ID": player_id,
                "Professional": is_professional,
                "HLTV Rating": hltv_rating,
                "Assists": assists,
                "Deaths": deaths,
                "Headshots": headshots,
                "Headshot Ratio": headshot_ratio,
                "K/D Ratio": kd_ratio,
                "K/R Ratio": kr_ratio,
                "Kills": kills,
                "MVPs": mvps,
                "Ace": aces,
                "Quad Kills": quad,
                "Triple Kills": triple
            }

            player_stats.append(dict(
                **round_features,
                **team_features,
                **player_features
            ))
df = pd.DataFrame(player_stats)
df.to_feather("../data/player_match_statistics.feather")
df.head()

Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert an array of size 1 to a Python scalar
Lucky
can only convert a

Unnamed: 0,Map,Region,Rounds,Winner,Premade,Score,Score First Half,Score Second Half,Score Overtime,Nickname,...,Deaths,Headshots,Headshot Ratio,K/D Ratio,K/R Ratio,Kills,MVPs,Ace,Quad Kills,Triple Kills
0,de_mirage,EU,29,False,False,13,9,4,0,dotoooo,...,20,7,0.5,0.7,0.48,14,2,0,0,1
1,de_mirage,EU,29,False,False,13,9,4,0,s1mple,...,22,13,0.39,1.5,1.14,33,4,0,1,4
2,de_mirage,EU,29,False,False,13,9,4,0,Fessor,...,24,14,0.74,0.79,0.66,19,5,0,0,0
3,de_mirage,EU,29,False,False,13,9,4,0,rallen,...,20,4,0.4,0.5,0.34,10,1,0,0,0
4,de_mirage,EU,29,False,False,13,9,4,0,flameZ-,...,21,9,0.64,0.67,0.48,14,1,1,0,0
