In [1]:
import json
import os
import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
from dataclasses import dataclass, field
from typing import List

import numpy as np
import pandas as pd

import requests
from dotenv import load_dotenv
from tqdm import tqdm

load_dotenv()

True

In [2]:
headers = {"Authorization": f"Bearer {os.environ['FACEIT_API_KEY']}"}

## Query Daten von FACEIT

Die Statistiken aller `Matches` von einer Reihe an professionellen Spielern soll über die [FACEIT Data API](https://developers.faceit.com/docs/tools/data-api) erfasst werden.

Eine Liste mit Nicknamen von professionellen Spielern wurde bezogen von [CSGO Buff](https://csgobuff.pro/player/?page=5&country=all). Diese wurden Manuell in die Datei `data/professional-player-list.txt` eingetragen. Insgesamt wurden die Nicknamen von 140 professionellen Spielern erfasst.

In [3]:
nicknames_of_professionals = [line.strip() for line in open("../data/professional-player-list.txt").readlines()]
# Anzahl an Spielern, Die Nicknamen der ersten 5 Spieler
len(nicknames_of_professionals), nicknames_of_professionals[:5]

(140, ['ScreaM', 'markeloff', 'GuardiaN', 'ceh9', 'flusha'])

Die FACEIT Spieler IDs von den 140 Spielern werden über die [FACEIT Data API](https://developers.faceit.com/docs/tools/data-api) abgerufen.

In [14]:
res = []
for nickname in tqdm(nicknames_of_professionals, "Getting player information"):
    r = requests.get('https://open.faceit.com/data/v4/players', params={'nickname': nickname}, headers=headers)
    res.append(r.json())
json.dump(res, open("../data/response-professional-players.json", "w"), sort_keys=True)
# Beispiel Response vom Server
res[0]

Getting player information: 100%|██████████| 140/140 [00:38<00:00,  3.59it/s]


AttributeError: 'str' object has no attribute 'write'

In [50]:
# Welche Nicknamen konnten nicht gefunden werden?
infos_of_professionals = json.load(open("../data/response-professional-players.json"))
unfound = []
for i, nickname in enumerate(nicknames_of_professionals):
    r = infos_of_professionals[i]
    try:
        r['player_id']
    except KeyError:
        # Player was not found
        unfound.append(nickname)
len(unfound)

Getting player information: 140it [00:00, 407779.56it/s]


41

41 Spieler konnten nicht über ihre Nicknamen gefunden werden. Dies kann verschiedene Gründe haben: Manche Spieler varieren die Schreibweise ihrer Nicknamen, so nennt sich der Spieler GuardiaN manchmal auch -GuardiaN. Ebenso sind manche Spieler bewusst unter anderem Nicknamen bei FACEIT registriert um nicht erkannt zu werden. Somit bleiben 99 professionelle Spieler für die kommende Analyse.

Die Informationen zu allen Spielen aus dem letzten Jahr von den übrigen 99 spielen werden abgerufen.

In [47]:
infos_of_professionals = json.load(open("../data/response-professional-players.json"))
today = datetime.today()
last_year = datetime.today() - relativedelta(years=1)
res = []
for player_info in tqdm(infos_of_professionals, "Getting player match-ids"):
    try:
        player_id = player_info['player_id']
    except KeyError:
        # Player was not found
        continue
    params = {
        'game': 'csgo',
        'from': last_year.timestamp(),
        'to': today.timestamp(),
        'limit': 100
    }
    r = requests.get(f'https://open.faceit.com/data/v4/players/{player_id}/history', headers=headers, params=params)
    res.append(r.json())
json.dump(res, open("../data/response-professional-player-match_infos.json", "w"), sort_keys=True)
res[:5]

Getting player match-ids: 100%|██████████| 140/140 [00:00<00:00, 284635.27it/s]


41

In [3]:
match_infos = json.load(open("../data/response-professional-player-match_infos.json"))
match_ids = [match_info['match_id'] for res_info in match_infos for match_info in res_info['items']]
len(match_ids)

7896

Es wurden 7896 Spiele gefunden. Zu diesen werden die Statistiken der 

In [43]:
match_infos = json.load(open("../data/response-professional-player-match_infos.json"))
res = []
match_ids = [match_info['match_id'] for res_info in match_infos for match_info in res_info['items']]
for match_id in tqdm(match_ids, "Getting match stats"):
    r = requests.get(f'https://open.faceit.com/data/v4/matches/{match_id}/stats', headers=headers)
    res.append(r.json())
json.dump(res, open("../data/response-professional-player-match_stats.json", "w"), sort_keys=True)
res[:5]

Getting match stats: 100%|██████████| 7896/7896 [28:11<00:00,  4.67it/s]  


[{'rounds': [{'best_of': '2',
    'competition_id': None,
    'game_id': 'csgo',
    'game_mode': '5v5',
    'match_id': '1-e9647001-0a77-4c7f-84dc-a5e42df6c223',
    'match_round': '1',
    'played': '1',
    'round_stats': {'Score': '16 / 9',
     'Region': 'EU',
     'Winner': 'a20b7fb8-b93f-4608-90af-4f1239576ea2',
     'Rounds': '25',
     'Map': 'de_train'},
    'teams': [{'team_id': 'a20b7fb8-b93f-4608-90af-4f1239576ea2',
      'premade': False,
      'team_stats': {'Team Win': '1',
       'Overtime score': '0',
       'First Half Score': '8',
       'Second Half Score': '8',
       'Team Headshots': '10',
       'Final Score': '16',
       'Team': 'team_Babalicious-'},
      'players': [{'player_id': '2a81b56b-1068-4891-b773-7f1c328a6003',
        'nickname': 'GrandeS',
        'player_stats': {'Triple Kills': '1',
         'Headshots %': '53',
         'Result': '1',
         'K/R Ratio': '0.68',
         'Headshots': '9',
         'K/D Ratio': '1.21',
         'Deaths': '14',

## JSON-Responses in DataFrames konvertieren



In [8]:
match_stats = json.load(open("../data/response-professional-player-match_stats.json"))
len(match_stats)

7896

`match_stats` ist eine Liste aus Dicts mit einem einzigen key: `rounds`.
Wie viele Runden kann ein Match haben? 

In [9]:
num_rounds_min = np.inf
num_rounds_max = 0
num_rounds_sum = 0
unfound_matches = []
for stat in match_stats:
    try:
        n = len(stat['rounds'])
    except KeyError:
        unfound_matches.append(stat)
        continue
    if n > num_rounds_max:
        num_rounds_max = n
    if n < num_rounds_min:
        num_rounds_min = n
    num_rounds_sum += n
num_rounds_mean = num_rounds_sum / len(match_stats)
num_rounds_min, num_rounds_max, num_rounds_sum, num_rounds_mean

(1, 4, 8312, 1.0526849037487336)

In [10]:
len(unfound_matches)

130

Jedes Match hat mindestens eine Runde und maximal vier. Insgesamt hat der Datensatz 8312 Runden, diese werden einzeln behandelt. 130 von den 7896 Matches konnten nicht gefunden werden und werden ignoriert.

In [11]:
round_stats = [round_stat for stat in match_stats if 'rounds' in stat.keys() for round_stat in stat['rounds']]
round_keys = {}
for stat in round_stats:
    for k, v in stat.items():
        if k not in round_keys and k != 'teams':
            round_keys[k]= v
round_keys

{'best_of': '2',
 'competition_id': None,
 'game_id': 'csgo',
 'game_mode': '5v5',
 'match_id': '1-e9647001-0a77-4c7f-84dc-a5e42df6c223',
 'match_round': '1',
 'played': '1',
 'round_stats': {'Map': 'de_train',
  'Region': 'EU',
  'Rounds': '25',
  'Score': '16 / 9',
  'Winner': 'a20b7fb8-b93f-4608-90af-4f1239576ea2'}}

Attribute von Interesse: `game_mode`, `round_stats`, `teams`. Entfernen aller nicht 5v5-Matches:

In [12]:
round_stats = [stat for stat in round_stats if stat['game_mode'] == "5v5"]
len(round_stats)

8079

Nach entfernen der Nicht-5v5 Matches bleiben 8079 Runden. Aus diesen können nun Features zu jedem Spieler gezogen werden.

In [13]:
team_keys = {}
for round_stat in round_stats:
    # Define round-specific attributes / features
    for team in round_stat['teams']:
        for k, v in team.items():
            if k not in team_keys and k != 'players':
                team_keys[k] = v
team_keys
        

{'premade': False,
 'team_id': 'a20b7fb8-b93f-4608-90af-4f1239576ea2',
 'team_stats': {'Final Score': '16',
  'First Half Score': '8',
  'Overtime score': '0',
  'Second Half Score': '8',
  'Team': 'team_Babalicious-',
  'Team Headshots': '10',
  'Team Win': '1'}}

In [14]:
player_keys = {}
for round_stat in round_stats:
    # Define round-specific attributes / features
    for team in round_stat['teams']:
        for player in team['players']:
            for k, v in player.items():
                if k not in player_keys:
                    player_keys[k] = v
player_keys

{'nickname': 'GrandeS',
 'player_id': '2a81b56b-1068-4891-b773-7f1c328a6003',
 'player_stats': {'Assists': '2',
  'Deaths': '14',
  'Headshots': '9',
  'Headshots %': '53',
  'K/D Ratio': '1.21',
  'K/R Ratio': '0.68',
  'Kills': '17',
  'MVPs': '4',
  'Penta Kills': '0',
  'Quadro Kills': '0',
  'Result': '1',
  'Triple Kills': '1'}}

In [15]:
player_stats = []
for round_stat in round_stats:
    # Define round-specific attributes / features
    cs_map = round_stat['round_stats']['Map']
    region = round_stat['round_stats']['Region']
    rounds_played = int(round_stat['round_stats']['Rounds'])
    winner_team_id = round_stat['round_stats']['Winner']
    round_features = {
        "Map": cs_map,
        "Region": region,
        "Rounds": rounds_played
    }
    for team in round_stat['teams']:
        is_winner = winner_team_id == team['team_id']
        premade = team['premade']
        score = int(team['team_stats']['Final Score'])
        score_first_half = int(team['team_stats']['First Half Score'])
        score_second_half = int(team['team_stats']['Second Half Score'])
        score_overtime = int(team['team_stats']['Overtime score'])
        team_features = {
            "Winner": is_winner,
            "Premade": premade,
            "Score": score,
            "Score First Half": score_first_half,
            "Score Second Half": score_second_half,
            "Score Overtime": score_overtime
        }
        for player in team['players']:
            nickname = player['nickname']
            player_id = player['player_id']
            is_professional = nickname in nicknames_of_professionals

            assists = int(player['player_stats']['Assists'])
            deaths = int(player['player_stats']['Deaths'])
            headshots = int(player['player_stats']['Headshots'])
            headshot_ratio = float(player['player_stats']['Headshots %']) / 100
            kd_ratio = float(player['player_stats']['K/D Ratio'])
            kr_ratio = float(player['player_stats']['K/R Ratio'])
            kills = int(player['player_stats']['Kills'])
            mvps = int(player['player_stats']['MVPs'])
            aces = int(player['player_stats']['Penta Kills'])
            quad = int(player['player_stats']['Quadro Kills'])
            triple = int(player['player_stats']['Triple Kills'])


            player_features = {
                "Nickname": nickname,
                "Player ID": player_id,
                "Professional": is_professional,
                "Assists": assists,
                "Deaths": deaths,
                "Headshots": headshots,
                "Headshot Ratio": headshot_ratio,
                "K/D Ratio": kd_ratio,
                "K/R Ratio": kr_ratio,
                "Kills": kills,
                "MVPs": mvps,
                "Ace": aces,
                "Quad Kills": quad,
                "Triple Kills": triple
            }

            player_stats.append(dict(
                **round_features,
                **team_features,
                **player_features
            ))
df = pd.DataFrame(player_stats)
df.to_feather("../data/player_match_statistics.feather")
df.head()

Unnamed: 0,Map,Region,Rounds,Winner,Premade,Score,Score First Half,Score Second Half,Score Overtime,Nickname,...,Deaths,Headshots,Headshot Ratio,K/D Ratio,K/R Ratio,Kills,MVPs,Ace,Quad Kills,Triple Kills
0,de_train,EU,25,True,False,16,8,8,0,GrandeS,...,14,9,0.53,1.21,0.68,17,4,0,0,1
1,de_train,EU,25,True,False,16,8,8,0,Babalicious-,...,18,16,0.62,1.44,1.04,26,2,0,1,2
2,de_train,EU,25,True,False,16,8,8,0,Celebrations,...,14,4,0.27,1.07,0.6,15,2,0,0,1
3,de_train,EU,25,True,False,16,8,8,0,gabex0,...,14,13,0.43,2.14,1.2,30,6,0,1,1
4,de_train,EU,25,True,False,16,8,8,0,Ekuuu,...,18,8,0.36,1.22,0.88,22,2,0,0,1
