# Anwendung des besten Modells

In diesem Notebook wird das beste Modell aus den vorherigen Notebooks auf den Datensatz angewandt. Es werden alle Spiele der professionellen Spieler herausgefiltert.



## Erneutes Training des Perceptrons

Doch zuerst wird das Perceptron erneut mit den besten Parametern trainiert. Da der Ablauf gleich ist, wird hier der gleiche Code verwendet.

In [1]:
import pandas as pd
import numpy as np

import sklearn.preprocessing
import sklearn.metrics
from sklearn.linear_model import Perceptron
from sklearn_pandas import DataFrameMapper

In [2]:
# Load the cleaned per-player match statistics from the feather file.
df = pd.read_feather(path="../data/2-player_match_statistics_cleaned.feather")
df  # Last expression of the cell: renders a preview of the loaded frame

Unnamed: 0,Map,Region,Rounds,Winner,Premade,Score,Score First Half,Score Second Half,Score Overtime,Nickname,...,Deaths,Headshots,Headshot Ratio,K/D Ratio,K/R Ratio,Kills,MVPs,Ace,Quad Kills,Triple Kills
0,de_dust2,EU,10,True,False,9,7,2,0,ForceHunterX,...,2,10,0.62,8.00,1.60,16,4,0,1,0
1,de_dust2,EU,10,True,False,9,7,2,0,R_SaitamaTV,...,4,1,0.20,1.25,0.50,5,1,0,0,1
2,de_dust2,EU,10,True,False,9,7,2,0,kubinatorLSD,...,2,4,0.40,5.00,1.00,10,3,0,0,1
3,de_dust2,EU,10,True,False,9,7,2,0,_mhN,...,5,6,0.75,1.60,0.80,8,1,0,0,0
4,de_dust2,EU,10,True,False,9,7,2,0,RUcKeTa,...,3,4,0.40,3.33,1.00,10,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233642,de_mirage,US,25,True,False,16,8,8,0,csniise,...,12,13,0.57,1.92,0.92,23,3,0,0,2
233643,de_mirage,US,25,True,False,16,8,8,0,freako,...,18,7,0.41,0.94,0.68,17,3,0,0,1
233644,de_mirage,US,25,True,False,16,8,8,0,1saMster,...,14,11,0.46,1.71,0.96,24,3,0,1,0
233645,de_mirage,US,25,True,False,16,8,8,0,shike,...,15,10,0.43,1.53,0.92,23,5,0,1,1


In [3]:
# Column groups, listed in the order in which the mapper below emits them.
categorical_cols = ['Map', 'Region']
scaled_cols = ['Rounds', 'Score', 'Score First Half', 'Score Second Half', 'Score Overtime', 'Assists', 'Deaths', 'Headshots', 'Kills', 'MVPs', 'Ace', 'Quad Kills', 'Triple Kills']
flag_cols = ['Winner', 'Premade', 'Professional']
ratio_cols = ['Headshot Ratio', 'K/D Ratio', 'K/R Ratio']

# Final column names, applied positionally after the transformation.
col_names = [*categorical_cols, *scaled_cols, *flag_cols, *ratio_cols]

# Mapper that routes every feature group to its preprocessing step.
mapper = DataFrameMapper(
    [
        # Each categorical feature gets its own encoder instance because the
        # two features do not share the same set of categories.
        ('Map', sklearn.preprocessing.LabelEncoder()),
        ('Region', sklearn.preprocessing.LabelEncoder()),
        # Standardize all count-like numerical features.
        (scaled_cols, sklearn.preprocessing.StandardScaler()),
        # Boolean flags are passed through unchanged.
        (flag_cols, None),
        # Ratios are passed through unscaled. Note that they are not bounded
        # by 1 (the K/D Ratio of the first row is 8.00).
        (ratio_cols, None),
    ],
    df_out=True,
)

# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/test split performed later - confirm that this is intended.
# The mapper receives a copy so the transformation cannot touch the loaded frame.
df = mapper.fit_transform(df.copy())

# The mapper renames some columns; restore the plain names by position.
df = df.rename(columns=dict(zip(df.columns, col_names)))
df

Unnamed: 0,Map,Region,Rounds,Score,Score First Half,Score Second Half,Score Overtime,Assists,Deaths,Headshots,...,MVPs,Ace,Quad Kills,Triple Kills,Winner,Premade,Professional,Headshot Ratio,K/D Ratio,K/R Ratio
0,1,0,-3.588426,-1.005840,-0.176514,-1.237520,-0.269394,-1.637944,-3.698886,0.245655,...,0.748333,-0.150172,1.692307,-0.945769,True,False,False,0.62,8.00,1.60
1,1,0,-3.588426,-1.005840,-0.176514,-1.237520,-0.269394,-0.638139,-3.252398,-1.895589,...,-0.916309,-0.150172,-0.441106,-0.012668,True,False,False,0.20,1.25,0.50
2,1,0,-3.588426,-1.005840,-0.176514,-1.237520,-0.269394,-0.638139,-3.698886,-1.181841,...,0.193452,-0.150172,-0.441106,-0.012668,True,False,False,0.40,5.00,1.00
3,1,0,-3.588426,-1.005840,-0.176514,-1.237520,-0.269394,-1.138042,-3.029154,-0.706009,...,-0.916309,-0.150172,-0.441106,-0.945769,True,False,False,0.75,1.60,0.80
4,1,0,-3.588426,-1.005840,-0.176514,-1.237520,-0.269394,0.861567,-3.475642,-1.181841,...,-1.471190,-0.150172,-0.441106,-0.945769,True,False,False,0.40,3.33,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233642,3,1,-0.313897,0.659924,0.176247,0.914446,-0.269394,-0.638139,-1.466447,0.959403,...,0.193452,-0.150172,-0.441106,0.920433,True,False,False,0.57,1.92,0.92
233643,3,1,-0.313897,0.659924,0.176247,0.914446,-0.269394,0.861567,-0.126983,-0.468093,...,0.193452,-0.150172,-0.441106,-0.012668,True,False,False,0.41,0.94,0.68
233644,3,1,-0.313897,0.659924,0.176247,0.914446,-0.269394,1.861372,-1.019959,0.483571,...,0.193452,-0.150172,1.692307,-0.945769,True,False,False,0.46,1.71,0.96
233645,3,1,-0.313897,0.659924,0.176247,0.914446,-0.269394,-0.138237,-0.796715,0.245655,...,1.303213,-0.150172,1.692307,-0.012668,True,False,False,0.43,1.53,0.92


In [4]:
# Input features used by the model and the column it has to predict.
features = ["Region", "Map", "K/R Ratio", "K/D Ratio", "Premade", "Kills", "MVPs", "Headshots", "Triple Kills"]
target = "Professional"

# Convert the selected columns to float arrays.
X = df[features].to_numpy().astype(np.float64)
y = df[target].to_numpy().astype(np.float64)

# Positional 90/10 split: the first 90% of the rows form the training set,
# the remaining rows the test set (no shuffling).
split_at = int(df.shape[0] * 0.9)
X_train, X_test = X[:split_at], X[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

# Sanity check: one column per configured feature.
assert X_train.shape[1] == len(features), "Missconfiguration: Length of features don't match!"

train_len, test_len = X_train.shape[0], X_test.shape[0]
split_ratio = train_len / (train_len + test_len)

print(f" - Number of Features: {len(features)}\n - Length of Trainset: {train_len} ({split_ratio:.1%})\n - Length of Testset: {test_len} ({(1 - split_ratio):.1%})")

 - Number of Features: 9
 - Length of Trainset: 210282 (90.0%)
 - Length of Testset: 23365 (10.0%)


In [5]:
# Best hyperparameters, taken over from the 3rd notebook.
alpha = 6.56395668385533e-05
l1_ratio = 0.728476488797924
eta0 = 0.010257051752370759

# Configure the elastic-net regularized Perceptron with balanced class weights ...
clf = Perceptron(
    penalty="elasticnet",
    alpha=alpha,
    l1_ratio=l1_ratio,
    eta0=eta0,
    n_jobs=-1,
    class_weight='balanced',
)
# ... and train it on the training split (fit returns the estimator itself).
clf = clf.fit(X_train, y_train)

## Finden von möglichen professionellen Spielern

Nachdem das Perceptron trainiert ist, kann es nun genutzt werden, um professionelle Spieler zu erkennen. Dazu werden zuerst die professionellen Spieler aus dem Datensatz entfernt und für den Rest sowohl die `Decision`-Werte als auch die tatsächliche Vorhersage berechnet. Danach werden dem originalen Datensatz, wieder ohne professionelle Spieler, diese beiden Werte hinzugefügt. Schließlich werden die Statistiken aller Spieler gemittelt, Spieler, welche weniger als 5 Mal auftreten, werden entfernt und schlussendlich die besten 10 Spieler ausgegeben. Das Ranking der Spieler wird über den Anteil an "als professionell klassifiziert" und danach über den gemittelten `Decision`-Wert bestimmt. Diese 10 besten Spieler sind:

1. Shlyaperson
2. -Roze1337
3. mazdaaa-
4. BARTK-
5. Korry
6. f-jaguljnjak
7. Skase-
8. wast3d
9. evv3n
10. -Kebab_

Glückwunsch an diese Spieler!

In [6]:
# Restrict the transformed frame to the rows of non-professional players.
non_pro_mask = df["Professional"] == False
df = df[non_pro_mask].reset_index(drop=True)

# Feature matrix for these players, built exactly like the training input.
X = df[features].to_numpy().astype(np.float64)

# Hard class predictions of the trained Perceptron.
preds = clf.predict(X)
# Signed decision scores (distance to the separating hyperplane). These are
# raw scores, not calibrated probabilities, despite the variable name.
probs = clf.decision_function(X)

In [7]:
# The frame used so far only holds the numerically transformed features.
# Read the data again to get the nicknames and the untransformed values back.
df = pd.read_feather(path="../data/2-player_match_statistics_cleaned.feather")

# Apply the same non-professional filter as before so the rows line up
# one-to-one with the scores computed above.
df = df.loc[df["Professional"] == False].reset_index(drop=True)

# Every remaining player is non-professional, so the column carries no information.
del df["Professional"]

# Attach the decision scores and the predicted class to each row.
df["Professional Probability"] = probs
df["Professional Predicted"] = preds
df

Unnamed: 0,Map,Region,Rounds,Winner,Premade,Score,Score First Half,Score Second Half,Score Overtime,Nickname,...,Headshot Ratio,K/D Ratio,K/R Ratio,Kills,MVPs,Ace,Quad Kills,Triple Kills,Professional Probability,Professional Predicted
0,de_dust2,EU,10,True,False,9,7,2,0,ForceHunterX,...,0.62,8.00,1.60,16,4,0,1,0,0.056959,1.0
1,de_dust2,EU,10,True,False,9,7,2,0,R_SaitamaTV,...,0.20,1.25,0.50,5,1,0,0,1,-0.011886,0.0
2,de_dust2,EU,10,True,False,9,7,2,0,kubinatorLSD,...,0.40,5.00,1.00,10,3,0,0,1,0.029346,1.0
3,de_dust2,EU,10,True,False,9,7,2,0,_mhN,...,0.75,1.60,0.80,8,1,0,0,0,-0.009709,0.0
4,de_dust2,EU,10,True,False,9,7,2,0,RUcKeTa,...,0.40,3.33,1.00,10,0,0,0,0,-0.007901,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142515,de_mirage,US,25,True,False,16,8,8,0,csniise,...,0.57,1.92,0.92,23,3,0,0,2,0.005973,1.0
142516,de_mirage,US,25,True,False,16,8,8,0,freako,...,0.41,0.94,0.68,17,3,0,0,1,-0.000123,0.0
142517,de_mirage,US,25,True,False,16,8,8,0,1saMster,...,0.46,1.71,0.96,24,3,0,1,0,0.004667,1.0
142518,de_mirage,US,25,True,False,16,8,8,0,shike,...,0.43,1.53,0.92,23,5,0,1,1,0.021453,1.0


In [14]:
# Minimum number of recorded matches a nickname needs in order to be ranked.
min_matches = 5
# Number of top-ranked players to display.
top_n = 10

# Keep only the rows of players that occur at least `min_matches` times.
grouped_df = df.groupby("Nickname", sort=False).filter(lambda matches: len(matches) >= min_matches)

# Per-player mean of every numeric/boolean column. `numeric_only=True` is
# required on pandas >= 2.0, where averaging the text columns (e.g. Map,
# Region) raises a TypeError instead of silently dropping them; on older
# pandas versions it yields the same result as the implicit default.
grouped_df = grouped_df.groupby("Nickname", sort=False).mean(numeric_only=True)

# Rank by the share of matches classified as professional, ties broken by the
# mean decision score, and show the `top_n` most likely professional players.
grouped_df.sort_values(by=['Professional Predicted', 'Professional Probability'], ascending=[False, False]).head(top_n)

Unnamed: 0_level_0,Rounds,Winner,Premade,Score,Score First Half,Score Second Half,Score Overtime,Assists,Deaths,Headshots,Headshot Ratio,K/D Ratio,K/R Ratio,Kills,MVPs,Ace,Quad Kills,Triple Kills,Professional Probability,Professional Predicted
Nickname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
donk666,23.8,0.7,0.0,13.6,7.5,6.1,0.0,3.6,15.8,15.8,0.696,1.701,0.969,22.8,4.6,0.0,0.4,2.4,0.02315,1.0
zont1x,28.7,0.6,0.2,15.3,7.6,6.9,0.8,3.3,19.4,12.3,0.458,1.539,0.953,27.0,4.4,0.0,0.4,1.3,0.020351,1.0
-LeO-,27.928571,0.5,0.0,12.857143,6.142857,5.642857,1.071429,3.214286,22.0,10.785714,0.497143,1.034286,0.797143,22.428571,4.428571,0.0,0.357143,1.928571,0.017468,1.0
Newzeha,27.181818,0.272727,0.0,12.636364,7.181818,5.454545,0.0,3.272727,19.727273,12.454545,0.574545,1.130909,0.795455,21.636364,4.181818,0.0,0.0,1.272727,0.01586,1.0
SBolt-,27.25,0.6875,0.0,15.375,8.875,6.4375,0.0625,4.25,17.5,9.0625,0.388125,1.44125,0.86,23.1875,3.9375,0.0,0.3125,1.4375,0.015603,1.0
Leftie,30.5,0.5,0.0,15.3,7.9,5.6,1.8,4.6,21.0,9.8,0.452,1.089,0.715,21.8,3.9,0.0,0.2,1.0,0.013076,1.0
volt-,24.9,0.2,0.4,9.8,5.8,3.9,0.1,3.4,19.7,11.8,0.677,0.963,0.722,18.4,3.7,0.0,0.4,1.0,0.010501,1.0
Rf-_-,27.866667,0.466667,0.4,14.4,5.4,9.0,0.0,3.933333,20.733333,13.066667,0.564,1.126,0.829333,23.066667,3.466667,0.0,1.066667,2.0,0.009426,1.0
X1gN,25.0,0.0,1.0,9.0,6.0,3.0,0.0,2.5,20.5,7.5,0.505,0.74,0.6,15.0,3.0,0.0,0.0,2.0,0.002847,1.0
KusMe,27.413793,0.62069,0.0,14.310345,8.172414,5.724138,0.413793,2.862069,17.482759,10.689655,0.458966,1.383103,0.833793,23.103448,4.275862,0.0,0.172414,1.655172,0.01827,0.931034
