In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

## Erstelle aus `lol.csv` einen DataFrame

In [3]:
df = pd.read_csv('data/lol.csv')
df.head()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
0,4519157822,0,28,2,1,9,6,11,0,0,...,0,16567,6.8,17047,197,55,-643,8,19.7,1656.7
1,4523371949,0,12,1,0,5,5,5,0,0,...,1,17620,6.8,17438,240,52,2908,1173,24.0,1762.0
2,4521474530,0,15,0,0,7,11,4,1,1,...,0,17285,6.8,17254,203,28,1172,1033,20.3,1728.5
3,4524384067,0,43,1,0,4,5,5,1,0,...,0,16478,7.0,17961,235,47,1321,7,23.5,1647.8
4,4436033771,0,75,4,0,6,6,6,0,0,...,0,17404,7.0,18313,225,67,1004,-230,22.5,1740.4


## Data Cleaning

Die Korrelationskoeffizienten sind *interessant*

In [5]:
df.corr()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
gameId,1.0,0.000985,0.005361,-0.012057,-0.011577,-0.038993,-0.01316,-0.023329,0.016599,0.008962,...,0.003557,-0.010622,-0.012419,-0.021187,-0.005118,0.00604,0.01467,0.012315,-0.005118,-0.010622
blueWins,0.000985,1.0,8.7e-05,0.044247,0.201769,0.337358,-0.339297,0.276685,0.221944,0.213768,...,-0.103696,-0.411396,-0.352127,-0.387588,-0.212171,-0.110994,-0.511119,-0.489558,-0.212171,-0.411396
blueWardsPlaced,0.005361,8.7e-05,1.0,0.034447,0.003228,0.018138,-0.002612,0.033217,0.019892,0.017676,...,-0.008225,-0.005685,-0.008882,-0.013,-0.012395,0.001224,-0.0158,-0.027943,-0.012395,-0.005685
blueWardsDestroyed,-0.012057,0.044247,0.034447,1.0,0.017717,0.033748,-0.073182,0.067793,0.0417,0.040504,...,-0.023943,-0.067467,-0.05909,-0.057314,0.040023,-0.035732,-0.078585,-0.077946,0.040023,-0.067467
blueFirstBlood,-0.011577,0.201769,0.003228,0.017717,1.0,0.269425,-0.247929,0.229485,0.151603,0.134309,...,-0.069584,-0.301479,-0.182602,-0.19492,-0.156711,-0.024559,-0.378511,-0.240665,-0.156711,-0.301479
blueKills,-0.038993,0.337358,0.018138,0.033748,0.269425,1.0,0.004044,0.813667,0.17854,0.170436,...,-0.082491,-0.161127,-0.412219,-0.462333,-0.472203,-0.214454,-0.654148,-0.58373,-0.472203,-0.161127
blueDeaths,-0.01316,-0.339297,-0.002612,-0.073182,-0.247929,0.004044,1.0,-0.026372,-0.204764,-0.188852,...,0.15678,0.885728,0.433383,0.464584,-0.040521,-0.100271,0.64,0.577613,-0.040521,0.885728
blueAssists,-0.023329,0.276685,0.033217,0.067793,0.229485,0.813667,-0.026372,1.0,0.149043,0.170873,...,-0.06088,-0.133948,-0.356928,-0.396652,-0.337515,-0.160915,-0.549761,-0.437002,-0.337515,-0.133948
blueEliteMonsters,0.016599,0.221944,0.019892,0.0417,0.151603,0.17854,-0.204764,0.149043,1.0,0.781039,...,-0.052029,-0.216616,-0.169649,-0.189816,-0.074838,-0.087893,-0.281464,-0.263991,-0.074838,-0.216616
blueDragons,0.008962,0.213768,0.017676,0.040504,0.134309,0.170436,-0.188852,0.170873,0.781039,1.0,...,-0.032865,-0.192871,-0.149806,-0.159485,-0.059803,-0.098446,-0.233875,-0.211496,-0.059803,-0.192871


## Training
* 20% Testdaten
* `random_state=42` sofern unterstützt
* target feature: `blueWins`
* Ziel: score von 0.72

In [9]:
X = df.drop(columns='blueWins')
y = df.blueWins

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

ann = Sequential([
    layers.Dense(units=4, activation='relu', dtype='float64'),
    layers.Dense(units=4, activation='relu', dtype='float64'),
    layers.Dense(units=1, activation='sigmoid', dtype='float64'),  # LogisticRegression
])

ann.compile(optimizer='adam', loss='binary_crossentropy', metrics='accuracy')
history = ann.fit(X_train,
                  y_train,
                  batch_size=16,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  verbose=1)

In [11]:
forest = RandomForestClassifier()
forest.fit(X_train, y_train)
forest.score(X_test, y_test)

0.7139001349527665

In [12]:
import numpy as np
from sklearn.metrics import mean_squared_error

predictions = forest.predict(X_test)
np.sqrt(mean_squared_error(y_test, predictions))

0.5348830386610081