In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import RobustScaler, MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Parameters

In [None]:
# --- Reproducibility ---------------------------------------------------------
# Seed the global RNGs once, before any stochastic cell runs. The row shuffle
# (DataFrame.sample) and train_test_split are called without random_state, so
# they draw from NumPy's global generator; TensorFlow's global seed governs
# weight initialisation and batch shuffling.
# NOTE(review): results are repeatable only under Restart Kernel -> Run All;
# some GPU kernels may still be nondeterministic.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Split and training hyper-parameters -------------------------------------
TRAIN_TEST_SPLIT = 0.7   # fraction of all rows used for training
VALIDATION_SPLIT = 0.2   # fraction of the training rows passed to fit() as validation_split
EPOCHS = 128
BATCH_SIZE = 64
DENSE1 = 64              # number of units in the hidden Dense layer

optimizer = tf.keras.optimizers.Adam()
# Sparse variant: expects integer class ids as labels and, with the default
# from_logits=False, class probabilities from the model's softmax output.
loss = tf.keras.losses.SparseCategoricalCrossentropy()
# Applied to the PCA components further down. RobustScaler (also imported) is
# the alternative that was tried here previously.
scaler = MinMaxScaler()

# Data

In [None]:
# Read the raw match table from the relative ../input directory and preview
# the first rows.
DATASET_CSV = '../input/league-of-legends-diamond-ranked-games-10-min/high_diamond_ranked_10min.csv'
data = pd.read_csv(DATASET_CSV)
data.head()

In [None]:
# (n_rows, n_columns) of the table as loaded, before any columns are dropped.
data.shape

In [None]:
# Columns excluded from the feature set.
# NOTE(review): these look like identifiers or quantities derivable from the
# columns that are kept (diffs, per-minute rates, red-side mirrors) -- confirm
# against the dataset description.
DROPPED_COLUMNS = [
    'gameId', 'redKills', 'redDeaths',
    'blueGoldDiff', 'redGoldDiff', 'blueExperienceDiff', 'redExperienceDiff',
    'blueCSPerMin', 'redCSPerMin', 'blueAssists', 'redAssists',
    'blueWardsPlaced', 'redWardsPlaced', 'blueWardsDestroyed', 'redWardsDestroyed',
    'redFirstBlood', 'blueAvgLevel', 'redAvgLevel', 'blueHeralds', 'redHeralds',
    'blueTotalJungleMinionsKilled', 'redTotalJungleMinionsKilled',
    'blueTowersDestroyed', 'redTowersDestroyed',
]

# The in-place drop raised KeyError when this cell was run a second time,
# because the columns were already gone. Rebinding with errors='ignore' makes
# the cell safe to re-run.
# Trade-off: a misspelled column name is skipped silently, so check
# data.columns after editing the list above.
data = data.drop(columns=DROPPED_COLUMNS, errors='ignore')

In [None]:
# Put the rows in random order, then renumber the index from 0 so that row
# labels match row positions again.
shuffled_rows = data.sample(frac=1)
data = shuffled_rows.reset_index(drop=True)

In [None]:
# Target vector: the 'blueWins' column of the shuffled table.
y = data.loc[:, 'blueWins']

In [None]:
# Feature matrix: every remaining column except the label. drop() returns a
# new frame, so `data` keeps its 'blueWins' column.
X = data.drop(columns=['blueWins'])

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first labels; they are row-aligned with X because both were
# taken from the same shuffled frame.
y.head()

In [None]:
# Pairwise correlation between the feature columns.
feature_corr = X.corr()

fig, ax = plt.subplots(figsize=(10, 8))
# Correlations lie in [-1, 1]. Pinning the colour scale to that range and
# centring a diverging palette on 0 makes sign and strength readable; the
# default scale is sequential and stretched to the data.
sns.heatmap(feature_corr, vmin=-1, vmax=1, center=0, cmap='coolwarm', ax=ax)
ax.set_title('Feature correlation matrix')
plt.show()

# Dimensionality Reduction

In [None]:
# Project the feature matrix onto its first three principal components.
# NOTE(review): the PCA is fitted on every row, before the train/test split,
#   so the test rows influence the projection. Fit on the training rows only
#   if an unbiased test score matters.
# NOTE(review): the features are not scaled before PCA (scaling happens on the
#   components afterwards), so columns with large numeric ranges will dominate
#   the components. Confirm this is intended.
# NOTE: this cell reads `X`, which the next cell rebinds to the scaled
#   components. Re-run from the feature-extraction cell, not from here.
pca = PCA(n_components=3)
pca.fit(X)

# Derive the labels from the fitted model so they always match n_components.
component_names = [f'PC{i + 1}' for i in range(pca.n_components_)]

# One bar per component. A histogram would bin the three ratio values instead
# of showing how much variance each component explains.
plt.figure(figsize=(10,10))
plt.bar(component_names, pca.explained_variance_ratio_)
plt.xlabel('Principal component')
plt.ylabel('Explained variance ratio')
plt.show()

X_PCA = pd.DataFrame(pca.transform(X), columns=component_names)

In [None]:
# Rescale the principal components with the configured scaler. From here on
# `X` is the scaled component matrix that feeds the model.
# NOTE(review): the scaler is fitted on all rows, test rows included.
X = pd.DataFrame(scaler.fit_transform(X_PCA), columns=X_PCA.columns)

# PC1 against each of the other two components, coloured by the label so any
# class separation is visible. Small translucent markers reduce overplotting.
for other_pc in ('PC2', 'PC3'):
    plt.figure(figsize=(14,10))
    plt.scatter(X['PC1'], X[other_pc], c=y, cmap='coolwarm', s=8, alpha=0.5)
    plt.xlabel('PC1')
    plt.ylabel(other_pc)
    plt.title(f'PC1 vs {other_pc} (colour = blueWins)')
    plt.show()

In [None]:
# Expected row counts for the split: the training share is rounded down and
# the test set takes whatever is left.
num_examples = len(X)
num_train_examples = int(num_examples * TRAIN_TEST_SPLIT)
num_test_examples = num_examples - num_train_examples

print(f"samples: {num_examples}  train: {num_train_examples} test: {num_test_examples}")

# Model

In [None]:
# Single-hidden-layer classifier built with the Keras functional API:
#   input (one value per column of X) -> Dense(DENSE1, sigmoid) -> Dense(2, softmax)
n_features = X.shape[1]

inputs = tf.keras.Input(shape=(n_features,))
x = tf.keras.layers.Dense(units=DENSE1, activation=tf.nn.sigmoid)(inputs)
# Two output units with softmax: one probability per class.
outputs = tf.keras.layers.Dense(units=2, activation=tf.nn.softmax)(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [None]:
# compile() configures the model in place and returns None. Its return value
# is deliberately not bound: assigning it to `result` would overwrite the
# training History with None whenever this cell is re-run after fit().
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
# Share of rows with blueWins == 1. A value near 0.5 means the classes are
# balanced, which is what makes plain accuracy a reasonable metric.
y.mean()

In [None]:
# Pass the training share directly. `1 - TRAIN_TEST_SPLIT` evaluates to
# 0.30000000000000004, and scikit-learn rounds a float test_size *up*, so the
# test set gained an extra row whenever 0.3 * n_rows was a whole number.
# With train_size the training set gets floor(TRAIN_TEST_SPLIT * n_rows) rows
# and the test set gets the rest, matching the counts printed earlier.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=TRAIN_TEST_SPLIT)

# Every row must land in exactly one partition.
assert len(X_train) + len(X_test) == len(X)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

In [None]:
# Preview the training features; the index shows which original rows the
# split selected.
X_train.head()

# Training

In [None]:
# Train the classifier. `result` is the Keras History object whose per-epoch
# metrics are plotted in the next cell; VALIDATION_SPLIT of the training rows
# is held out for validation by fit() itself.
fit_options = dict(
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    validation_split=VALIDATION_SPLIT,
    shuffle=True,
)
result = model.fit(x=X_train, y=y_train, **fit_options)

In [None]:
history = result.history

# Take the x-axis from the recorded history, not from the EPOCHS constant. The
# two differ if EPOCHS is edited after training or training stops early, and
# plt.plot then fails on mismatched lengths.
epochs_run = range(len(history['loss']))

# Loss: training and validation together, so divergence (overfitting) shows.
plt.figure(figsize=(10,10))
plt.plot(epochs_run, history['loss'], color='b', label='train')
plt.plot(epochs_run, history['val_loss'], color='c', label='validation')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Accuracy: same pairing.
plt.figure(figsize=(10,10))
plt.plot(epochs_run, history['accuracy'], color='m', label='train')
plt.plot(epochs_run, history['val_accuracy'], color='r', label='validation')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Cell output: 0-based index of the epoch with the highest validation accuracy.
np.argmax(history['val_accuracy'])

# Results

In [None]:
# Score the trained model on the held-out test rows. evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
# The loss value is bound to `test_loss`, not `loss`: `loss` holds the Keras
# loss object passed to compile(), and overwriting it with a float would break
# a re-run of the compile cell.
test_loss, accuracy = model.evaluate(x=X_test, y=y_test)

# END placeholder