In [19]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from keras.layers import *
from keras.models import Model
from keras.callbacks import *
from keras import regularizers
import keras

import pandas as pd

In [3]:
# Role names; a role is referred to everywhere else by its index in this list.
ROLES = [
    "Kerrigan", "Scientist", "Dark Templar", "Ascendant", "Spirit",   # 0-4
    "Ares", "Prophet", "Stukov", "Artanis", "Zagara",                 # 5-9
    "Engineer", "Team Nova", "Nomad", "Dehaka", "Helios",             # 10-14
    "Random", "Thakras", "Swann", "Warden", "Selendis",               # 15-19
    "Niadra", "Mira", "Scion",                                        # 20-22
]

# Team sizes used when padding per-team MMR lists, and the number of player
# slots written to the exported table.
SURV_TEAM_MAX = 8
KERRI_TEAM_MAX = 2
MAX_PLAYERS = 10

# Placeholder MMR values for empty team slots.
MISSING_SURV_MMR = 1000
MISSING_KERRI_MMR = 1000

# Role indices that put a player on the Kerrigan side: [0, 9, 13, 16, 20].
KERRI_ROLES = [
    ROLES.index(name)
    for name in ("Kerrigan", "Zagara", "Dehaka", "Thakras", "Niadra")
]
# Role indices counted by Game.num_supports(): [2, 6, 11, 18, 22].
SUPPORT_ROLES = [
    ROLES.index(name)
    for name in ("Dark Templar", "Prophet", "Team Nova", "Warden", "Scion")
]

In [28]:
class Game:
    """One recorded match: a timestamp, per-player roles and MMRs, and the outcome.

    Parameters
    ----------
    time : value stored as-is and returned by ``time()``.
    roles : list of int indices into ``ROLES``, one per player.
    mmrs : list of raw MMR codes, one per player; each is expanded to
        ``code * 20 + 600`` on construction.
    win : outcome flag; ``0`` is rendered as a Survivor win, anything else as
        a Kerrigan win.
    """

    def __init__(self, time, roles, mmrs, win):
        self._time = time
        self._roles = roles
        # Expand the stored codes to the MMR scale used everywhere else.
        self._mmrs = [mmr * 20 + 600 for mmr in mmrs]
        self._win = win

    def time(self):
        """Return the timestamp passed to the constructor."""
        return self._time

    def roles(self):
        """Return the list of role indices, one per player."""
        return self._roles

    def mmrs(self):
        """Return the expanded MMR of every player, in player order."""
        return self._mmrs

    def num_players(self):
        """Return the number of players (length of the MMR list)."""
        return len(self.mmrs())

    def win(self):
        """Return the outcome flag passed to the constructor."""
        return self._win

    def _raw_surv_mmrs(self):
        """MMRs of players whose role is not in ``KERRI_ROLES`` (no padding)."""
        return [self.mmrs()[i] for i in range(self.num_players())
                if self.roles()[i] not in KERRI_ROLES]

    def _raw_kerri_mmrs(self):
        """MMRs of players whose role is in ``KERRI_ROLES`` (no padding)."""
        return [self.mmrs()[i] for i in range(self.num_players())
                if self.roles()[i] in KERRI_ROLES]

    def surv_mmrs(self):
        """Survivor MMRs, padded with ``MISSING_SURV_MMR`` up to ``SURV_TEAM_MAX``."""
        mmrs = self._raw_surv_mmrs()
        return mmrs + [MISSING_SURV_MMR] * (SURV_TEAM_MAX - len(mmrs))

    def kerri_mmrs(self):
        """Kerrigan-side MMRs, padded up to ``KERRI_TEAM_MAX``.

        Empty slots are filled with the mean of the players actually present.
        When no Kerrigan-side player exists, ``MISSING_KERRI_MMR`` is used so
        that the result never contains NaN (``np.mean([])`` is NaN).
        """
        mmrs = self._raw_kerri_mmrs()
        fill = np.mean(mmrs) if mmrs else MISSING_KERRI_MMR
        return mmrs + [fill] * (KERRI_TEAM_MAX - len(mmrs))

    def avg_surv_mmrs(self):
        """Return a one-element list holding the mean of the padded survivor MMRs."""
        return [np.mean(self.surv_mmrs())]

    def avg_kerri_mmrs(self):
        """Return a one-element list holding the mean of the padded Kerrigan MMRs."""
        return [np.mean(self.kerri_mmrs())]

    def other_surv_features(self):
        """Return ``[number of survivor players, number of support roles]``.

        The count uses the unpadded list; the padded list has a fixed minimum
        length and would make this feature constant.
        """
        return [len(self._raw_surv_mmrs()), self.num_supports()]

    def num_supports(self):
        """Count players (on either side) whose role is in ``SUPPORT_ROLES``."""
        return sum(1 for role in self.roles() if role in SUPPORT_ROLES)

    def other_kerri_features(self):
        """Return ``[number of Kerrigan-side players]`` (unpadded count)."""
        return [len(self._raw_kerri_mmrs())]

    def histogram(self, values):
        """Return counts of ``values`` in 10 equal bins over the range 1000-2400."""
        return np.histogram(values, bins=10, range=(1000, 2400))[0]

    def _player_string(self, i):
        """Format player ``i`` as ``[RoleName, mmr]``."""
        return '[' + ROLES[self.roles()[i]] + ', ' + str(self.mmrs()[i]) + ']'

    def csv_str(self):
        """Return ``role,mmr,`` for every player concatenated (trailing comma kept)."""
        combined = ''
        for player in range(len(self.roles())):
            combined += ROLES[self.roles()[player]] + ',' + str(self.mmrs()[player]) + ','
        return combined

    def __str__(self):
        """Return all player strings followed by ``|Survivor`` or ``|Kerrigan``."""
        players = ''.join(self._player_string(player)
                          for player in range(len(self.roles())))
        # Resolve the winner first: writing `'|' + 'Survivor' if ... else 'Kerrigan'`
        # binds the '|' to the Survivor branch only and drops it for Kerrigan.
        winner = 'Survivor' if self.win() == 0 else 'Kerrigan'
        return players + '|' + winner
    
def parse_legacy(line):
    """Decode one fixed-width record into a ``Game``.

    Layout: the first 4 characters are the time; then come 4-character
    groups of 2 characters for the role index and 2 for the MMR code; the
    group scan stops once 2 or fewer characters remain, and that remainder
    is parsed as the win flag. All fields are parsed with ``int``.
    """
    time = int(line[:4])
    payload = line[4:]

    roles, mmrs = [], []
    cursor = 0
    # Consume role/MMR groups until only the win flag is left.
    while len(payload) - cursor > 2:
        roles.append(int(payload[cursor:cursor + 2]))
        mmrs.append(int(payload[cursor + 2:cursor + 4]))
        cursor += 4

    win = int(payload[cursor:])
    return Game(time, roles, mmrs, win)
    
    
# Read the whole balance-bank dump; each line is one game record.
with open('balancebank132020.txt', 'r') as f:
    lines = f.readlines()

# Drop the first character of every line and surrounding whitespace before parsing.
games = list(map(lambda raw: parse_legacy(raw[1:].strip()), lines))

In [48]:
# Per-game model features. Despite the "sorted_" prefix, the active variant
# stores a single team-average MMR per game. Alternatives tried earlier:
#   sorted(game.surv_mmrs(), reverse=True) / sorted(game.kerri_mmrs(), reverse=True)
#   game.histogram(game.surv_mmrs())       / game.histogram(game.kerri_mmrs())
sorted_surv_mmrs = np.array([game.avg_surv_mmrs() for game in games])
sorted_kerri_mmrs = np.array([game.avg_kerri_mmrs() for game in games])

# Extra per-team features, kept as plain lists of lists.
other_surv_features = [game.other_surv_features() for game in games]
other_kerri_features = [game.other_kerri_features() for game in games]

# Target: the win flag of each game.
labels = [game.win() for game in games]

In [49]:
# Fit one scaler per team over every MMR value viewed as a single column,
# so all slots of a team share the same mean and variance.
surv_scaler = StandardScaler()
surv_scaler.fit(np.asarray(sorted_surv_mmrs).reshape(-1, 1))

kerri_scaler = StandardScaler()
kerri_scaler.fit(np.asarray(sorted_kerri_mmrs).reshape(-1, 1))

# Squared-MMR variant (disabled):
#surv_power_scaler = StandardScaler().fit(np.power(sorted_surv_mmrs, 2).flatten().reshape((-1, 1)))
#kerri_power_scaler = StandardScaler().fit(np.power(sorted_kerri_mmrs, 2).flatten().reshape((-1, 1)))

In [50]:
# Standardise the team MMR features; the result has one column per game.
surv_column = sorted_surv_mmrs.reshape(-1, 1)
kerri_column = sorted_kerri_mmrs.reshape(-1, 1)
scaled_surv_mmrs = surv_scaler.transform(surv_column).reshape(-1, 1)
scaled_kerri_mmrs = kerri_scaler.transform(kerri_column).reshape(-1, 1)

# Per-slot variant (disabled) - reshapes back to one column per team slot:
#scaled_surv_mmrs = surv_scaler.transform(sorted_surv_mmrs.flatten().reshape((-1, 1))).reshape((-1, SURV_TEAM_MAX))
#scaled_kerri_mmrs = kerri_scaler.transform(sorted_kerri_mmrs.flatten().reshape((-1, 1))).reshape((-1, KERRI_TEAM_MAX))

# Squared-MMR variant (disabled):
#scaled_power_surv_mmrs = surv_power_scaler.transform(np.power(sorted_surv_mmrs, 2).reshape((-1, 1))).reshape((-1, SURV_TEAM_MAX))
#scaled_power_kerri_mmrs = kerri_power_scaler.transform(np.power(sorted_kerri_mmrs, 2).reshape((-1, 1))).reshape((-1, KERRI_TEAM_MAX))

In [51]:
# Standardised extra features; computed here but only consumed by the
# disabled hstack variants below.
other_surv_scaled = StandardScaler().fit(other_surv_features).transform(other_surv_features)
other_kerri_scaled = StandardScaler().fit(other_kerri_features).transform(other_kerri_features)

# Active model inputs: the scaled MMR features alone.
input_surv_mmrs, input_kerri_mmrs = scaled_surv_mmrs, scaled_kerri_mmrs

# Disabled variants:
#input_surv_mmrs = np.hstack([scaled_surv_mmrs, scaled_power_surv_mmrs])
#input_kerri_mmrs = np.hstack([scaled_kerri_mmrs, scaled_power_kerri_mmrs])
#input_surv_mmrs = np.hstack([scaled_surv_mmrs, other_surv_scaled])
#input_kerri_mmrs = np.hstack([scaled_kerri_mmrs, other_kerri_scaled])

In [57]:

# One input tensor per team, sized from the feature matrices.
surv_team_input = Input(shape=(input_surv_mmrs.shape[1],))
kerri_team_input = Input(shape=(input_kerri_mmrs.shape[1],))

# Each team is reduced to a scalar "strength" by a bias-free, L2-regularised
# linear layer.
surv_strength_layer = Dense(1, activation='linear',
                            kernel_regularizer=keras.regularizers.l2(0.01),
                            use_bias=False)
surv_team_strength = surv_strength_layer(surv_team_input)

kerri_strength_layer = Dense(1, activation='linear',
                             kernel_regularizer=keras.regularizers.l2(0.01),
                             use_bias=False)
kerri_team_strength = kerri_strength_layer(kerri_team_input)

# The sigmoid output acts on (Kerrigan strength - Survivor strength); its
# kernel is constrained to be non-negative.
win_probability_layer = Dense(1, activation='sigmoid',
                              kernel_constraint=keras.constraints.NonNeg())
strength_gap = Subtract()([kerri_team_strength, surv_team_strength])
output = win_probability_layer(strength_gap)

# Two-input model: both team strength layers feed the single sigmoid output.
model = Model(inputs=[surv_team_input, kerri_team_input], outputs=output)
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Stop after 10 epochs without val_loss improvement and keep only the best
# weights on disk.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min',
                             verbose=1, save_best_only=True)
callbacks = [early_stopping, checkpoint]

model.fit(
    x=[input_surv_mmrs, input_kerri_mmrs],
    y=labels,
    epochs=100,
    shuffle=True,
    callbacks=callbacks,
    validation_split=0.15,
    batch_size=64,
)  # starts training

Train on 1700 samples, validate on 300 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.66295, saving model to best_model.h5
Epoch 2/100

Epoch 00002: val_loss improved from 0.66295 to 0.65834, saving model to best_model.h5
Epoch 3/100

Epoch 00003: val_loss improved from 0.65834 to 0.65368, saving model to best_model.h5
Epoch 4/100

Epoch 00004: val_loss improved from 0.65368 to 0.64935, saving model to best_model.h5
Epoch 5/100

Epoch 00005: val_loss improved from 0.64935 to 0.64507, saving model to best_model.h5
Epoch 6/100

Epoch 00006: val_loss improved from 0.64507 to 0.64062, saving model to best_model.h5
Epoch 7/100

Epoch 00007: val_loss improved from 0.64062 to 0.63630, saving model to best_model.h5
Epoch 8/100

Epoch 00008: val_loss improved from 0.63630 to 0.63201, saving model to best_model.h5
Epoch 9/100

Epoch 00009: val_loss improved from 0.63201 to 0.62769, saving model to best_model.h5
Epoch 10/100

Epoch 00010: val_loss improved from 0.62769 to 0.623


Epoch 00036: val_loss improved from 0.55351 to 0.55280, saving model to best_model.h5
Epoch 37/100

Epoch 00037: val_loss improved from 0.55280 to 0.55217, saving model to best_model.h5
Epoch 38/100

Epoch 00038: val_loss improved from 0.55217 to 0.55153, saving model to best_model.h5
Epoch 39/100

Epoch 00039: val_loss improved from 0.55153 to 0.55107, saving model to best_model.h5
Epoch 40/100

Epoch 00040: val_loss improved from 0.55107 to 0.55058, saving model to best_model.h5
Epoch 41/100

Epoch 00041: val_loss improved from 0.55058 to 0.55017, saving model to best_model.h5
Epoch 42/100

Epoch 00042: val_loss improved from 0.55017 to 0.54975, saving model to best_model.h5
Epoch 43/100

Epoch 00043: val_loss improved from 0.54975 to 0.54937, saving model to best_model.h5
Epoch 44/100

Epoch 00044: val_loss improved from 0.54937 to 0.54900, saving model to best_model.h5
Epoch 45/100

Epoch 00045: val_loss improved from 0.54900 to 0.54868, saving model to best_model.h5
Epoch 46/100



Epoch 00071: val_loss improved from 0.54332 to 0.54317, saving model to best_model.h5
Epoch 72/100

Epoch 00072: val_loss improved from 0.54317 to 0.54301, saving model to best_model.h5
Epoch 73/100

Epoch 00073: val_loss improved from 0.54301 to 0.54289, saving model to best_model.h5
Epoch 74/100

Epoch 00074: val_loss improved from 0.54289 to 0.54278, saving model to best_model.h5
Epoch 75/100

Epoch 00075: val_loss improved from 0.54278 to 0.54264, saving model to best_model.h5
Epoch 76/100

Epoch 00076: val_loss improved from 0.54264 to 0.54253, saving model to best_model.h5
Epoch 77/100

Epoch 00077: val_loss improved from 0.54253 to 0.54237, saving model to best_model.h5
Epoch 78/100

Epoch 00078: val_loss improved from 0.54237 to 0.54226, saving model to best_model.h5
Epoch 79/100

Epoch 00079: val_loss improved from 0.54226 to 0.54214, saving model to best_model.h5
Epoch 80/100

Epoch 00080: val_loss improved from 0.54214 to 0.54196, saving model to best_model.h5
Epoch 81/100


<keras.callbacks.callbacks.History at 0x14d74ec50>

In [55]:
# Replace the in-memory model with the checkpoint file 'best_model.h5'
# (the path the ModelCheckpoint callback saves to) and show its weights.
model = keras.models.load_model('best_model.h5')
model.weights

[<tf.Variable 'dense_23_1/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[0.8657648]], dtype=float32)>,
 <tf.Variable 'dense_22_1/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[0.5728003]], dtype=float32)>,
 <tf.Variable 'dense_24_1/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[1.5281595]], dtype=float32)>,
 <tf.Variable 'dense_24_1/bias:0' shape=(1,) dtype=float32, numpy=array([-0.28499046], dtype=float32)>]

In [56]:
# Model output for every game, flattened to one value per game, printed
# alongside the game's string form.
predictions = model.predict([input_surv_mmrs, input_kerri_mmrs]).ravel()
for kerri_win_prob, game in zip(predictions, games):
    print(kerri_win_prob, game)

0.89338756 [Selendis, 1940][Helios, 2580][Scientist, 1760][Ascendant, 1520][Dehaka, 2280][Prophet, 1040][Selendis, 1060][Scientist, 2160][Selendis, 1500]|Survivor
0.033741537 [Team Nova, 1940][Ascendant, 2580][Dehaka, 1180][Zagara, 1220][Scientist, 1960][Prophet, 1500][Mira, 1560][Spirit, 1300][Stukov, 1720][Selendis, 1880]|Survivor
0.77485377 [Dehaka, 1400][Ascendant, 1200][Ascendant, 1200][Ascendant, 1200][Zagara, 1340][Ascendant, 1500]Kerrigan
0.19682398 [Ascendant, 1180][Ascendant, 1460][Dehaka, 1360][Prophet, 1820][Dark Templar, 1500][Team Nova, 1540][Nomad, 1460][Nomad, 2000][Ascendant, 1200][Zagara, 1180]|Survivor
0.6161351 [Dehaka, 1180][Spirit, 1560][Stukov, 720][Scientist, 2140][Zagara, 1980][Nomad, 2080][Ascendant, 1180][Ares, 1380][Swann, 1200][Prophet, 1220]Kerrigan
0.97010493 [Thakras, 2580][Scion, 1800][Spirit, 1880][Niadra, 2500][Selendis, 1800][Helios, 1600][Ascendant, 1220][Dark Templar, 2220][Scientist, 1380][Scientist, 1400]Kerrigan
0.66683346 [Stukov, 640][Nomad, 1

0.3304757 [Scientist, 1620][Dehaka, 1920][Zagara, 1200][Selendis, 1540][Artanis, 1260][Scientist, 1480][Scientist, 2380][Stukov, 1680][Artanis, 1260][Prophet, 1780]Kerrigan
0.40752596 [Scientist, 1400][Spirit, 1300][Scientist, 1160][Spirit, 1440][Team Nova, 1360][Dehaka, 1260][Engineer, 1420][Prophet, 1260][Scientist, 1380][Zagara, 1280]|Survivor
0.93894124 [Kerrigan, 2480][Niadra, 1380][Spirit, 1120][Ascendant, 1180][Spirit, 1060][Scientist, 1180][Nomad, 1840][Ascendant, 1600][Artanis, 1340][Scientist, 1200]Kerrigan
0.96361315 [Spirit, 2020][Spirit, 1220][Swann, 1680][Nomad, 1220][Kerrigan, 2540][Artanis, 1200][Zagara, 1760][Ascendant, 1740][Prophet, 1220][Selendis, 840]Kerrigan
0.24859177 [Zagara, 1160][Artanis, 1180][Prophet, 1200][Stukov, 1980][Ascendant, 1380][Ascendant, 1520][Artanis, 1060][Thakras, 1220][Selendis, 1420][Selendis, 1500]Kerrigan
0.16188954 [Ascendant, 1340][Kerrigan, 1200][Scion, 2320][Scientist, 1240][Prophet, 1480][Mira, 1500][Ascendant, 1400][Spirit, 1740][Spir

0.40315345 [Scientist, 1380][Scientist, 1280][Dehaka, 1140][Prophet, 1320][Selendis, 1400][Dark Templar, 1280][Zagara, 1360][Scientist, 1180][Spirit, 1260][Ascendant, 1520]Kerrigan
0.7820946 [Selendis, 1300][Zagara, 1580][Artanis, 1220][Selendis, 660][Stukov, 1220][Ascendant, 1540][Swann, 1160][Ascendant, 1460][Dehaka, 1300][Dark Templar, 920]Kerrigan
0.14231397 [Zagara, 1180][Dark Templar, 2300][Ascendant, 1260][Ares, 2080][Scientist, 1420][Scientist, 1960][Thakras, 1480][Selendis, 1600][Stukov, 1460]Kerrigan
0.13324454 [Prophet, 2040][Team Nova, 1820][Ares, 1660][Zagara, 1300][Nomad, 1320][Helios, 1520][Dark Templar, 1640][Ascendant, 1300][Helios, 1260][Thakras, 1160]|Survivor
0.24273908 [Prophet, 2060][Dehaka, 1340][Helios, 1140][Scientist, 1480][Ascendant, 1400][Zagara, 1160][Mira, 1420][Selendis, 1220][Dark Templar, 1640][Nomad, 1300]Kerrigan
0.10453364 [Ascendant, 2520][Artanis, 1380][Spirit, 2080][Prophet, 1280][Zagara, 1160][Dehaka, 1420][Team Nova, 1880][Dark Templar, 1420][Sp

In [12]:
# Write one line per game: the role,mmr pairs produced by Game.csv_str().
with open("export.csv", 'w') as f:
    f.writelines(game.csv_str() + '\n' for game in games)

In [13]:
# Column names for the export: class_0, mmr_0, class_1, mmr_1, ...
headers = [
    prefix + str(slot)
    for slot in range(MAX_PLAYERS)
    for prefix in ('class_', 'mmr_')
]

In [14]:
# Load the exported player table and append the outcome and the model's
# per-game output as two extra columns.
csv = pd.read_csv('export.csv', header=None, names=headers).assign(
    win=["Survivor" if game.win() == 0 else "Kerrigan" for game in games],
    kerri_win_pct=predictions,
)

In [15]:
# Save the combined table as an Excel workbook, omitting the row index.
csv.to_excel('balance_bank_1_3_2020.xlsx', index=False)

In [16]:
# Save the same table as CSV, omitting the row index.
csv.to_csv('balance_bank_export.csv', index=False)

In [18]:
# Display the combined table as the cell output.
csv

Unnamed: 0,class_0,mmr_0,class_1,mmr_1,class_2,mmr_2,class_3,mmr_3,class_4,mmr_4,...,class_6,mmr_6,class_7,mmr_7,class_8,mmr_8,class_9,mmr_9,win,kerri_win_pct
0,Selendis,1940,Helios,2580,Scientist,1760,Ascendant,1520,Dehaka,2280,...,Selendis,1060.0,Scientist,2160.0,Selendis,1500.0,,,Survivor,0.827352
1,Team Nova,1940,Ascendant,2580,Dehaka,1180,Zagara,1220,Scientist,1960,...,Mira,1560.0,Spirit,1300.0,Stukov,1720.0,Selendis,1880.0,Survivor,0.033203
2,Dehaka,1400,Ascendant,1200,Ascendant,1200,Ascendant,1200,Zagara,1340,...,,,,,,,,,Kerrigan,0.773006
3,Ascendant,1180,Ascendant,1460,Dehaka,1360,Prophet,1820,Dark Templar,1500,...,Nomad,1460.0,Nomad,2000.0,Ascendant,1200.0,Zagara,1180.0,Survivor,0.175135
4,Dehaka,1180,Spirit,1560,Stukov,720,Scientist,2140,Zagara,1980,...,Ascendant,1180.0,Ares,1380.0,Swann,1200.0,Prophet,1220.0,Kerrigan,0.549447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Zagara,1260,Ascendant,1820,Prophet,1780,Dehaka,1200,Stukov,1960,...,Scion,2040.0,Scientist,1180.0,Scientist,2040.0,,,Survivor,0.055562
1996,Helios,1180,Dehaka,1380,Stukov,1380,Ascendant,1100,Prophet,1360,...,Mira,1680.0,Ascendant,1140.0,Ares,1560.0,,,Kerrigan,0.513498
1997,Spirit,1180,Zagara,1560,Prophet,1440,Dark Templar,1440,Ascendant,1240,...,Ascendant,1200.0,Selendis,1340.0,Ares,1040.0,Kerrigan,1440.0,Survivor,0.744102
1998,Ascendant,2120,Nomad,2300,Scientist,2520,Stukov,1580,Mira,1980,...,Zagara,1240.0,Engineer,1840.0,Kerrigan,2360.0,Swann,1920.0,Survivor,0.155971
