In [1]:
import numpy as np

In [2]:
# Read the match table as whole numbers, one row per line of the file.
# The dtype is pinned to 64 bits on purpose: the first column holds values
# above 2**31 (e.g. 3810300591), and plain `int` is only 32 bits wide on
# Windows builds of NumPy 1.x, where such values cannot be stored.
m = np.loadtxt('dota_data.txt', dtype=np.int64)
print(m)

[[3810300591          0        100 ...         21        101         30]
 [3810300590          0         69 ...         14          7         95]
 [3810300589          1         41 ...         16         12         87]
 ...
 [3809780562          0         86 ...         51          1         12]
 [3809780560          0         10 ...          5         98         67]
 [3809780558          0         86 ...         18         16         99]]


In [14]:
def compute_synnergy(lines, n_heroes=121):
    """Build a table of win rates for pairs of heroes playing on the same team.

    Each row of ``lines`` is read as ``[unused, radiant_won, h0, ..., h9]``:
    column 0 is not used, column 1 equals 1 when the first team ("radiant")
    won, columns 2-6 are the radiant hero ids and columns 7-11 the ids of
    the second team ("dire").

    Parameters
    ----------
    lines : iterable of integer sequences
        Match rows in the layout described above.
    n_heroes : int, optional
        Side length of the returned table; every hero id in ``lines`` must
        be smaller than this. Defaults to 121.

    Returns
    -------
    numpy.ndarray of shape (n_heroes, n_heroes)
        Entry ``[a, b]`` is the share of games won among those where ``a``
        and ``b`` were on the same team. A hero is paired with itself too,
        so ``[a, a]`` is the hero's overall win rate. Pairs that never
        shared a team are 0.
    """
    matches_pairs = np.zeros((n_heroes, n_heroes))
    wins_pairs = np.zeros((n_heroes, n_heroes))

    for line in lines:
        rad_wins = line[1] == 1
        heroes = line[2:]
        rad = heroes[:5]
        dire = heroes[5:10]

        # The result is attached to the team being walked rather than looked
        # up with `hero in rad` / `hero in dire`: a membership test credits
        # the wrong side whenever an id shows up on both rosters and makes
        # the table asymmetric.
        for team, team_won in ((rad, rad_wins), (dire, not rad_wins)):
            for hero1 in team:
                for hero2 in team:
                    matches_pairs[hero1, hero2] += 1
                    if team_won:
                        wins_pairs[hero1, hero2] += 1

    # Pairs that were never seen have zero wins; dividing by 1 instead of 0
    # leaves them at 0 without a division warning.
    return wins_pairs / np.maximum(matches_pairs, 1)

In [15]:
def compute_counters(lines, n_heroes=121):
    """Build a table of win rates for a hero playing against another hero.

    Each row of ``lines`` is read as ``[unused, radiant_won, h0, ..., h9]``:
    column 0 is not used, column 1 equals 1 when the first team ("radiant")
    won, columns 2-6 are the radiant hero ids and the remaining columns the
    ids of the second team ("dire").

    Parameters
    ----------
    lines : iterable of integer sequences
        Match rows in the layout described above.
    n_heroes : int, optional
        Side length of the returned table; every hero id in ``lines`` must
        be smaller than this. Defaults to 121.

    Returns
    -------
    numpy.ndarray of shape (n_heroes, n_heroes)
        Entry ``[a, b]`` is the share of games won by ``a``'s team among the
        games where ``a`` and ``b`` were on opposite teams. Pairs that never
        faced each other are 0.
    """
    matches_counter = np.zeros((n_heroes, n_heroes))
    wins_counter = np.zeros((n_heroes, n_heroes))

    for line in lines:
        rad_wins = line[1] == 1
        heroes = line[2:]
        rad = heroes[:5]
        dire = heroes[5:]

        # Same bookkeeping from both points of view: radiant heroes against
        # dire heroes, then dire heroes against radiant heroes.
        sides = ((rad, dire, rad_wins), (dire, rad, not rad_wins))
        for own, opponents, own_won in sides:
            for hero1 in own:
                for hero2 in opponents:
                    matches_counter[hero1, hero2] += 1
                    if own_won:
                        wins_counter[hero1, hero2] += 1

    # Match-ups that were never seen have zero wins; dividing by 1 instead
    # of 0 leaves them at 0 without a division warning.
    return wins_counter / np.maximum(matches_counter, 1)

In [16]:
# Build both win-rate tables from the leading 90% of the match rows only.
# NOTE(review): gen_train_test() recomputes its own split from `valp`; the
# rows used here match its training split only while factor == 1 - valp.
factor = 0.9
train_size = int(len(m) * factor)
wr_pairs = compute_synnergy(m[: train_size])
wr_counters = compute_counters(m[ : train_size])

In [6]:
# Show the opposing-hero win-rate table returned by compute_counters().
print(wr_counters)

[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.41973415 ... 0.         0.48491379 0.52463382]
 [0.         0.58026585 0.         ... 0.         0.52132196 0.54919908]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.51508621 0.47867804 ... 0.         0.         0.53470032]
 [0.         0.47536618 0.45080092 ... 0.         0.46529968 0.        ]]


In [17]:
def gen_train_test(lines, valp, pair_wr=None, counter_wr=None):
    """Split match rows into train/test sets and encode each row as features.

    The first ``int(len(lines) * (1 - valp))`` rows form the training set and
    the remaining rows the test set; rows are not shuffled.

    Each row is read as ``[unused, radiant_won, h0, ..., h9]`` (radiant hero
    ids in columns 2-6, dire hero ids after them). With ``n`` the side
    length of ``pair_wr``, a feature vector has ``n + 55`` entries:

    * ``[0, n)``: +1 at every radiant hero id, -1 at every dire hero id;
    * next 15: ``pair_wr[a, b]`` for radiant heroes at positions ``i <= j``;
    * next 15: ``-pair_wr[a, b]`` for dire heroes at positions ``i <= j``;
    * last 25: ``counter_wr[r, d]`` for every radiant hero ``r`` against
      every dire hero ``d`` (radiant position varies slowest).

    Parameters
    ----------
    lines : sequence of integer sequences
        Match rows in the layout described above.
    valp : float
        Fraction of the rows (taken from the end) kept for the test set.
    pair_wr : array-like of shape (n, n), optional
        Same-team win-rate table. Defaults to the notebook-level
        ``wr_pairs`` so existing two-argument calls keep working.
    counter_wr : array-like of shape (n, n), optional
        Opposing-team win-rate table. Defaults to the notebook-level
        ``wr_counters``.

    Returns
    -------
    dict
        ``{'train': {...}, 'test': {...}}``; each inner dict holds ``'in'``
        (float feature matrix), ``'out'`` (int vector copied from column 1)
        and ``'lines'`` (the corresponding slice of ``lines``).
    """
    # Explicit tables make the function usable without notebook state; the
    # fallback keeps the original behaviour for callers that pass none.
    if pair_wr is None:
        pair_wr = wr_pairs
    if counter_wr is None:
        counter_wr = wr_counters
    pair_wr = np.asarray(pair_wr)
    counter_wr = np.asarray(counter_wr)

    n_heroes = len(pair_wr)
    n_same = 15   # positions i <= j within a team of five
    n_cross = 25  # every radiant position against every dire position
    feats = n_heroes + 2 * n_same + n_cross

    train_size = int(len(lines) * (1 - valp))
    data = {'train': {}, 'test': {}}
    data['train']['lines'] = lines[:train_size]
    data['test']['lines'] = lines[train_size:]

    # Row-major upper triangle: (0,0), (0,1), ..., (0,4), (1,1), ..., (4,4).
    upper_i, upper_j = np.triu_indices(5)
    rad_start = n_heroes
    dire_start = rad_start + n_same
    cross_start = dire_start + n_same

    for kind in ('train', 'test'):
        rows = data[kind]['lines']
        data[kind]['in'] = np.zeros((len(rows), feats))
        data[kind]['out'] = np.zeros(len(rows), dtype=int)

        for i, line in enumerate(rows):
            # A row of the 2-D matrix is a view, so writes land in the matrix.
            inputs = data[kind]['in'][i]
            data[kind]['out'][i] = line[1]

            heroes = np.asarray(line[2:], dtype=np.intp)
            rad = heroes[:5]
            dire = heroes[5:]
            dire_five = dire[:5]

            # Dire is written second, so it wins if an id is on both sides.
            inputs[rad] = 1
            inputs[dire] = -1

            inputs[rad_start:dire_start] = pair_wr[rad[upper_i], rad[upper_j]]
            inputs[dire_start:cross_start] = -pair_wr[dire_five[upper_i], dire_five[upper_j]]
            inputs[cross_start:feats] = counter_wr[np.ix_(rad, dire_five)].ravel()

    return data

In [18]:
# Encode every match and keep the trailing 10% of the rows as the test
# split, then print one encoded training row as a spot check.
data = gen_train_test(m, 0.1)
print(data['train']['in'][3])

[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          0.
  0.          0.          0.          0.         -1.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          1.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  1.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          1.          0.          0.          0.
  0.          0.         

In [21]:
# Feed-forward binary classifier built with Keras.
# NOTE(review): the saved output below this cell is a NameError for
# `dropout`, a name this code does not reference — the output looks stale.
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adagrad, Adam
from keras import regularizers

model = Sequential()

# Two 10000-unit and two 100-unit ReLU layers, each followed by 50% dropout.
# The input width is taken from the first encoded training row.
model.add(Dense(10000, activation='relu', input_dim=len(data['train']['in'][0])))
model.add(Dropout(0.5))
model.add(Dense(10000, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit to go with the binary cross-entropy loss below.
model.add(Dense(1, activation='sigmoid'))

# `sgd` is only referenced by the commented-out optimizer line below.
sgd = SGD(lr=0.05)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),
              # Alternative optimizers, currently disabled:
              #optimizer='adadelta',
              #optimizer=sgd,
              metrics=['binary_accuracy'])

# The held-out split doubles as the per-epoch validation data.
model.fit(data['train']['in'], data['train']['out'],
          epochs=100,
          validation_data=(data['test']['in'], data['test']['out']))

result = model.evaluate(data['test']['in'], data['test']['out'])

# Index 1 is the single metric requested in compile(); presumably index 0
# is the loss — confirm against the Keras version in use.
print(result[1])

NameError: name 'dropout' is not defined

In [12]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 unpruned trees, each split drawn from 11 candidate
# features. random_state is fixed so a re-run rebuilds the same forest; the
# GradientBoostingClassifier cell in this notebook already pins its seed to 0.
clf = RandomForestClassifier(n_estimators=100, max_features=11, min_samples_split=2, max_depth=None, n_jobs=8, random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])
# Kept under its own name for the voting ensemble and the error comparison.
clf_forest = clf

In [10]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the library's default settings, fitted on the
# training split.
clf = LogisticRegression()
clf = clf.fit(data['train']['in'], data['train']['out'])
# Kept under its own name; the voting-ensemble cell reads `clf_log`.
clf_log = clf

In [None]:
from sklearn import neighbors

# k-nearest-neighbours classifier (k = 5, every neighbour weighted equally),
# fitted on the training split. Rebinds `clf`, which the evaluation cell reads.
clf = neighbors.KNeighborsClassifier(5, weights='uniform')
clf = clf.fit(data['train']['in'], data['train']['out'])

In [None]:
# Prints an empty line only; the SVM experiment below is commented out.
print()
# from sklearn import svm
# clf = svm.SVC()
# clf.fit(data['train']['in'], data['train']['out'])  

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with up to 10000 boosting rounds, fitted on the training split.
# random_state is fixed so a re-run gives the same ensemble; the
# GradientBoostingClassifier cell in this notebook already pins its seed to 0.
clf = AdaBoostClassifier(n_estimators=10000, random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings, fitted on the training split.
clf = GaussianNB()
clf = clf.fit(data['train']['in'], data['train']['out'])
# Kept under its own name; the voting-ensemble cell reads `clf_naive`.
clf_naive = clf

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with 2000 depth-1 trees, learning rate 1.0 and a fixed
# seed, fitted on the training split. Rebinds `clf`.
clf = GradientBoostingClassifier(n_estimators=2000, learning_rate=1.0, max_depth=1, random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# Bagged logistic regressions: each member is trained on half of the rows
# and half of the feature columns. The row/column draws are random, so
# random_state is fixed to make a re-run give the same ensemble; the
# GradientBoostingClassifier cell in this notebook already pins its seed to 0.
clf = BaggingClassifier(LogisticRegression(),
                             max_samples=0.5, max_features=0.5,
                             random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])

In [None]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of the forest, naive Bayes and logistic regression
# estimators; the weights [1, 2, 2] follow the order of `estimators`.
# Needs clf_forest, clf_naive and clf_log, so the three cells that assign
# them must have run first.
clf = VotingClassifier(estimators=[('for', clf_forest), ('naive', clf_naive), ('log', clf_log)], voting='soft', weights=[1,2,2])
clf = clf.fit(data['train']['in'], data['train']['out'])

In [13]:
# Score whichever estimator is currently bound to `clf` on the held-out rows.
# Note that this rebinds `model`, which the Keras cell also assigns.
model = clf
preds = model.predict(data['test']['in'])
correct = data['test']['out']

# Test-row positions where the prediction and the label disagree.
wrong = [
    idx
    for idx in range(len(preds))
    if not (correct[idx] == preds[idx])
]

# Every remaining position is a hit.
eq = len(preds) - len(wrong)

# Accuracy on the test split.
print(eq / len(preds))

0.5877466830192887


In [None]:
# Keep the misclassified test indices from the evaluation cell under a
# model-specific name.
# NOTE(review): this is the forest's error list only if `clf` was the random
# forest when the evaluation cell last ran — confirm before comparing.
wrong_forest = wrong

In [None]:
# Print the misclassified test indices per model for a side-by-side look.
# NOTE(review): `wrong_log` and `wrong_naive` are not assigned anywhere in the
# visible notebook; on a fresh kernel the first print would raise NameError.
print(wrong_log)
print(wrong_naive)
print(wrong_forest)