In [1]:
import numpy as np

In [2]:
# Load the whitespace-separated match table as integers.
# The dtype is pinned to 64-bit: the first column holds values such as
# 3810300591, which do not fit in int32, and a bare `int` maps to a 32-bit
# integer on some platforms (e.g. Windows with NumPy 1.x).
m = np.loadtxt('dota_data.txt', dtype=np.int64)
print(m)

[[3810300591          0        100 ...         21        101         30]
 [3810300590          0         69 ...         14          7         95]
 [3810300589          1         41 ...         16         12         87]
 ...
 [3809780562          0         86 ...         51          1         12]
 [3809780560          0         10 ...          5         98         67]
 [3809780558          0         86 ...         18         16         99]]


In [3]:
def compute_synnergy(lines, n_heroes=121):
    """Compute the same-team win rate for every pair of hero ids.

    Each row of ``lines`` is read as ``[id, rad_win, h0, ..., h9]``: column 1
    equals 1 when the first team (``rad``) won, columns 2-6 are the ``rad``
    hero ids and the remaining columns are the ``dire`` hero ids.

    Parameters
    ----------
    lines : iterable of integer sequences
        Match rows in the layout described above.
    n_heroes : int, optional
        Side length of the square tables; every hero id must be smaller than
        this value. Defaults to 121.

    Returns
    -------
    numpy.ndarray of shape (n_heroes, n_heroes)
        Symmetric table where entry ``[a, b]`` is wins / matches for heroes
        ``a`` and ``b`` playing on the same team. Pairs that never played
        together are 0. The diagonal holds each hero's own win rate (a hero is
        paired with itself because the inner loop starts at ``i``).
    """
    matches_pairs = np.zeros((n_heroes, n_heroes))
    wins_pairs = np.zeros((n_heroes, n_heroes))

    for line in lines:
        rad_wins = line[1] == 1
        heroes = line[2:]
        rad = heroes[:5]
        dire = heroes[5:]

        # Carry the outcome alongside each team instead of re-deriving it from
        # a membership test on every pair.
        for team, team_won in ((rad, rad_wins), (dire, not rad_wins)):
            for i in range(5):
                hero1 = team[i]
                for j in range(i, 5):
                    hero2 = team[j]
                    matches_pairs[hero1, hero2] += 1
                    matches_pairs[hero2, hero1] += 1
                    if team_won:
                        wins_pairs[hero1, hero2] += 1
                        wins_pairs[hero2, hero1] += 1

    # Avoid 0/0 for pairs that never appeared together; their win count is 0,
    # so the resulting rate is 0.
    matches_pairs[matches_pairs == 0] = 1
    return wins_pairs / matches_pairs

In [4]:
def compute_counters(lines, n_heroes=121):
    """Compute the win rate of every hero id against every opposing hero id.

    Each row of ``lines`` is read as ``[id, rad_win, h0, ..., h9]``: column 1
    equals 1 when the first team (``rad``) won, columns 2-6 are the ``rad``
    hero ids and the remaining columns are the ``dire`` hero ids.

    Parameters
    ----------
    lines : iterable of integer sequences
        Match rows to aggregate. Only these rows are used, so passing a
        training slice keeps held-out matches out of the table.
    n_heroes : int, optional
        Side length of the square tables; every hero id must be smaller than
        this value. Defaults to 121.

    Returns
    -------
    numpy.ndarray of shape (n_heroes, n_heroes)
        Entry ``[a, b]`` is the fraction of matches in which hero ``a`` faced
        hero ``b`` and ``a``'s team won. Match-ups that never occurred are 0.
    """
    matches_counter = np.zeros((n_heroes, n_heroes))
    wins_counter = np.zeros((n_heroes, n_heroes))

    # Iterate over the argument, not a notebook-level variable.
    for line in lines:
        rad_wins = line[1] == 1
        heroes = line[2:]
        rad = heroes[:5]
        dire = heroes[5:]

        # Record every match-up from both points of view: rad first, then dire.
        for own, enemy, own_won in ((rad, dire, rad_wins), (dire, rad, not rad_wins)):
            for hero1 in own:
                for hero2 in enemy:
                    matches_counter[hero1, hero2] += 1
                    if own_won:
                        wins_counter[hero1, hero2] += 1

    # Avoid 0/0 for match-ups that never occurred; their win count is 0.
    matches_counter[matches_counter == 0] = 1
    return wins_counter / matches_counter

In [5]:
# Build both win-rate tables from the leading `factor` share of the rows.
factor = 0.9
train_size = int(factor * len(m))
train_rows = m[:train_size]
wr_pairs = compute_synnergy(train_rows)
wr_counters = compute_counters(train_rows)

In [None]:
# Inspect the hero-vs-hero win-rate table returned by compute_counters.
print(wr_counters)

In [6]:
def gen_train_test(lines, valp, pair_wr=None, counter_wr=None):
    """Split match rows into train/test sets and encode each row as features.

    Each row of ``lines`` is read as ``[id, rad_win, h0, ..., h9]``; column 1
    is the label, columns 2-6 are the ``rad`` hero ids and the remaining
    columns are the ``dire`` hero ids. The first ``int(len(lines) * (1 - valp))``
    rows form the training set and the rest form the test set (no shuffling).

    Feature layout of every encoded row (176 values):

    * ``0..120``   -- hero slots: ``+1`` for a ``rad`` hero, ``-1`` for a ``dire`` hero
    * ``121..135`` -- pair win rates of the ``rad`` heroes (15 pairs, ``i <= j``)
    * ``136..150`` -- negated pair win rates of the ``dire`` heroes
    * ``151..175`` -- counter win rates of each ``rad`` hero vs each ``dire`` hero

    Parameters
    ----------
    lines : sequence of integer sequences supporting slicing
        Match rows in the layout described above.
    valp : float
        Fraction of rows held out as the test set.
    pair_wr : 2-D array indexable by ``[hero_a, hero_b]``, optional
        Same-team win-rate table. When omitted, the notebook-level
        ``wr_pairs`` is used.
    counter_wr : 2-D array indexable by ``[hero_a, hero_b]``, optional
        Opposing-team win-rate table. When omitted, the notebook-level
        ``wr_counters`` is used.

    Returns
    -------
    dict
        ``{'train': {...}, 'test': {...}}`` where each inner dict has ``'in'``
        (float feature matrix), ``'out'`` (int label vector) and ``'lines'``
        (the raw rows of that split).
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working; explicit tables make the function usable without hidden state.
    if pair_wr is None:
        pair_wr = wr_pairs
    if counter_wr is None:
        counter_wr = wr_counters

    feats = 121 + 55  # 121 hero slots + 15 + 15 pair features + 25 counter features
    train_size = int(len(lines) * (1 - valp))
    test_size = len(lines) - train_size
    data = {'train': {}, 'test': {}}
    data['train']['in'] = np.zeros((train_size, feats))
    data['train']['out'] = np.zeros((train_size), dtype=int)
    data['test']['in'] = np.zeros((test_size, feats))
    data['test']['out'] = np.zeros((test_size), dtype=int)
    data['test']['lines'] = lines[train_size:]
    data['train']['lines'] = lines[:train_size]

    for kind in ['train', 'test']:
        for i, line in enumerate(data[kind]['lines']):
            # Row view: writes below land directly in data[kind]['in'].
            inputs = data[kind]['in'][i]

            data[kind]['out'][i] = line[1]

            heroes = line[2:]
            rad = heroes[:5]
            dire = heroes[5:]

            for hero in rad:
                inputs[hero] = 1

            for hero in dire:
                inputs[hero] = -1

            # Derived features are appended after the 121 hero slots.
            counter = 121

            for hero_i in range(5):
                for hero_j in range(hero_i, 5):
                    inputs[counter] = pair_wr[rad[hero_i], rad[hero_j]]
                    counter += 1

            for hero_i in range(5):
                for hero_j in range(hero_i, 5):
                    inputs[counter] = -pair_wr[dire[hero_i], dire[hero_j]]
                    counter += 1

            for hero_i in range(5):
                for hero_j in range(5):
                    inputs[counter] = counter_wr[rad[hero_i], dire[hero_j]]
                    counter += 1

    return data

In [7]:
# Encode every match row, holding out the trailing 10% of rows as the test split.
data = gen_train_test(m, 0.1)
# Spot-check one encoded training row.
print(data['train']['in'][3])

[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          0.
  0.          0.          0.          0.         -1.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          1.
  0.          0.          0.          0.          0.          0.
  0.          0.         -1.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  1.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          1.          0.          0.          0.
  0.          0.         

In [35]:
from keras import backend as K
# Reset the Keras backend state before building a new model; presumably meant
# to release graphs/models left over from earlier runs of the cell below.
K.clear_session()

In [37]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adagrad, Adam
from keras import regularizers

# Feed-forward binary classifier: two ReLU hidden layers and a sigmoid output.
model = Sequential()

# Only the first layer of a Sequential model needs the input width; later
# layers take their input size from the layer before them.
n_features = data['train']['in'].shape[1]
model.add(Dense(100, activation='relu', input_dim=n_features))
model.add(Dense(300, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

sgd = SGD(lr=0.05)
model.compile(loss='binary_crossentropy',
              #optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),
              #optimizer='adadelta',
              optimizer=sgd,
              metrics=['binary_accuracy'])

# The held-out split doubles as validation data, so per-epoch validation
# numbers and the final evaluation below are measured on the same rows.
model.fit(data['train']['in'], data['train']['out'],
          epochs=1000,
          batch_size=512, validation_data=(data['test']['in'], data['test']['out']))

# evaluate() returns [loss, binary_accuracy]; report the accuracy.
result = model.evaluate(data['test']['in'], data['test']['out'])

print(result[1])

Train on 161442 samples, validate on 17938 samples
Epoch 1/1000


ResourceExhaustedError: OOM when allocating tensor with shape[300] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training_1/SGD/Variable_3/Assign = Assign[T=DT_FLOAT, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_1/SGD/Variable_3, dense_5/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'training_1/SGD/Variable_3/Assign', defined at:
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 478, in start
    self.io_loop.start()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-37-95b358db7743>", line 22, in <module>
    batch_size=512, validation_data=(data['test']['in'], data['test']['out']))
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/models.py", line 963, in fit
    validation_steps=validation_steps)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1682, in fit
    self._make_train_function()
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/optimizers.py", line 182, in get_updates
    moments = [K.zeros(shape) for shape in shapes]
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/optimizers.py", line 182, in <listcomp>
    moments = [K.zeros(shape) for shape in shapes]
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 694, in zeros
    return variable(v, dtype=dtype, name=name)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 395, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 235, in __init__
    constraint=constraint)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 387, in _init_from_args
    validate_shape=validate_shape).op
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 283, in assign
    validate_shape=validate_shape)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 60, in assign
    use_locking=use_locking, name=name)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/home/rafael/mestrado/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[300] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training_1/SGD/Variable_3/Assign = Assign[T=DT_FLOAT, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_1/SGD/Variable_3, dense_5/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest baseline. random_state is pinned so that re-running the cell
# reproduces the same forest (bootstrap and feature sampling are random).
clf = RandomForestClassifier(n_estimators=100, max_features=11, min_samples_split=2,
                             max_depth=None, n_jobs=8, random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])
# Keep a dedicated handle: `clf` is reassigned by the later model cells.
clf_forest = clf

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline with default settings; fit() returns the
# estimator itself, so both names refer to the same fitted model.
clf_log = LogisticRegression().fit(data['train']['in'], data['train']['out'])
clf = clf_log

In [None]:
from sklearn import neighbors

# 5-nearest-neighbours baseline with uniform (unweighted) voting.
clf = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform').fit(
    data['train']['in'],
    data['train']['out'],
)

In [None]:
# Disabled SVC experiment: the cell only prints an empty line, and the code
# below is commented out and does not run.
print()
# from sklearn import svm
# clf = svm.SVC()
# clf.fit(data['train']['in'], data['train']['out'])  

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with 10000 boosting rounds and the library's default base learner.
clf = AdaBoostClassifier(n_estimators=10000).fit(
    data['train']['in'],
    data['train']['out'],
)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive-Bayes baseline; fit() returns the estimator itself, so both
# names refer to the same fitted model.
clf_naive = GaussianNB().fit(data['train']['in'], data['train']['out'])
clf = clf_naive

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting over depth-1 trees (stumps), seeded for repeatability.
clf = GradientBoostingClassifier(
    n_estimators=2000,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
).fit(data['train']['in'], data['train']['out'])

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# Bagged logistic regressions: each member is fitted on half of the rows and
# half of the features. random_state is pinned because that subsampling is
# random; without it every run yields a different ensemble.
clf = BaggingClassifier(LogisticRegression(),
                        max_samples=0.5, max_features=0.5, random_state=0)
clf = clf.fit(data['train']['in'], data['train']['out'])

In [None]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of the three named models; the weights follow the order
# of the estimator list (forest 1, naive Bayes 2, logistic regression 2).
ensemble_members = [('for', clf_forest), ('naive', clf_naive), ('log', clf_log)]
clf = VotingClassifier(
    estimators=ensemble_members,
    voting='soft',
    weights=[1, 2, 2],
).fit(data['train']['in'], data['train']['out'])

In [None]:
# Score whichever estimator is currently bound to `clf` on the test split.
model = clf
preds = model.predict(data['test']['in'])
correct = data['test']['out']

# Positions of the test rows whose prediction differs from the label; every
# other row counts as a hit.
wrong = [idx for idx in range(len(preds)) if not correct[idx] == preds[idx]]
eq = len(preds) - len(wrong)

# Accuracy on the test split.
print(eq / len(preds))

In [None]:
# Keep the misclassified test indices from the evaluation cell under a
# model-specific name. NOTE(review): this is only the forest's error list if
# the evaluation cell was last run with the random forest bound to `clf` — verify.
wrong_forest = wrong

In [None]:
# Print the misclassified test indices recorded for each model.
# NOTE(review): `wrong_log` and `wrong_naive` are not assigned anywhere in the
# visible notebook; on a fresh kernel the first print raises NameError unless
# they are created the same way as `wrong_forest` — confirm.
print(wrong_log)
print(wrong_naive)
print(wrong_forest)