In [1]:
import json
from collections import Counter

class Deck:
    """One deck record: its name, its hero and a multiset of its cards.

    `info` is a parsed deck record in which `deckName` and `hero` are
    sequences (only their first element is used) and `cards` maps each card
    name to a one-element sequence wrapping that card's count.
    """

    def __init__(self, info):
        self.name, self.hero = info['deckName'][0], info['hero'][0]
        # The `[count]` target unwraps the single-element container around each
        # count; a wrapper holding anything but exactly one item raises ValueError.
        self.cards = Counter({card: count for card, [count] in info['cards'].items()})

# Locations of the deck files (one JSON object per line) read by the loaders in this notebook.
training_path = 'data/trainingDecks.json'
test_path = 'data/testDecks.json'
            
def get_decks_dict(path):
    """Load a deck file into a dict keyed by deck name.

    The file is expected to contain one JSON object per line. Lines that are
    empty or whitespace-only (for example a trailing newline at the end of the
    file) are skipped instead of raising a JSON decoding error.

    Args:
        path: Path of the deck file to read.

    Returns:
        dict mapping the first `deckName` entry of every record to the `Deck`
        built from that record. If a name occurs more than once, the last
        record wins.
    """
    decks_dict = {}

    with open(path, 'r') as decks_file:
        # Iterate the file lazily rather than materialising every line first.
        for line in decks_file:
            if not line.strip():
                continue
            json_info = json.loads(line)
            decks_dict[json_info['deckName'][0]] = Deck(json_info)
    return decks_dict

# Parse both deck files once; each resulting dict maps deck name -> Deck.
training_decks = get_decks_dict(training_path)
test_decks = get_decks_dict(test_path)

In [2]:
import pandas as pd
# Column names are supplied explicitly, so pandas treats the first line of the
# ';'-separated file as data rather than as a header.
training_games = pd.read_csv(
    'data/training_games.csv',
    delimiter=';',
    names=['id', 'bot1', 'deck1', 'bot2', 'deck2', 'result'],
)
# Encode the outcome as an integer flag: 1 where the raw value is exactly
# 'PLAYER_1 WON', 0 for anything else.
training_games['result'] = (training_games['result'] == 'PLAYER_1 WON').astype(int)
print(training_games.head())
print(len(training_games))

         id bot1       deck1 bot2       deck2  result
0  100001.0   A1  deck113225   A1  deck731599       0
1  100002.0   A1  deck694943   A1  deck929572       1
2  100003.0   A1  deck182567   A1  deck525929       0
3  100004.0   A1  deck219364   A1  deck757429       1
4  100005.0   A1  deck826229   A1  deck337123       1
299680


In [3]:
# Distinct hero values that occur among the training decks.
heros = set(deck.hero for deck in training_decks.values())
print(heros)

{'Mage', 'Shaman', 'Warlock', 'Druid', 'Priest', 'Paladin', 'Rogue', 'Hunter', 'Warrior'}


In [15]:
# Reshape the per-game table into one row per (player, deck) appearance and
# average the outcome flag over all games of each pair.
#
# The flag is inverted for the first-listed side so that both halves are
# expressed relative to the row's own player.
# NOTE(review): the averaged value is a win rate only if 'PLAYER_1 WON' in the
# raw file refers to the second-listed bot (0-based player numbering);
# otherwise it is a loss rate — confirm against the data description.
first_side = (
    training_games[['bot1', 'deck1', 'result']]
    .rename(columns={'bot1': 'player', 'deck1': 'deck'})
    .assign(result=lambda games: 1 - games['result'])
)
second_side = (
    training_games[['bot2', 'deck2', 'result']]
    .rename(columns={'bot2': 'player', 'deck2': 'deck'})
)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0. The row
# index is discarded by the groupby below, so it needs no special handling.
train_df = pd.concat([first_side, second_side])
train_df = train_df.groupby(['player', 'deck'])['result'].mean()
train_df = train_df.reset_index()
print(train_df.head())

  player        deck    result
0     A1  deck100087  0.291667
1     A1  deck101443  0.276627
2     A1  deck102280  0.550613
3     A1  deck104259  0.404459
4     A1  deck105300  0.397929


In [322]:
from sklearn.feature_extraction import DictVectorizer
# Dense vectorizer: turns card-count mappings into fixed-width numeric rows.
v = DictVectorizer(sparse=False)
# One card-count mapping (the Deck's Counter) per training deck.
D = [deck.cards for deck in training_decks.values()]
# Fit the card vocabulary and encode the training decks in one step.
# Note: `X` is rebound in a later cell; the fitted `v` is what later cells reuse.
X = v.fit_transform(D)



In [341]:
import numpy as np

# Bot identifiers; their order fixes the order of the per-bot indicator columns.
players = ['A1', 'A2', 'B1', 'B2']

# One boolean indicator vector per bot, aligned with the rows of train_df.
# `.values` is used because Series.as_matrix() was removed in pandas 1.0.
players_features = [(train_df['player'] == pl).values for pl in players]

# Card-count features for every train_df row, encoded with the fitted
# vectorizer in a single batched call instead of one transform call per row.
x = np.array(v.transform([training_decks[deck_name].cards for deck_name in train_df['deck']]))

In [342]:
# Stack the per-bot indicator vectors into an (n_rows, n_players) block and
# append it to the right of the card-count columns.
players_x = np.stack(players_features).T
X = np.concatenate([x, players_x], axis=1)
# Regression target: the averaged outcome of each (player, deck) row.
# `.values` replaces Series.as_matrix(), which was removed in pandas 1.0.
y = train_df['result'].values

In [343]:
# Sanity check: X and y should have the same number of rows.
print(X.shape)
print(y.shape)

(1600, 334)
(1600,)


**Shuffle the dataset**

In [344]:
import random
# Shuffle the rows with a fixed seed so the row order (and therefore any
# order-dependent split made downstream) is reproducible between runs.
SHUFFLE_SEED = 0
indices = random.Random(SHUFFLE_SEED).sample(range(len(y)), len(y))
X = X[indices]
# Express the target in percent, building a new array rather than scaling in place.
# NOTE: X and y are rebound from their previous values, so re-running this cell
# permutes X again and multiplies y by 100 again; re-run the cell that builds
# X and y first.
y = y[indices] * 100

In [395]:
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Candidate estimators. Only the gradient-boosting model is searched below;
# `clf` and `parameters` describe the SVR alternative and are not used by this search.
clf = SVR()
gb = GradientBoostingRegressor()
gb_params = {
    'learning_rate': [0.05, 0.1, 0.5, 1],
    'max_depth': [2, 3, 4],
    'max_features': [None, 'sqrt'],
}
# SVR requires a strictly positive C, so the grid starts at 100 instead of 0.
parameters = {
    'C': np.arange(1, 10) * 100,
    'gamma': ['auto', 0.01, 0.1, 0.5, 1, 10, 50],
    'epsilon': [0.01, 0.05, 0.1, 0.5, 1, 5],
}
grid_clf = GridSearchCV(gb, gb_params, scoring='neg_mean_squared_error', cv=8, verbose=5)
grid_clf = grid_clf.fit(X, y)
# best_score_ is a negated mean squared error; flip the sign and take the root
# to report it as an RMSE in the units of y.
print(np.sqrt(-grid_clf.best_score_))
print(grid_clf.best_params_)

Fitting 8 folds for each of 24 candidates, totalling 192 fits
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-39.54016477758865, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-42.7074638742381, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.5s remaining:    0.0s


[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-37.57075939941934, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.2s remaining:    0.0s


[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-49.61174469367202, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.9s remaining:    0.0s


[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-39.4773909490801, total=   0.8s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-34.39201158769224, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-42.19965191189684, total=   0.7s
[CV] learning_rate=0.05, max_depth=2, max_features=None ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=None, score=-47.85213789654758, total=   0.8s
[CV] learning_rate=0.05, max_depth=2, max_features=sqrt ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=sqrt, score=-47.37883120573402, total=   0.1s
[CV] learning_rate=0.05, max_depth=2, max_features=sqrt ..............
[CV]  learning_rate=0.05, max_depth=2, max_features=sqrt, score=-52.01403517873564, total=   0.1s
[CV] learning_rate=0.05, max_depth=2, max_features=sqrt ..

[CV]  learning_rate=0.1, max_depth=2, max_features=None, score=-27.697628864454767, total=   0.6s
[CV] learning_rate=0.1, max_depth=2, max_features=None ...............
[CV]  learning_rate=0.1, max_depth=2, max_features=None, score=-32.25153528000787, total=   0.6s
[CV] learning_rate=0.1, max_depth=2, max_features=None ...............
[CV]  learning_rate=0.1, max_depth=2, max_features=None, score=-36.069075078702234, total=   0.6s
[CV] learning_rate=0.1, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.1, max_depth=2, max_features=sqrt, score=-35.94925760811973, total=   0.1s
[CV] learning_rate=0.1, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.1, max_depth=2, max_features=sqrt, score=-37.9937892127015, total=   0.1s
[CV] learning_rate=0.1, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.1, max_depth=2, max_features=sqrt, score=-37.8434010055837, total=   0.1s
[CV] learning_rate=0.1, max_depth=2, max_features=sqrt ........

[CV]  learning_rate=0.5, max_depth=2, max_features=None, score=-17.435254887467266, total=   0.6s
[CV] learning_rate=0.5, max_depth=2, max_features=None ...............
[CV]  learning_rate=0.5, max_depth=2, max_features=None, score=-19.314493430157498, total=   0.6s
[CV] learning_rate=0.5, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.5, max_depth=2, max_features=sqrt, score=-24.46382200932716, total=   0.1s
[CV] learning_rate=0.5, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.5, max_depth=2, max_features=sqrt, score=-23.096650606289423, total=   0.1s
[CV] learning_rate=0.5, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.5, max_depth=2, max_features=sqrt, score=-23.913390713309212, total=   0.1s
[CV] learning_rate=0.5, max_depth=2, max_features=sqrt ...............
[CV]  learning_rate=0.5, max_depth=2, max_features=sqrt, score=-23.984735546089908, total=   0.1s
[CV] learning_rate=0.5, max_depth=2, max_features=sqrt ...

[CV]  learning_rate=1, max_depth=2, max_features=None, score=-22.771813815869773, total=   0.6s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................
[CV]  learning_rate=1, max_depth=2, max_features=sqrt, score=-24.973858760438585, total=   0.1s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................
[CV]  learning_rate=1, max_depth=2, max_features=sqrt, score=-24.798619498777825, total=   0.1s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................
[CV]  learning_rate=1, max_depth=2, max_features=sqrt, score=-21.84461173981369, total=   0.1s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................
[CV]  learning_rate=1, max_depth=2, max_features=sqrt, score=-28.909469492611397, total=   0.1s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................
[CV]  learning_rate=1, max_depth=2, max_features=sqrt, score=-23.73805499182743, total=   0.1s
[CV] learning_rate=1, max_depth=2, max_features=sqrt .................


[Parallel(n_jobs=1)]: Done 192 out of 192 | elapsed:  2.0min finished


4.040185407552556
{'learning_rate': 0.5, 'max_depth': 3, 'max_features': None}


In [383]:
import math

def reduce(x):
    """Clamp every value of `x` to the closed interval [0, 100].

    Args:
        x: Array-like of numbers.

    Returns:
        A new numpy array shaped like `x` in which values below 0 are replaced
        by 0 and values above 100 are replaced by 100. The input is not modified.
    """
    # np.clip applies both bounds in one vectorised pass instead of two
    # Python-level loops over the elements.
    return np.clip(np.asarray(x), 0, 100)

In [384]:
# NOTE(review): X_val and y_val are not defined in any cell visible in this
# notebook — presumably a held-out split created in a cell that has since been
# removed; this cell will fail on Restart & Run All until that is restored.
print(X_val.shape, y_val.shape)

(320, 334) (320,)


In [385]:
# Show predictions next to the validation targets multiplied by 100.
# NOTE(review): y_pred (like y_val) is not defined in any visible cell —
# presumably produced by a removed cell; confirm before relying on this output.
print(y_pred)
print(y_val * 100)

[42.62695312 58.0078125  47.94921875 53.515625   50.         36.03515625
 38.57421875 41.6015625  42.1875     53.41796875 45.3125     24.4140625
 33.44726562 55.078125   28.36914062 69.18945312 53.7109375  45.75195312
 56.54296875 39.11132812 66.16210938 39.94140625 51.171875   36.71875
 48.87695312 59.765625   62.25585938 43.21289062 42.7734375  41.015625
 38.4765625  57.6171875  46.82617188 71.6796875  24.609375   61.62109375
 51.85546875 57.421875   32.95898438 47.94921875 55.51757812 64.2578125
 37.109375   58.0078125  62.3046875  59.27734375 64.453125   51.7578125
 44.04296875 58.88671875 52.83203125 70.703125   44.921875   41.65039062
 61.42578125 52.83203125 78.95507812 40.0390625  61.42578125 61.76757812
 38.03710938 41.015625   71.14257812 59.91210938 40.8203125  66.30859375
 67.72460938 33.83789062 48.046875   57.95898438 62.59765625 76.7578125
 77.00195312 76.5625     72.50976562 69.62890625 48.77929688 42.28515625
 62.5        64.89257812 50.9765625  47.99804688 46.2890625 

In [397]:
# Final models fitted on X, y with fixed hyper-parameters. Despite the
# `_classifier` suffix both estimators are regressors.
svr_classifier = SVR(gamma=0.01, epsilon=0.5, C=100).fit(X, y)
gb_classifier = GradientBoostingRegressor(
    learning_rate=0.5, max_depth=3, max_features=None
).fit(X, y)

### Let's ensemble a bit

In [400]:
def predict(deck, player):
    """Return the blended model prediction for one deck played by one bot.

    Args:
        deck: Object whose `cards` mapping is encoded with the fitted
            vectorizer `v`.
        player: Bot identifier; it is compared against every entry of
            `players` to build the indicator part of the feature row.

    Returns:
        One-element numpy array holding 0.8 * the SVR prediction plus
        0.2 * the gradient-boosting prediction, passed through `reduce`.
    """
    card_part = np.array(v.transform(deck.cards)[0])
    player_part = np.array([player == pl for pl in players])
    # Both models expect a 2-D input, hence the single-row list.
    sample = [np.concatenate([card_part, player_part])]
    blended = 0.8 * svr_classifier.predict(sample) + 0.2 * gb_classifier.predict(sample)
    return reduce(blended)


In [401]:

def get_test_decks_list():
    """Return the deck names stored in the test deck file, in file order.

    Reads the module-level `test_path`, expecting one JSON object per line.
    Empty or whitespace-only lines are skipped instead of raising a JSON
    decoding error.

    Returns:
        list with the first `deckName` entry of every record.
    """
    # Named `deck_names` so the local list does not shadow the module-level
    # `test_decks` dictionary.
    deck_names = []

    with open(test_path, 'r') as decks_file:
        for line in decks_file:
            if not line.strip():
                continue
            deck_names.append(json.loads(line)['deckName'][0])
    return deck_names

# Test deck names in file order; dump_results iterates over this list.
test_decks_names = get_test_decks_list()

def dump_results():
    """Predict every (bot, test deck) pair and write the predictions to CSV.

    Iterates over `players` and, for each of them, over `test_decks_names`,
    calling `predict` once per pair. Prints the first rows of the collected
    table and writes 'data/test_results.csv' as a ';'-separated file without
    header or index, with the columns player, deck_name, win_rate.
    """
    # Collect plain records and build the frame once: DataFrame.append copied
    # the whole frame on every call and was removed in pandas 2.0.
    records = [
        {'player': player,
         'deck_name': deck_name,
         'win_rate': predict(test_decks[deck_name], player)[0]}
        for player in players
        for deck_name in test_decks_names
    ]
    # The explicit column list keeps the preview layout stable and gives an
    # empty result the expected columns.
    df_results = pd.DataFrame(records, columns=['deck_name', 'player', 'win_rate'])

    print(df_results.head())
    df_results.to_csv('data/test_results.csv', index=False, header=False,
                      columns=['player', 'deck_name', 'win_rate'], sep=';')

# Write the predictions for all test decks to data/test_results.csv.
dump_results()

    deck_name player   win_rate
0  deck244804     A1  24.915689
1  deck124802     A1  42.986681
2  deck687350     A1  35.039845
3  deck517728     A1  46.949677
4  deck130762     A1  40.816134
