In [189]:
import json
from collections import Counter

class Deck:
    def __init__(self, info):
        self.name = info['deckName'][0]
        self.hero = info['hero'][0]
        self.cards = Counter()
        for name, [count] in info['cards'].items():
            self.cards[name] = count

training_path = 'data/trainingDecks.json'
test_path = 'data/testDecks.json'
            
def get_decks_dict(path):
    decks_dict = dict()    
    
    with open(path, 'r') as decks:
        for line in decks.readlines():
            json_info = json.loads(line)
            decks_dict[json_info['deckName'][0]] = Deck(json_info)
    return decks_dict

training_decks = get_decks_dict(training_path)
test_decks = get_decks_dict(test_path)

In [190]:
import pandas as pd
training_games = pd.read_csv('data/training_games.csv', delimiter=';', 
                             names=['id', 'bot1', 'deck1', 'bot2', 'deck2', 'result'])
training_games['result'] = training_games['result'] == 'PLAYER_1 WON'
training_games['result'] = training_games['result'].astype(int)
print(training_games.head())
print(len(training_games))

         id bot1       deck1 bot2       deck2  result
0  100001.0   A1  deck113225   A1  deck731599       0
1  100002.0   A1  deck694943   A1  deck929572       1
2  100003.0   A1  deck182567   A1  deck525929       0
3  100004.0   A1  deck219364   A1  deck757429       1
4  100005.0   A1  deck826229   A1  deck337123       1
299680


In [191]:
heros = {deck.hero for deck in training_decks.values()}
print(heros)

{'Warrior', 'Druid', 'Priest', 'Rogue', 'Paladin', 'Hunter', 'Mage', 'Warlock', 'Shaman'}


### Let's look at the cards

In [192]:
test_heros = {deck.hero for deck in test_decks.values()}
print(test_heros)

{'Warrior', 'Druid', 'Priest', 'Rogue', 'Hunter', 'Paladin', 'Mage', 'Warlock', 'Shaman'}


In [197]:
train_cards = set()
test_cards = set()
for deck in training_decks.values():
    train_cards.update(deck.cards.keys())

for deck in test_decks.values():
    test_cards.update(deck.cards.keys())

In [198]:
print(train_cards)
print(test_cards)

{'Dark Pact', 'Cornered Sentry', 'Innervate', 'Shadow Word: Death', 'Nourish', 'Mortal Coil', 'Divine Spirit', 'Glacial Shard', 'Vilespine Slayer', 'Booty Bay Bodyguard', 'Harrison Jones', 'Captured Jormungar', 'Mirror Entity', 'Holy Light', 'Bite', 'Jungle Panther', 'Divine Favor', 'Darkshire Councilman', 'King Mukla', 'Assassinate', 'Kirin Tor Mage', 'Arcane Blast', 'Murloc Tidecaller', 'Blessing of Kings', 'Malfurion the Pestilent', 'Silent Knight', 'Blowgill Sniper', 'Vulgar Homunculus', 'Rockpool Hunter', 'Explosive Runes', 'Volcanic Potion', 'Dragonfire Potion', 'Brawl', 'Puddlestomper', 'Grimscale Oracle', 'Coldlight Seer', 'Twilight Drake', 'Ultimate Infestation', 'Drain Soul', 'Abyssal Enforcer', 'Elven Archer', 'Azure Drake', 'Vilefin Inquisitor', 'Duskboar', 'Ironwood Golem', 'Patches the Pirate', 'Coldlight Oracle', 'Southsea Deckhand', 'Lost Tallstrider', 'Southsea Captain', 'Starfire', "Mogu'shan Warden", 'Houndmaster', 'Truesilver Champion', 'Raid Leader', 'Gnomish Inven

In [221]:
print(len(test_cards), len(train_cards))

313 330


In [229]:
all_cards = test_cards.intersection(train_cards)
print(len(all_cards))

295


In [251]:
train_df = training_games.get(['bot1', 'deck1', 'result']).rename(index=str, columns={'bot1': 'player', 'deck1': 'deck'})
train_df['result'] = 1 - train_df['result']
train_df = train_df.append([training_games.get(['bot2', 'deck2', 'result']).rename(index=str, columns={
    'bot2': 'player', 'deck2': 'deck'})])
train_df = train_df.groupby(['player', 'deck'])['result'].mean()
train_df = train_df.reset_index()
print(train_df.head())

  player        deck    result
0     A1  deck100087  0.291667
1     A1  deck101443  0.276627
2     A1  deck102280  0.550613
3     A1  deck104259  0.404459
4     A1  deck105300  0.397929


### Card names one hot encoding

In [485]:
def intersect_with_test_cards(cards_dict):
    keys = test_cards.intersection(cards_dict)
    return {k:cards_dict[k] for k in keys} 

In [486]:
from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
D = [deck.cards for deck in training_decks.values()]
X = v.fit_transform(D)

In [487]:
heros_arr = np.zeros((len(train_df['deck']), len(test_heros)))

for i, deck_name in enumerate(train_df['deck']):
    for j, hero in enumerate(test_heros):
        heros_arr[i, j] = training_decks[deck_name].hero == hero

In [488]:
import numpy as np

players = ['A1', 'A2', 'B1', 'B2']

players_features = [(train_df['player'] == pl).as_matrix() for pl in players]
cards_count = np.array([sum(training_decks[deck_name].cards.values()) for deck_name in train_df['deck']])

x = np.array([v.transform(training_decks[deck_name].cards)[0] for deck_name in train_df['deck']])

### Let's get some statistics for each card

In [489]:
card_stats = json.load(open('data/cards.json', 'r'))
json.dump(card_stats, open('data/formatted_cards.json', 'w'), indent=4)

In [490]:
card_stats = [stat for stat in card_stats if 'name' in stat]

In [491]:
stats_dict = {stat['name']:stat for stat in card_stats}

In [492]:
from collections import Counter

def aggregate_results(feature, fun):
    counter = Counter()

    for stat in stats_dict.values():
        if feature in stat:
            counter[stat['name']] = stat[feature]
    
    def total_val(deck):
        total = 0
        cards = []
        for card, count in deck.cards.items():
            cards += [card] * count
        cards = [stats_dict[card].get(feature, 0) for card in cards]
        return fun(cards)

    return np.array([total_val(training_decks[deck_name]) for deck_name in train_df['deck']])

In [493]:
card_features = ['cost', 'attack', 'health', 'durability', 'armor']
functions = [np.mean, np.median, np.var, np.max]

features_x = []
for feature in card_features:
    for fun in functions:
        features_x.append(aggregate_results(feature, fun))
features_x = np.array(features_x)
print(features_x.shape)

(20, 1600)


In [503]:
print(features_x.T.shape)
print(heros_arr.shape)
print(players_x.shape)
print(x.shape)

(1600, 20)
(1600, 9)
(1600, 4)
(1600, 330)


In [504]:
players_x = np.stack(players_features).T
cards_count_x = np.reshape(cards_count, (len(cards_count), 1))

collectible_ratio = np.reshape(collectible_ratio, (len(collectible_ratio), 1))

X = np.concatenate([x, players_x, features_x.T, heros_arr], axis=1)
y = train_df['result'].as_matrix()

In [505]:
print(X.shape)
print(y.shape)

(1600, 363)
(1600,)


**shufflujemy dataset**

In [506]:
import random
indices = random.sample(range(len(y)), len(y))
X = X[indices]
y = y[indices]
y *= 100

In [508]:
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import BayesianRidge

clf = SVR(gamma=0.001, epsilon=0.1, C=100)
gb = GradientBoostingRegressor()
gb_params = {'learning_rate': [0.05, 0.1, 0.5, 0.8, 0.9, 1, 1.1, 1.2],
            'max_depth': [2, 3, 4],
            #'criterion': ['friedman_mse', 'mse'],
            'max_features': [None, 'sqrt']}

parameters = {'C': np.arange(1, 15) * 100, 
              'gamma': ['auto', 0.0005, 0.001, 0.005, 0.01, 0.02],
              'epsilon': [0.005, 0.01, 0.02, 0.05, 0.1]
             }
bagging = BaggingRegressor(SVR(gamma='auto', epsilon=0.005, C=1300))
grid_clf = RandomizedSearchCV(clf, parameters, scoring='neg_mean_squared_error', cv=8, verbose=5, n_jobs=7)
grid_clf = grid_clf.fit(X, y)
print(np.sqrt(-grid_clf.best_score_))
print(grid_clf.best_params_)

Fitting 8 folds for each of 10 candidates, totalling 80 fits
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV]  gamma=0.02, epsilon=0.02, C=300, score=-13.20704887871073, total=   3.0s
[CV] gamma=0.02, epsilon=0.02, C=300 .................................
[CV]  gamma=0.02, epsilon=0.02, C=300, score=-16.239290257880857, total=   3.3s
[CV] gamma=0.001, epsilon=0.01, C=400 ................................
[CV]  gamma=0.02, epsilon=0.02, C=300, score=-13.670829048545729, total=   3.4s
[CV] gamma=0.001, epsilon=0.01, C=400 .......................

[Parallel(n_jobs=7)]: Done   4 tasks      | elapsed:    5.9s


[CV]  gamma=0.02, epsilon=0.02, C=300, score=-14.800133165181627, total=   3.7s
[CV] gamma=0.001, epsilon=0.01, C=400 ................................
[CV]  gamma=0.02, epsilon=0.02, C=300, score=-15.639428148166163, total=   3.7s
[CV] gamma=0.001, epsilon=0.01, C=400 ................................
[CV]  gamma=0.001, epsilon=0.01, C=400, score=-19.60524521874816, total=   3.0s
[CV] gamma=0.001, epsilon=0.01, C=400 ................................
[CV]  gamma=0.001, epsilon=0.01, C=400, score=-15.59059177498402, total=   2.9s
[CV] gamma=0.001, epsilon=0.01, C=400 ................................
[CV]  gamma=0.001, epsilon=0.01, C=400, score=-14.451124621185832, total=   2.7s
[CV] gamma=auto, epsilon=0.005, C=1300 ...............................
[CV]  gamma=0.02, epsilon=0.02, C=300, score=-13.281523952863187, total=   3.6s
[CV] gamma=auto, epsilon=0.005, C=1300 ...............................
[CV]  gamma=0.001, epsilon=0.01, C=400, score=-15.11218038361037, total=   2.8s
[CV] gamma=au

[Parallel(n_jobs=7)]: Done  58 tasks      | elapsed:   50.2s


[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-14.000482409058787, total=   4.4s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-13.713877725454427, total=   4.4s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-14.982829453659711, total=   4.5s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-14.5187764829874, total=   4.4s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-15.59660513538507, total=   4.1s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.01, epsilon=0.05, C=1100, score=-13.379803793136361, total=   6.6s
[CV] gamma=0.02, epsilon=0.02, C=1000 ................................
[CV]  gamma=0.02, epsilon=0.02, C=1000, score=-17.643529726102294, total=   6.5s
[CV] gamma

[Parallel(n_jobs=7)]: Done  80 out of  80 | elapsed:  1.2min finished


3.6716953214348833
{'gamma': 'auto', 'epsilon': 0.005, 'C': 1300}


In [451]:
import math

def reduce(x):
    x = np.array([max(val, 0) for val in x])
    return np.array([min(val, 100) for val in x])

In [452]:
svr_classifier = SVR(gamma=0.001, epsilon=0.1, C=100)
svr_classifier = svr_classifier.fit(X, y)
gb_classifier = GradientBoostingRegressor(learning_rate=0.8, max_depth=4, max_features=None)
gb_classifier = gb_classifier.fit(X, y)

### Let's ensemble a bit

In [453]:
   

def aggregate_results(feature, deck):
    counter = Counter()

    for stat in stats_dict.values():
        if feature in stat:
            counter[stat['name']] = stat[feature]
    
    def total_val(deck):
        total = 0
        for card, count in deck.cards.items():
            total += count * counter[card]
        return total

    return np.array([total_val(deck)])



def predict(deck, player):
    players_features = [player == pl for pl in players]
    x_test = np.array(v.transform(deck.cards)[0])
    x_test = np.concatenate([np.array(players_features)])
    costs = aggregate_results('cost', deck)
    attack = aggregate_results('attack', deck)
    health = aggregate_results('health', deck)
    durability = aggregate_results('durability', deck)
    armor = aggregate_results('armor', deck)
    collectible_ratio = aggregate_results('collectible', deck)
    
    heros_arr = np.zeros((len(test_heros), ))

    for j, hero in enumerate(test_heros):
        heros_arr[j] = deck.hero == hero


    costs = np.reshape(costs, (len(costs), 1))
    attack = np.reshape(attack, (len(attack), 1))
    health = np.reshape(health, (len(health), 1))
    durability = np.reshape(durability, (len(durability), 1))
    armor = np.reshape(armor, (len(armor), 1))
    collectible_ratio = np.reshape(collectible_ratio, (len(collectible_ratio), 1))
    heros_arr = heros_arr.reshape(-1, 1)
    x_test = x_test.reshape(-1, 1)
    #print(costs.shape, heros_arr.shape, x_test.shape)
    X_pred = np.concatenate([x_test, costs, attack, health, 
                                          collectible_ratio, durability, armor, heros_arr])
    return reduce(gb_classifier.predict(X_pred.reshape(1, -1)))


In [454]:

def get_test_decks_list():
    test_decks = []

    with open(test_path, 'r') as decks:
        for line in decks.readlines():
            json_info = json.loads(line)
            test_decks.append(json_info['deckName'][0])
    return test_decks

test_decks_names = get_test_decks_list()

def dump_results():
    df_results = pd.DataFrame()
    for player in players:
        for deck_name in test_decks_names:
            deck = test_decks[deck_name]
            win_rate = predict(deck, player)[0]
            df_results = df_results.append({'player': player, 'deck_name': deck_name, 'win_rate': win_rate}, 
                                           ignore_index=True)

    print(df_results.head())
    df_results.to_csv('data/test_results.csv', index=False, header=False, columns=['player', 'deck_name', 'win_rate'],
                     sep=';')

dump_results()

    deck_name player   win_rate
0  deck244804     A1  19.730219
1  deck124802     A1  46.392929
2  deck687350     A1  34.475045
3  deck517728     A1  41.722489
4  deck130762     A1  34.685953


In [456]:
print(1)

1
