In [1]:
import json
from collections import Counter

class Deck:
    """A deck described by its name, its hero and a multiset of cards.

    Instances are built from one decoded JSON record whose ``deckName`` and
    ``hero`` values are sequences (the first element is used) and whose
    ``cards`` value maps each card name to a one-element list holding the
    number of copies.
    """

    def __init__(self, info):
        """Populate ``name``, ``hero`` and ``cards`` from the record ``info``.

        Raises:
            KeyError: if ``deckName``, ``hero`` or ``cards`` is missing.
            ValueError: if a card's count list does not hold exactly one item.
        """
        self.name = info['deckName'][0]
        self.hero = info['hero'][0]
        # The 1-tuple target enforces "exactly one element" per count list.
        self.cards = Counter({
            card: copies for card, (copies,) in info['cards'].items()
        })

# Paths of the deck description files (read line by line, one JSON object per line).
training_path = 'data/trainingDecks.json'
test_path = 'data/testDecks.json'
            
def get_decks_dict(path):
    """Load a JSON-lines deck file into a ``{deck name: Deck}`` mapping.

    Every non-blank line of the file is decoded as one JSON object and
    wrapped in a ``Deck``. The key is the first element of the record's
    ``deckName`` value; if a name occurs more than once, the last record wins.

    Args:
        path: Path of the file to read.

    Returns:
        dict mapping deck name to the corresponding ``Deck``.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    decks_dict = {}

    # JSON text is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as decks:
        # Iterate the file lazily instead of materialising it with readlines().
        for line in decks:
            if not line.strip():
                # Tolerate blank lines instead of crashing in json.loads.
                continue
            json_info = json.loads(line)
            decks_dict[json_info['deckName'][0]] = Deck(json_info)
    return decks_dict

# Deck-name -> Deck lookups for the training and the test deck files.
training_decks = get_decks_dict(training_path)
test_decks = get_decks_dict(test_path)

In [2]:
import pandas as pd
# Game log: ';'-separated rows; the column names are supplied here.
training_games = pd.read_csv(
    'data/training_games.csv',
    delimiter=';',
    names=['id', 'bot1', 'deck1', 'bot2', 'deck2', 'result'],
)
# Encode the outcome as an integer flag: 1 where the raw label is
# 'PLAYER_1 WON', 0 for any other label.
training_games['result'] = (training_games['result'] == 'PLAYER_1 WON').astype(int)
print(training_games.head())
print(len(training_games))

         id bot1       deck1 bot2       deck2  result
0  100001.0   A1  deck113225   A1  deck731599       0
1  100002.0   A1  deck694943   A1  deck929572       1
2  100003.0   A1  deck182567   A1  deck525929       0
3  100004.0   A1  deck219364   A1  deck757429       1
4  100005.0   A1  deck826229   A1  deck337123       1
299680


In [3]:
# Distinct hero values that occur among the training decks.
heros = {deck.hero for deck in training_decks.values()}
print(heros)

{'Mage', 'Shaman', 'Warlock', 'Druid', 'Priest', 'Paladin', 'Rogue', 'Hunter', 'Warrior'}


In [15]:
# Long-format view of the games: one row per (player, deck) appearance.
# training_games['result'] is 1 exactly when the raw label was 'PLAYER_1 WON'.
# That flag is flipped for the bot1/deck1 side and kept as-is for bot2/deck2.
# NOTE(review): this treats 'PLAYER_1 WON' as a win for the bot2 side, i.e. it
# assumes the labels are 0-indexed — confirm against the data description.
first_side = training_games[['bot1', 'deck1', 'result']].rename(
    columns={'bot1': 'player', 'deck1': 'deck'})
first_side['result'] = 1 - first_side['result']
second_side = training_games[['bot2', 'deck2', 'result']].rename(
    columns={'bot2': 'player', 'deck2': 'deck'})

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# The row index is irrelevant here: the groupby below discards it.
train_df = pd.concat([first_side, second_side])

# Mean of the 0/1 flag per (player, deck) pair, i.e. a rate in [0, 1].
train_df = train_df.groupby(['player', 'deck'])['result'].mean()
train_df = train_df.reset_index()
print(train_df.head())

  player        deck    result
0     A1  deck100087  0.291667
1     A1  deck101443  0.276627
2     A1  deck102280  0.550613
3     A1  deck104259  0.404459
4     A1  deck105300  0.397929


In [322]:
from sklearn.feature_extraction import DictVectorizer
# Dense dict encoder, fitted on the card Counters of all training decks.
v = DictVectorizer(sparse=False)
D = [deck.cards for deck in training_decks.values()]
# NOTE: only the fitted vectorizer `v` is reused later; `X` is reassigned in a later cell.
X = v.fit_transform(D)



In [323]:
import numpy as np

# Bot identifiers, in the fixed order used for the player indicator columns.
players = ['A1', 'A2', 'B1', 'B2']

# One boolean vector per bot: True where the train_df row belongs to that bot.
# `.values` replaces Series.as_matrix(), which was removed in pandas 1.0.
players_features = [(train_df['player'] == pl).values for pl in players]

# Card-count features for every train_df row, looked up by deck name.
# All rows are vectorised in a single transform() call instead of one call per row.
x = np.asarray(v.transform([training_decks[deck_name].cards for deck_name in train_df['deck']]))

In [324]:
# Stack the per-bot indicator vectors into an (n_rows, n_players) matrix.
players_x = np.stack(players_features).T
# Design matrix: card-count columns followed by the bot indicator columns.
X = np.concatenate([x, players_x], axis=1)
# Regression target: the per-(player, deck) mean result from train_df.
# `.values` replaces Series.as_matrix(), which was removed in pandas 1.0.
y = train_df['result'].values

In [325]:
# Sanity check: X and y must have the same number of rows.
print(X.shape)
print(y.shape)

(1600, 334)
(1600,)


**Shuffle the dataset**

In [326]:
import random
# Shuffle the rows with a dedicated, seeded generator so that a fresh
# "Restart & Run All" always produces the same row order (an unseeded shuffle
# would make the later fixed-random_state split differ from run to run).
# NOTE: X and y are overwritten in place, so re-running only this cell
# permutes the already-shuffled data again.
shuffle_rng = random.Random(42)
indices = shuffle_rng.sample(range(len(y)), len(y))
X = X[indices]
y = y[indices]

In [327]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation; random_state fixes the split for a given row order.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [328]:
# Inspect the training partition: shapes first, then the arrays themselves.
print(X_train.shape, y_train.shape)
print(X_train, y_train)

(1280, 334) (1280,)
[[0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 2. ... 0. 0. 1.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 2. 0. ... 0. 0. 0.]] [0.60206718 0.64046579 0.29464286 ... 0.29230769 0.47859922 0.60349854]


In [329]:
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR, LinearSVR
# Alternative model tried earlier, kept for reference:
#logit = LinearSVR(C=0.02)
# NOTE: despite its name, `logit` holds a Ridge regressor, not a logistic model.
logit = Ridge()
logit = logit.fit(X_train, y_train)

In [330]:
import math

def reduce(x):
    """Clip every value of ``x`` to the closed interval [0, 100].

    Used in this notebook to keep predicted win rates, expressed in percent,
    inside the valid range.

    Args:
        x: numpy array of any shape, or any iterable of numbers.

    Returns:
        numpy.ndarray with the same shape as ``x`` whose values are limited
        to ``[0, 100]``. The dtype of the input is preserved, so a float
        input always yields a float result, even when every value is clipped.
    """
    values = x if isinstance(x, np.ndarray) else np.array(list(x))
    # np.clip is vectorised and works for any number of dimensions, unlike
    # the element-wise Python max()/min() loops it replaces.
    return np.clip(values, 0, 100)

In [331]:
# Shape check of the validation partition.
print(X_val.shape, y_val.shape)

(320, 334) (320,)


In [332]:
from sklearn.metrics import mean_squared_error
# Predictions are scaled to percent first and only then passed through reduce().
y_pred = reduce(logit.predict(X_val) * 100)
# Validation RMSE, with both targets and predictions expressed in percent.
print(np.sqrt(mean_squared_error(y_val * 100, y_pred)))

4.8608002828731784


In [333]:
# Eyeball the predictions against the validation targets, both in percent.
print(y_pred)
print(y_val * 100)

[43.35660034 57.79756569 47.5376503  50.85760019 47.25015582 36.93754096
 38.84913823 40.77585944 43.62689715 53.59780575 44.02455764 23.54372743
 31.91962448 56.55714867 26.59351344 68.14303652 52.80241169 46.62457448
 54.05256683 43.27574898 66.05754856 39.39199878 49.21857557 36.87423435
 48.89119867 60.2898551  61.47515969 45.46573744 44.56227187 39.47004384
 38.92042381 56.14942734 48.08464296 71.1908336  23.09330567 60.57301231
 51.29750006 56.49639987 32.1532341  48.06488493 55.29004048 64.91488554
 38.06721028 57.31902118 61.43881056 59.4449072  64.00207268 51.04512104
 44.67878089 59.2872     49.17104625 69.9163602  43.84557869 38.30268639
 61.10039238 53.83242504 77.43739572 40.49983134 59.41576123 62.14130113
 35.76194151 42.59351503 69.82280142 59.88306931 42.14188884 66.32362736
 68.51754258 33.25420752 47.77209015 56.91702257 61.6287305  74.74146404
 78.54030128 77.2559925  71.4521599  69.30582243 47.32310854 54.59584103
 62.42656833 64.46986148 49.69731765 48.088082   46

In [216]:
# Final model: a default Ridge regressor fitted on all rows of X and y
# (no validation part held out). Despite the name, this is a regressor.
final_classifier = Ridge()
final_classifier = final_classifier.fit(X, y)

In [217]:
def predict(deck, player):
    """Predict the win rate, in percent, of ``player`` playing ``deck``.

    Builds the feature layout used when fitting ``final_classifier``: the
    card-count vector produced by ``v`` followed by one indicator per entry
    of ``players``.

    Args:
        deck: Object exposing a ``cards`` mapping of card name to count.
        player: Bot identifier. A value that is not in ``players`` yields
            all-zero indicator columns rather than an error.

    Returns:
        One-element numpy array holding the prediction in percent, clipped
        to ``[0, 100]``.
    """
    player_flags = [player == pl for pl in players]
    x_test = np.array(v.transform(deck.cards)[0])
    x_test = np.concatenate([x_test, np.array(player_flags)])
    # Scale to percent BEFORE clipping. Clipping the raw 0-1 prediction to
    # [0, 100] and multiplying afterwards never enforced the upper bound
    # (e.g. 1.2 became 120) and disagreed with the validation code.
    return reduce(final_classifier.predict([x_test]) * 100)


In [218]:

def get_test_decks_list(path=None):
    """Return the deck names found in a JSON-lines deck file, in file order.

    Args:
        path: File to read. When omitted, the module-level ``test_path`` is
            used, which keeps existing zero-argument calls working.

    Returns:
        list with the first element of every record's ``deckName`` value,
        one entry per non-blank line of the file.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record has no ``deckName`` entry.
    """
    if path is None:
        path = test_path

    deck_names = []
    # JSON text is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as decks:
        for line in decks:
            if not line.strip():
                # Tolerate blank lines instead of crashing in json.loads.
                continue
            deck_names.append(json.loads(line)['deckName'][0])
    return deck_names

# Test deck names in file order; dump_results() iterates them in this order for each player.
test_decks_names = get_test_decks_list()

def dump_results(output_path='data/test_results.csv'):
    """Predict every (player, test deck) pair and write the results as CSV.

    Rows are produced player by player (in the order of ``players``) and,
    within a player, in the order of ``test_decks_names``. The file is
    ';'-separated, has no header and no index column, and its columns are
    ``player``, ``deck_name``, ``win_rate``. The first rows are also printed.

    Args:
        output_path: Destination of the CSV file. Defaults to the path that
            was previously hard-coded.
    """
    # Collect all rows first and build the frame once: growing a DataFrame
    # row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
    records = [
        {
            'deck_name': deck_name,
            'player': player,
            'win_rate': predict(test_decks[deck_name], player)[0],
        }
        for player in players
        for deck_name in test_decks_names
    ]
    # Explicit columns keep the frame well-formed even when there are no rows.
    df_results = pd.DataFrame(records, columns=['deck_name', 'player', 'win_rate'])

    print(df_results.head())
    df_results.to_csv(output_path, index=False, header=False,
                      columns=['player', 'deck_name', 'win_rate'], sep=';')

# Write the predictions for every (player, test deck) pair to disk.
dump_results()

    deck_name player   win_rate
0  deck244804     A1  18.392592
1  deck124802     A1  40.175071
2  deck687350     A1  44.581154
3  deck517728     A1  49.120459
4  deck130762     A1  42.915430
